Class: Duckdb

Inherits:
Object
  • Object
show all
Defined in:
lib/raka/lang/duckdb/impl.rb

Overview

DuckDB protocol with two modes:

  1. Persistent mode: operations on .db file with CREATE TABLE

  2. Ad-hoc mode: file in/out using COPY operations (output format taken from `format:` or the output file extension, defaulting to Parquet)

Instance Method Summary collapse

Constructor Details

#initialize(database: nil, params: {}, before: nil, after: nil, format: nil) ⇒ Duckdb

Returns a new instance of Duckdb.



20
21
22
23
24
25
26
27
# File 'lib/raka/lang/duckdb/impl.rb', line 20

# Stores the protocol configuration and derives the operating mode.
#
# @param database [String, nil] database file handed to the duckdb CLI;
#   when given the protocol runs in :persistent mode, otherwise :adhoc
# @param params [Hash] values substituted for `$key` placeholders by
#   #process_params
# @param before [String, nil] SQL emitted ahead of the main statement
# @param after [String, nil] SQL emitted after the main statement
# @param format [String, Symbol, nil] explicit COPY format for ad-hoc mode;
#   stored upper-cased, nil means "detect from the output file name"
# @return [Duckdb] a new instance of Duckdb
def initialize(database: nil, params: {}, before: nil, after: nil, format: nil)
  @database = database
  @params = params
  @before = before
  @after = after
  @format = format&.upcase
  # A configured database is what distinguishes the two modes.
  @mode = if @database
            :persistent
          else
            :adhoc
          end
end

Instance Method Details

#build(code, task) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/raka/lang/duckdb/impl.rb', line 53

# Assembles the SQL script for a task as newline-separated statements:
# optional before hook, the mode-specific main statement(s), optional
# after hook.
#
# The `:_name_` and `:output:` tokens are emitted verbatim; #run_script
# substitutes them when the script is executed.
#
# @param code [String] main query; `$key` placeholders are expanded via
#   #process_params
# @param task [#name] task whose name is used to detect the output format
#   in ad-hoc mode when no explicit format was configured
# @return [String] the full SQL script
def build(code, task)
  # Expand parameter placeholders in the query and in both hooks.
  query = process_params(code)
  prologue = process_params(@before)
  epilogue = process_params(@after)

  core =
    if @mode == :persistent
      # Replace the table wholesale so reruns start from a clean slate.
      ['DROP TABLE IF EXISTS :_name_;', "CREATE TABLE :_name_ AS (#{query});"]
    elsif @mode == :adhoc
      # An explicit format wins; otherwise fall back to the file extension.
      copy_format = @format || detect_format_from_extension(task.name)
      ["COPY (#{query}) TO ':output:' (FORMAT #{copy_format});"]
    else
      []
    end

  # Hooks are optional; drop the ones that were not configured.
  [prologue, *core, epilogue].select(&:itself).join("\n")
end

#detect_format_from_extension(filename) ⇒ Object



48
49
50
51
# File 'lib/raka/lang/duckdb/impl.rb', line 48

# Derives the COPY format name from a file name's extension.
#
# @param filename [String] output file name or path
# @return [String] the upper-cased extension without its leading dot, or
#   'PARQUET' when the name has no usable extension
def detect_format_from_extension(filename)
  ext = File.extname(filename).delete_prefix('.')
  # File.extname may return a bare "." for names ending in a dot; an empty
  # extension must fall back to the default rather than yield an empty format.
  ext.empty? ? 'PARQUET' : ext.upcase
end

#duckdb_cmd ⇒ Object



29
30
31
32
33
34
35
36
# File 'lib/raka/lang/duckdb/impl.rb', line 29

# Command line used to launch the DuckDB CLI for the current mode.
#
# @return [String, nil] "duckdb <database>" in persistent mode, plain
#   "duckdb" in ad-hoc mode, nil for any other mode value
def duckdb_cmd
  if @mode == :persistent
    "duckdb #{@database}"
  elsif @mode == :adhoc
    'duckdb'
  end
end

#process_params(code) ⇒ Object



38
39
40
41
42
43
44
45
46
# File 'lib/raka/lang/duckdb/impl.rb', line 38

# Expands `$key` placeholders in a SQL snippet with the configured params,
# each rendered as a single-quoted literal.
#
# All placeholders are replaced in one pass, longest name first, so a key
# that is a prefix of another (`$date` vs `$date_end`) cannot clobber it and
# substituted values are never re-scanned for further placeholders.
#
# NOTE(review): values are wrapped in single quotes verbatim; a value that
# itself contains a single quote is not escaped here.
#
# @param code [String, nil] SQL text; nil is passed through untouched
# @return [String, nil] the SQL with placeholders expanded
def process_params(code)
  return code if code.nil?

  substitutions = (@params || {}).each_with_object({}) do |(key, value), table|
    # First definition wins if two keys render to the same placeholder.
    table["$#{key}"] ||= "'#{value}'"
  end
  return code if substitutions.empty?

  # Regexp alternation is ordered, so longer placeholders must come first.
  placeholders = substitutions.keys.sort_by { |token| -token.length }

  # Block form inserts the value literally; a String replacement would
  # interpret backslash sequences such as \0 or \\ inside the value.
  code.gsub(Regexp.union(placeholders)) { |token| substitutions[token] }
end

#run_script(env, fname, task) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/raka/lang/duckdb/impl.rb', line 82

# Executes a previously built SQL script file through the DuckDB CLI.
#
# In both modes the script is streamed through `sed` to fill in the
# placeholder left by #build, piped into the command from #duckdb_cmd, has a
# single trailing newline stripped from the CLI output, and is copied to
# "<fname>.log" via `tee`.
#
# @param env [Object] passed straight through to the `bash` helper
#   (defined outside this method; presumably the execution environment —
#   confirm against the helper)
# @param fname [String] path of the SQL script file to execute
# @param task [#name, #output_stem] task being built; `name` is the output
#   file, `output_stem` is used as the table name in persistent mode
# @return [Object, nil] whatever `bash` returns; nil for an unknown mode
def run_script(env, fname, task)
  case @mode
  when :persistent
    # Replace the `:_name_` table placeholder with the task's output stem,
    # run the whole script in one CLI invocation, then write the database
    # name into the task's output file.
    # NOTE(review): whether a failing duckdb call stops the script before the
    # `echo` depends on how `bash` configures the shell (e.g. pipefail) — confirm.
    bash env, %(
    # Execute the combined SQL script with proper variable replacement
    cat #{fname} | sed 's|:_name_|#{task.output_stem}|g' | #{duckdb_cmd} | sed -z '$ s/\\n$//' | tee #{fname}.log
    echo "#{@database}" > #{task.name}
    )
  when :adhoc
    # Replace the `:output:` placeholder with the task's output file so the
    # COPY statement writes the result there directly.
    bash env, %(
    cat #{fname} | sed 's|:output:|#{task.name}|g' | #{duckdb_cmd} | sed -z '$ s/\\n$//' | tee #{fname}.log
    )
  end
end