Class: Duckdb
- Inherits:
-
Object
- Object
- Duckdb
- Defined in:
- lib/raka/lang/duckdb/impl.rb
Overview
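DuckDB protocol with two modes:
-
Persistent mode: operates on a .db database file and materializes the task's query with DROP TABLE IF EXISTS / CREATE TABLE ... AS
-
Ad-hoc mode: file in/out using COPY operations, with no database file; the output format comes from the format: option or the output file's extension (Parquet by default)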
Instance Method Summary
- #build(code, task) ⇒ Object
- #detect_format_from_extension(filename) ⇒ Object
- #duckdb_cmd ⇒ Object
-
#initialize(database: nil, params: {}, before: nil, after: nil, format: nil) ⇒ Duckdb
constructor
A new instance of Duckdb.
- #process_params(code) ⇒ Object
- #run_script(env, fname, task) ⇒ Object
Constructor Details
#initialize(database: nil, params: {}, before: nil, after: nil, format: nil) ⇒ Duckdb
Returns a new instance of Duckdb.
20 21 22 23 24 25 26 27 |
# File 'lib/raka/lang/duckdb/impl.rb', line 20
#
# Stores the protocol configuration and derives the operating mode from
# whether a database was supplied.
#
# @param database [String, nil] database handed to the `duckdb` CLI; any
#   truthy value selects :persistent mode, otherwise :adhoc mode is used
# @param params [Hash] placeholder values keyed by name (without the `$`)
# @param before [String, nil] SQL emitted ahead of the main statement
# @param after [String, nil] SQL emitted after the main statement
# @param format [#upcase, nil] explicit COPY format; stored upper-cased
def initialize(database: nil, params: {}, before: nil, after: nil, format: nil)
  @database = database
  @mode = if @database
            :persistent
          else
            :adhoc
          end
  @params = params
  @before = before
  @after = after
  @format = format.nil? ? nil : format.upcase
end
Instance Method Details
#build(code, task) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/raka/lang/duckdb/impl.rb', line 53
#
# Assembles the SQL script for one task: optional before hook, the main
# statement for the current mode, optional after hook, joined by newlines.
#
# The returned text still contains the `:_name_` (persistent mode) or
# `:output:` (ad-hoc mode) markers; nothing in this method replaces them.
#
# @param code [String, nil] the task's query, with `$key` placeholders
# @param task [#name] task whose name is used to detect the output format
#   in ad-hoc mode when no explicit format was configured
# @return [String] the SQL script
def build(code, task)
  main_sql = process_params(code)
  before_sql = process_params(@before)
  after_sql = process_params(@after)

  # The query is embedded inside parentheses below, where a trailing `;`
  # would be a syntax error, so drop trailing semicolons and whitespace.
  query = main_sql&.sub(/[;\s]+\z/, '')

  sql_parts = []
  sql_parts << before_sql if before_sql

  # The closing parenthesis goes on its own line so that a query ending in a
  # `-- comment` cannot comment it out.
  case @mode
  when :persistent
    sql_parts << 'DROP TABLE IF EXISTS :_name_;'
    sql_parts << "CREATE TABLE :_name_ AS (#{query}\n);"
  when :adhoc
    # An explicit format wins; otherwise derive it from the output file name.
    format = @format || detect_format_from_extension(task.name)
    sql_parts << "COPY (#{query}\n) TO ':output:' (FORMAT #{format});"
  end

  sql_parts << after_sql if after_sql

  sql_parts.join("\n")
end
#detect_format_from_extension(filename) ⇒ Object
48 49 50 51 |
# File 'lib/raka/lang/duckdb/impl.rb', line 48
#
# Derives a COPY format name from a file name: the extension without its
# dot, upper-cased. Falls back to 'PARQUET' when the name has no extension
# or ends in a bare dot (which would otherwise yield an empty format).
#
# @param filename [String] output file name or path
# @return [String] upper-cased extension, or 'PARQUET'
def detect_format_from_extension(filename)
  ext = File.extname(filename).delete_prefix('.')
  ext.empty? ? 'PARQUET' : ext.upcase
end
#duckdb_cmd ⇒ Object
29 30 31 32 33 34 35 36 |
# File 'lib/raka/lang/duckdb/impl.rb', line 29
#
# Command line used to start the DuckDB CLI for the current mode.
#
# @return [String, nil] `duckdb <database>` in :persistent mode, plain
#   `duckdb` in :adhoc mode, nil for any other mode value
def duckdb_cmd
  return "duckdb #{@database}" if @mode == :persistent

  'duckdb' if @mode == :adhoc
end
#process_params(code) ⇒ Object
38 39 40 41 42 43 44 45 46 |
# File 'lib/raka/lang/duckdb/impl.rb', line 38
#
# Replaces every `$key` placeholder in +code+ with the matching value from
# @params, rendered as a single-quoted SQL string literal.
#
# Substitution happens in one pass over the text:
# * longer placeholders are matched first, so `$name` never clobbers
#   `$name_full`;
# * substituted values are not re-scanned, so a value that happens to
#   contain `$other` is left as data;
# * values are inserted via a block, so backslashes in a value are kept
#   verbatim instead of being read as gsub back-references;
# * single quotes inside a value are doubled so the literal stays valid.
#
# @param code [String, nil] SQL text containing placeholders
# @return [String, nil] the substituted SQL; nil when +code+ is nil
def process_params(code)
  return code if code.nil?

  lookup = {}
  (@params || {}).each do |key, value|
    placeholder = "$#{key}"
    # First definition wins when a symbol key and a string key collide.
    lookup[placeholder] = value unless lookup.key?(placeholder)
  end
  return code if lookup.empty?

  # Regexp alternation is ordered, so list the longest placeholders first.
  pattern = Regexp.union(lookup.keys.sort_by { |placeholder| -placeholder.length })

  code.gsub(pattern) do |placeholder|
    literal = lookup[placeholder].to_s.gsub("'", "''")
    "'#{literal}'"
  end
end
#run_script(env, fname, task) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/raka/lang/duckdb/impl.rb', line 82
#
# Pipes the generated SQL script through the DuckDB CLI using the `bash`
# helper. Before the SQL reaches DuckDB, `sed` replaces the mode's marker:
# `:_name_` with task.output_stem (persistent) or `:output:` with task.name
# (ad-hoc). DuckDB's output has its final trailing newline removed and is
# copied to `<fname>.log`.
#
# In persistent mode the database name is also written to the file named
# after the task.
#
# The script path, log path and redirect target are shell-escaped so paths
# containing spaces or shell metacharacters stay single arguments.
# NOTE(review): the values spliced into the sed expressions are not escaped,
# so a name containing `|`, `&`, `\` or `'` would still break the
# substitution.
#
# @param env [Object] passed through to `bash` unchanged
# @param fname [String] path of the SQL script file
# @param task [#name, #output_stem] the task being built
# @return [Object, nil] whatever `bash` returns; nil for an unknown mode
def run_script(env, fname, task)
  require 'shellwords' # stdlib; required here so the method is self-contained

  script_path = Shellwords.escape(fname)
  log_path = Shellwords.escape("#{fname}.log")

  case @mode
  when :persistent
    marker_path = Shellwords.escape(task.name)
    bash env, %(
    # Execute the combined SQL script with proper variable replacement
    cat #{script_path} | sed 's|:_name_|#{task.output_stem}|g' | #{duckdb_cmd} | sed -z '$ s/\\n$//' | tee #{log_path}
    echo "#{@database}" > #{marker_path}
    )
  when :adhoc
    bash env, %(
    cat #{script_path} | sed 's|:output:|#{task.name}|g' | #{duckdb_cmd} | sed -z '$ s/\\n$//' | tee #{log_path}
    )
  end
end