Module: Sequel::DuckDB::DatasetMethods

Included in:
Dataset
Defined in:
lib/sequel/adapters/shared/duckdb.rb

Overview

DatasetMethods module provides shared dataset functionality for DuckDB adapter This module is included by the main Dataset class to provide SQL generation and query execution capabilities.

Since:

  • 0.1.0

Constant Summary collapse

DUCKDB_RESERVED_WORDS =

DuckDB reserved words that must be quoted

Since:

  • 0.1.0

%w[
  order group select from where having limit offset union all distinct
  case when then else end and or not in like between is null true false
  join inner left right full outer on using as with recursive
  create table view index drop alter insert update delete
  primary key foreign references constraint unique check default
  auto_increment serial bigserial smallserial
  integer int bigint smallint tinyint boolean bool
  varchar char text string blob
  date time timestamp datetime interval
  float double real decimal numeric
  array struct map
].freeze

Instance Method Summary collapse

Instance Method Details

#add_index_hints(columns) ⇒ Dataset

Add index optimization hints to the dataset

Parameters:

  • columns (Array)

    Columns that might benefit from index usage

Returns:

  • (Dataset)

    Dataset with index hints

Since:

  • 0.1.0



2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2217

def add_index_hints(columns)
  # Get available indexes for the table
  table_name = @opts[:from]&.first
  return self unless table_name

  available_indexes = begin
    db.indexes(table_name)
  rescue StandardError
    {}
  end

  # Find indexes that match the columns
  matching_indexes = available_indexes.select do |_index_name, index_info|
    index_columns = index_info[:columns] || []
    columns.any? { |col| index_columns.include?(col.to_sym) }
  end

  # Add index hints to options
  clone(index_hints: matching_indexes.keys)
end

#add_parallel_hints(sql) ⇒ Object (private)

Add parallel execution hints to SQL

Since:

  • 0.1.0



2323
2324
2325
2326
2327
2328
2329
2330
2331
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2323

def add_parallel_hints(sql)
  # DuckDB handles parallelization automatically, but we can add configuration
  if @opts[:parallel_threads]
    # NOTE: This would require connection-level configuration in practice
    # For now, we'll rely on DuckDB's automatic parallelization
  end

  sql
end

#allObject

Override all method to ensure proper model instantiation Sequel's default all method doesn't always apply row_proc correctly

Since:

  • 0.1.0



1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1773

def all
  records = []
  fetch_rows(select_sql) do |row|
    # Apply row_proc if it exists (for model instantiation)
    row_proc = @row_proc || opts[:row_proc]
    processed_row = row_proc ? row_proc.call(row) : row
    records << processed_row
  end
  records
end

#analyze_queryHash

Get detailed query analysis including index usage

Returns:

  • (Hash)

    Analysis information

Since:

  • 0.1.0



2178
2179
2180
2181
2182
2183
2184
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2178

def analyze_query
  {
    plan: explain,
    indexes_used: extract_indexes_from_plan(explain),
    optimization_hints: generate_optimization_hints
  }
end

#bulk_insert_optimized(rows) ⇒ Object

Optimized bulk insert implementation using DuckDB's capabilities

Since:

  • 0.1.0



1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1918

def bulk_insert_optimized(rows)
  return 0 if rows.empty?

  # Get column names from first row
  columns = rows.first.keys

  # Get table name from opts[:from]
  table_name = @opts[:from].first

  # Build optimized INSERT statement with VALUES clause
  # DuckDB handles multiple VALUES efficiently
  values_placeholders = rows.map { |_| "(#{columns.map { "?" }.join(", ")})" }.join(", ")
  table_sql = String.new
  quote_identifier_append(table_sql, table_name)
  col_list = columns.map do |c|
    col_sql = String.new
    quote_identifier_append(col_sql, c)
    col_sql
  end.join(", ")
  sql = "INSERT INTO #{table_sql} (#{col_list}) VALUES #{values_placeholders}"

  # Flatten all row values for parameter binding
  params = rows.flat_map { |row| columns.map { |col| row[col] } }

  # Execute the bulk insert
  db.execute(sql, params)

  rows.length
end

#complex_expression_sql_append(sql, operator, args) ⇒ Object

Override complex_expression_sql_append for DuckDB-specific handling

Since:

  • 0.1.0



1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1618

def complex_expression_sql_append(sql, operator, args)
  case operator
  when :LIKE
    # Generate clean LIKE without ESCAPE clause (Requirement 1.1)
    sql << "("
    literal_append(sql, args.first)
    sql << " LIKE "
    literal_append(sql, args.last)
    sql << ")"
  when :"NOT LIKE"
    # Generate clean NOT LIKE without ESCAPE clause (Requirement 1.1)
    sql << "("
    literal_append(sql, args.first)
    sql << " NOT LIKE "
    literal_append(sql, args.last)
    sql << ")"
  when :ILIKE
    # DuckDB doesn't have ILIKE, use UPPER() workaround with proper parentheses (Requirement 1.3)
    sql << "(UPPER("
    literal_append(sql, args.first)
    sql << ") LIKE UPPER("
    literal_append(sql, args.last)
    sql << "))"
  when :"NOT ILIKE"
    # Generate clean NOT ILIKE without ESCAPE clause (Requirement 1.3)
    sql << "(UPPER("
    literal_append(sql, args.first)
    sql << ") NOT LIKE UPPER("
    literal_append(sql, args.last)
    sql << "))"
  when :~
    # Regular expression matching for DuckDB with proper parentheses (Requirement 4.1, 4.3)
    # DuckDB's ~ operator has limitations with anchors, so we use regexp_matches for reliability
    sql << "(regexp_matches("
    literal_append(sql, args.first)
    sql << ", "
    literal_append(sql, args.last)
    sql << "))"
  when :"~*"
    # Case-insensitive regular expression matching for DuckDB (Requirement 4.2)
    # Use regexp_matches with case-insensitive flag
    sql << "(regexp_matches("
    literal_append(sql, args.first)
    sql << ", "
    literal_append(sql, args.last)
    sql << ", 'i'))"
  else
    super
  end
end

#convert_row_types(row, table_schema) ⇒ Object (private)

Convert row values based on column types

Since:

  • 0.1.0



1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1887

def convert_row_types(row, table_schema)
  return row unless table_schema

  converted_row = {}
  row.each do |column_name, value|
    column_info = table_schema[column_name]
    converted_row[column_name] = if column_info && column_info[:type] == :time && value.is_a?(Time)
                                   # Convert TIME columns to time-only values
                                   Time.local(1970, 1, 1, value.hour, value.min, value.sec, value.usec)
                                 else
                                   value
                                 end
  end
  converted_row
end

#count(*args, &block) ⇒ Object

Optimized count method for DuckDB Provides fast path for simple COUNT(*) queries on base tables Falls back to Sequel's implementation for complex scenarios

Since:

  • 0.1.0



2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2112

def count(*args, &block)
  # Only optimize if:
  # - No arguments or block provided
  # - No grouping, having, distinct, or where clauses
  # - Has a from clause with a table
  if args.empty? && !block && !@opts[:group] && !@opts[:having] &&
     !@opts[:distinct] && !@opts[:where] && @opts[:from]&.first
    # Use optimized COUNT(*) for simple cases
    table_name = @opts[:from].first
    table_sql = String.new
    quote_identifier_append(table_sql, table_name)
    single_value("SELECT COUNT(*) FROM #{table_sql}")
  else
    # Fall back to standard Sequel count behavior for complex cases
    super
  end
end

#cte_is_recursive?(cte_info) ⇒ Boolean (private)

Auto-detect if a CTE is recursive by analyzing its SQL for self-references

Parameters:

  • cte_info (Hash)

    CTE information hash with :name and :dataset

Returns:

  • (Boolean)

    true if the CTE appears to be recursive

Since:

  • 0.1.0



1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1426

def cte_is_recursive?(cte_info)
  return false unless cte_info[:dataset]

  cte_name = cte_info[:name].to_s
  cte_sql = cte_info[:dataset].sql

  # Check if the CTE SQL contains references to its own name
  # Look for patterns like "FROM table_name" or "JOIN table_name"
  # Use word boundaries to avoid false positives with partial matches
  recursive_pattern = /\b(?:FROM|JOIN)\s+#{Regexp.escape(cte_name)}\b/i

  cte_sql.match?(recursive_pattern)
end

#deleteInteger

Delete records from the dataset

Returns:

  • (Integer)

    Number of affected rows

Since:

  • 0.1.0



1817
1818
1819
1820
1821
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1817

def delete
  sql = delete_sql
  # Use execute_update which properly returns the row count
  db.execute_update(sql)
end

#delete_sqlString

Generate DELETE SQL statement

Returns:

  • (String)

    The DELETE SQL statement

Since:

  • 0.1.0



1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1333

def delete_sql
  return @opts[:sql] if @opts[:sql]

  sql = "DELETE FROM #{table_name_sql}"

  # Add WHERE clause
  select_where_sql(sql) if @opts[:where]

  sql
end

#eachObject

Memory-efficient streaming for large result sets (Requirement 9.5) Enhanced each method with better memory management

Since:

  • 0.1.0



2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2007

def each(&)
  return enum_for(:each) unless block_given?

  # Use streaming approach to minimize memory usage
  sql = select_sql

  # Check if SQL already has LIMIT/OFFSET - if so, don't add batching
  if sql.match?(/\bLIMIT\b/i) || sql.match?(/\bOFFSET\b/i)
    # SQL already has LIMIT/OFFSET, execute directly without batching
    fetch_rows(sql, &)
    return self
  end

  # Process results in batches to balance memory usage and performance
  batch_size = @opts[:stream_batch_size] || 1000
  offset = 0

  loop do
    # Fetch a batch of results
    batch_sql = "#{sql} LIMIT #{batch_size} OFFSET #{offset}"
    batch_count = 0

    fetch_rows(batch_sql) do |row|
      yield row
      batch_count += 1
    end

    # Break if we got fewer rows than the batch size (end of results)
    break if batch_count < batch_size

    offset += batch_size
  end

  self
end

#explainString

Get query execution plan with index usage information

Returns:

  • (String)

    Query execution plan

Since:

  • 0.1.0



2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2164

def explain
  explain_sql = "EXPLAIN #{select_sql}"
  plan_text = ""

  fetch_rows(explain_sql) do |row|
    plan_text += "#{row.values.join(" ")}\n"
  end

  plan_text
end

#extract_indexes_from_plan(plan) ⇒ Object (private)

Extract index names from query execution plan

Since:

  • 0.1.0



2273
2274
2275
2276
2277
2278
2279
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2273

def extract_indexes_from_plan(plan)
  indexes = []
  plan.scan(/idx_\w+|index\s+(\w+)/i) do |match|
    indexes << (match.is_a?(Array) ? match.first : match)
  end
  indexes.compact.uniq
end

#fetch_rows(sql) ⇒ Object

Optimized fetch_rows method for large result sets (Requirement 9.1) This method provides efficient row fetching with streaming capabilities Override the existing fetch_rows method to make it public and optimized

Since:

  • 0.1.0



1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1844

def fetch_rows(sql)
  # Use streaming approach to avoid loading all results into memory at once
  # This is particularly important for large result sets
  if block_given?
    # Get schema information for type conversion
    table_schema = table_schema_for_conversion

    # Execute with type conversion
    db.execute(sql) do |row|
      # Apply type conversion for TIME columns
      converted_row = convert_row_types(row, table_schema)
      yield converted_row
    end
  else
    # Return enumerator if no block given (for compatibility)
    enum_for(:fetch_rows, sql)
  end
end

#generate_optimization_hintsObject (private)

Generate optimization hints based on query structure

Since:

  • 0.1.0



2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2282

def generate_optimization_hints
  hints = []

  # Check for potential index usage
  hints << "Consider adding indexes on WHERE clause columns" if @opts[:where]

  # Check for ORDER BY optimization
  hints << "ORDER BY may benefit from index on ordered columns" if @opts[:order]

  # Check for GROUP BY optimization
  hints << "GROUP BY operations are optimized for columnar storage" if @opts[:group]

  hints
end

#group(*columns) ⇒ Object

Optimize aggregation queries for columnar storage

Since:

  • 0.1.0



2251
2252
2253
2254
2255
2256
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2251

def group(*columns)
  result = super

  # Add columnar aggregation optimization hints
  result.clone(columnar_aggregation: true)
end

#identifier_needs_quoting?(name) ⇒ Boolean

Check if an identifier needs quoting

Returns:

  • (Boolean)

Since:

  • 0.1.0



1259
1260
1261
1262
1263
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1259

def identifier_needs_quoting?(name)
  return true if super

  DUCKDB_RESERVED_WORDS.include?(name.to_s.downcase)
end

#input_identifier(value) ⇒ Object (private)

DuckDB uses lowercase identifiers

Since:

  • 0.1.0



1242
1243
1244
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1242

def input_identifier(value)
  value.to_s
end

#insert(values = {}) ⇒ Integer?

Insert a record into the dataset's table

Parameters:

  • values (Hash) (defaults to: {})

    Column values to insert

Returns:

  • (Integer, nil)

    Number of affected rows (always nil for DuckDB due to no AUTOINCREMENT)

Since:

  • 0.1.0



1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1788

def insert(values = {})
  sql = insert_sql(values)
  result = db.execute(sql)

  # For DuckDB, we need to return the number of affected rows
  # Since DuckDB doesn't support AUTOINCREMENT, we return nil for the ID
  # but we should return 1 to indicate successful insertion
  if result.is_a?(::DuckDB::Result)
    # DuckDB::Result doesn't have a direct way to get affected rows for INSERT
    # For INSERT operations, if no error occurred, assume 1 row was affected
    1
  else
    result
  end
end

#insert_sql(*values) ⇒ String

Generate INSERT SQL statement

Parameters:

  • values (Hash, Array)

    Values to insert

Returns:

  • (String)

    The INSERT SQL statement

Since:

  • 0.1.0



1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1269

def insert_sql(*values)
  return @opts[:sql] if @opts[:sql]

  # Handle empty values case
  if values.empty? || (values.length == 1 && values.first.empty?)
    return "INSERT INTO #{table_name_sql} DEFAULT VALUES"
  end

  # Handle single hash of values
  if values.length == 1 && values.first.is_a?(Hash)
    values_hash = values.first
    columns = values_hash.keys
    column_list = literal(columns)
    values_list = literal(columns.map { |k| values_hash[k] })

    return "INSERT INTO #{table_name_sql} #{column_list} VALUES #{values_list}"
  end

  # Handle array of hashes (multiple records)
  if values.length == 1 && values.first.is_a?(Array)
    records = values.first
    return "INSERT INTO #{table_name_sql} DEFAULT VALUES" if records.empty?

    first_record = records.first
    columns = first_record.keys
    column_list = literal(columns)

    values_lists = records.map do |record|
      literal(columns.map { |k| record[k] })
    end

    return "INSERT INTO #{table_name_sql} #{column_list} VALUES #{values_lists.join(", ")}"
  end

  # Fallback for other cases
  "INSERT INTO #{table_name_sql} DEFAULT VALUES"
end

#join(table, expr = nil, options = {}) ⇒ Object

Override join method to support USING clause syntax

Since:

  • 0.1.0



1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1670

def join(table, expr = nil, options = {})
  # Handle the case where using parameter is passed
  if options.is_a?(Hash) && options[:using]
    using_columns = Array(options[:using])
    join_type = options[:type] || :inner
    join_clause = Sequel::SQL::JoinUsingClause.new(using_columns, join_type, table)
    clone(join: (@opts[:join] || []) + [join_clause])
  else
    # Fall back to standard Sequel join behavior
    super
  end
end

#literal_append(sql, value) ⇒ Object

Override literal_append to handle DuckDB-specific type conversions Only handles cases that differ from Sequel's default behavior

Since:

  • 0.1.0



1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1726

def literal_append(sql, value)
  case value
  when Time
    # Special handling for time-only values (year 1970 indicates time-only)
    if value.year == 1970 && value.month == 1 && value.day == 1
      # This is a time-only value, use TIME format
      sql << "'#{value.strftime("%H:%M:%S")}'"
    else
      # Use our custom datetime formatting for consistency
      literal_datetime_append(sql, value)
    end
  when DateTime
    # Use our custom datetime formatting for consistency
    literal_datetime_append(sql, value)
  when String
    # Only handle binary data differently for DuckDB's hex format
    if value.encoding == Encoding::ASCII_8BIT
      literal_blob_append(sql, value)
    else
      # Let Sequel handle LiteralString and regular strings
      super
    end
  else
    super
  end
end

#literal_blob(blob) ⇒ Object

Literal conversion for binary data (BLOB type)

Since:

  • 0.1.0



1765
1766
1767
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1765

def literal_blob(blob)
  "'#{blob.unpack1("H*")}'"
end

#literal_blob_append(sql, blob) ⇒ Object

Helper method for binary data literal appending

Since:

  • 0.1.0



1759
1760
1761
1762
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1759

def literal_blob_append(sql, blob)
  # DuckDB expects BLOB literals in hex format without \x prefix
  sql << "'#{blob.unpack1("H*")}'"
end

#literal_boolean(value) ⇒ Object

Since:

  • 0.1.0



1712
1713
1714
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1712

def literal_boolean(value)
  value ? "TRUE" : "FALSE"
end

#literal_date(date) ⇒ Object

Since:

  • 0.1.0



1688
1689
1690
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1688

def literal_date(date)
  "'#{date.strftime("%Y-%m-%d")}'"
end

#literal_datetime(datetime) ⇒ Object

Since:

  • 0.1.0



1692
1693
1694
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1692

def literal_datetime(datetime)
  "'#{datetime.strftime("%Y-%m-%d %H:%M:%S")}'"
end

#literal_datetime_append(sql, datetime) ⇒ Object

Helper method for datetime literal appending

Since:

  • 0.1.0



1754
1755
1756
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1754

def literal_datetime_append(sql, datetime)
  sql << "'#{datetime.strftime("%Y-%m-%d %H:%M:%S")}'"
end

#literal_falseObject

Since:

  • 0.1.0



1720
1721
1722
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1720

def literal_false
  "FALSE"
end

#literal_string_append(sql, string) ⇒ Object

Override literal methods for DuckDB-specific formatting

Since:

  • 0.1.0



1684
1685
1686
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1684

def literal_string_append(sql, string)
  sql << "'" << string.gsub("'", "''") << "'"
end

#literal_symbol_append(sql, value) ⇒ Object

Override symbol literal handling to prevent asterisk from being quoted This fixes count(*) function calls which should not quote the asterisk

Since:

  • 0.1.0



1702
1703
1704
1705
1706
1707
1708
1709
1710
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1702

def literal_symbol_append(sql, value)
  # Special case for asterisk - don't quote it
  if value == :*
    sql << "*"
  else
    # Use standard Sequel symbol handling for all other symbols
    super
  end
end

#literal_time(time) ⇒ Object

Since:

  • 0.1.0



1696
1697
1698
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1696

def literal_time(time)
  "'#{time.strftime("%H:%M:%S")}'"
end

#literal_trueObject

Since:

  • 0.1.0



1716
1717
1718
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1716

def literal_true
  "TRUE"
end

#memory_usageObject (private)

Get approximate memory usage for streaming optimization

Since:

  • 0.1.0



2102
2103
2104
2105
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2102

def memory_usage
  GC.start
  ObjectSpace.count_objects[:TOTAL] * 40
end

#multi_insert(columns = nil) ⇒ Object

Enhanced bulk insert optimization (Requirement 9.3) Override multi_insert to use DuckDB's efficient bulk loading capabilities

Since:

  • 0.1.0



1907
1908
1909
1910
1911
1912
1913
1914
1915
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1907

def multi_insert(columns = nil, &)
  if columns.is_a?(Array) && !columns.empty? && columns.first.is_a?(Hash)
    # Handle array of hashes (most common case)
    bulk_insert_optimized(columns)
  else
    # Fall back to standard Sequel behavior for other cases
    super
  end
end

#optimize_for_columnar_projection(sql) ⇒ Object (private)

Optimize SQL for columnar projection

Since:

  • 0.1.0



2298
2299
2300
2301
2302
2303
2304
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2298

def optimize_for_columnar_projection(sql)
  # Add DuckDB-specific hints for columnar projection
  if @opts[:columnar_optimized]
    # DuckDB automatically optimizes column access, but we can add hints
  end
  sql
end

#optimize_for_result_size(sql) ⇒ Object (private)

Helper method to optimize query execution based on result set size

Since:

  • 0.1.0



2149
2150
2151
2152
2153
2154
2155
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2149

def optimize_for_result_size(sql)
  # Add DuckDB-specific optimization hints if needed
  if @opts[:small_result_set]
    # For small result sets, DuckDB can use different optimization strategies
  end
  sql
end

#order(*columns) ⇒ Object

Override order method to leverage index optimization

Since:

  • 0.1.0



2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2197

def order(*columns)
  result = super

  # Add index hints for ORDER BY optimization
  order_columns = columns.map do |col|
    case col
    when Sequel::SQL::OrderedExpression
      col.expression
    else
      col
    end
  end

  result.add_index_hints(order_columns)
end

#order_column_sql(column) ⇒ Object

Format individual ORDER BY column

Since:

  • 0.1.0



1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1599

def order_column_sql(column)
  case column
  when Sequel::SQL::OrderedExpression
    col_sql = literal(column.expression)
    col_sql << (column.descending ? " DESC" : " ASC")
    # Check if nulls option exists (may not be available in all Sequel versions)
    if column.respond_to?(:nulls) && column.nulls
      col_sql << (column.nulls == :first ? " NULLS FIRST" : " NULLS LAST")
    end
    col_sql
  else
    literal(column)
  end
end

#output_identifier(value) ⇒ Object (private)

DuckDB uses lowercase identifiers

Since:

  • 0.1.0



1247
1248
1249
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1247

def output_identifier(value)
  value == "" ? :untitled : value.to_sym
end

#parallel(thread_count = nil) ⇒ Dataset

Enable parallel execution for the query

Parameters:

  • thread_count (Integer) (defaults to: nil)

    Number of threads to use (optional)

Returns:

  • (Dataset)

    Dataset configured for parallel execution

Since:

  • 0.1.0



2264
2265
2266
2267
2268
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2264

def parallel(thread_count = nil)
  opts = { parallel_execution: true }
  opts[:parallel_threads] = thread_count if thread_count
  clone(opts)
end

#prepare(type, name = nil, *values) ⇒ Object

Prepared statement support for performance (Requirement 9.2) Enhanced prepare method that leverages DuckDB's prepared statement capabilities

Since:

  • 0.1.0



1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1950

def prepare(type, name = nil, *values)
  # Check if DuckDB connection supports prepared statements
  if db.respond_to?(:prepare_statement)
    # Use DuckDB's native prepared statement support
    sql = case type
          when :select, :all
            select_sql
          when :first
            clone(limit: 1).select_sql
          when :insert
            insert_sql(*values)
          when :update
            update_sql(*values)
          when :delete
            delete_sql
          else
            raise ArgumentError, "Unsupported prepared statement type: #{type}"
          end

    # Create and cache prepared statement
    prepared_stmt = db.prepare_statement(sql)

    # Return a callable object that executes the prepared statement
    lambda do |*params|
      case type
      when :select, :all
        prepared_stmt.execute(*params).to_a
      when :first
        result = prepared_stmt.execute(*params).first
        result
      else
        prepared_stmt.execute(*params)
      end
    end
  else
    # Fall back to standard Sequel prepared statement handling
    super
  end
end

#quote_identifiers_defaultObject

Delegate quote_identifiers_default to the database

Since:

  • 0.1.0



1254
1255
1256
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1254

def quote_identifiers_default
  db.quote_identifiers_default
end

#reserved_word?(word) ⇒ Boolean

Check if a word is a SQL reserved word that needs quoting

Returns:

  • (Boolean)

Since:

  • 0.1.0



1378
1379
1380
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1378

def reserved_word?(word)
  %w[order group select from where having limit offset].include?(word.downcase)
end

#select(*columns) ⇒ Object

Override select method to add columnar optimization

Since:

  • 0.1.0



2241
2242
2243
2244
2245
2246
2247
2248
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2241

def select(*columns)
  result = super

  # Mark as columnar-optimized if selecting specific columns
  result = result.clone(columnar_optimized: true) if columns.length.positive? && columns.length < 10

  result
end

#select_from_sql(sql) ⇒ Object

Override select_from_sql to validate table names

Since:

  • 0.1.0



1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1443

def select_from_sql(sql)
  if (f = @opts[:from])
    # Validate that no table names are nil
    f.each do |table|
      raise ArgumentError, "Table name cannot be nil" if table.nil?
    end
  end

  # Call parent implementation
  super
end

#select_group_sql(sql) ⇒ Object

Add GROUP BY clause to SQL (Requirement 6.7)

Since:

  • 0.1.0



1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1567

def select_group_sql(sql)
  return unless @opts[:group]

  sql << " GROUP BY "
  if @opts[:group].is_a?(Array)
    sql << @opts[:group].map { |col| literal(col) }.join(", ")
  else
    literal_append(sql, @opts[:group])
  end
end

#select_having_sql(sql) ⇒ Object

Add HAVING clause to SQL (Requirement 6.8)

Since:

  • 0.1.0



1579
1580
1581
1582
1583
1584
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1579

def select_having_sql(sql)
  return unless @opts[:having]

  sql << " HAVING "
  literal_append(sql, @opts[:having])
end

#select_join_sql(sql) ⇒ Object

Add JOIN clauses to SQL (Requirement 6.9)

Since:

  • 0.1.0



1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1456

def select_join_sql(sql)
  return unless @opts[:join]

  @opts[:join].each do |join| # rubocop:disable Metrics/BlockLength
    # Handle different join clause types
    case join
    when Sequel::SQL::JoinOnClause
      join_type = join.join_type || :inner
      table = join.table
      conditions = join.on

      # Format join type
      join_clause = case join_type
                    when :left, :left_outer
                      "LEFT JOIN"
                    when :right, :right_outer
                      "RIGHT JOIN"
                    when :full, :full_outer
                      "FULL JOIN"
                    else
                      # when :inner
                      "INNER JOIN"
                    end

      sql << " #{join_clause} "

      # Add table name
      sql << if table.is_a?(Sequel::Dataset)
               alias_sql = String.new
               quote_identifier_append(alias_sql, join.table_alias || "subquery")
               "(#{table.sql}) AS #{alias_sql}"
             else
               literal(table)
             end

      # Add ON conditions
      if conditions
        sql << " ON "
        literal_append(sql, conditions)
      end

    when Sequel::SQL::JoinUsingClause
      join_type = join.join_type || :inner
      table = join.table
      using_columns = join.using

      join_clause = case join_type
                    when :left, :left_outer
                      "LEFT JOIN"
                    when :right, :right_outer
                      "RIGHT JOIN"
                    when :full, :full_outer
                      "FULL JOIN"
                    else
                      # when :inner
                      "INNER JOIN"
                    end

      sql << " #{join_clause} "

      # Handle table with alias
      sql << if table.is_a?(Sequel::Dataset)
               # Subquery with alias
               "(#{table.sql})"
             else
               # Regular table (may have alias)
               literal(table)
               # Add alias if present
             end
      if join.table_alias
        sql << " AS "
        quote_identifier_append(sql, join.table_alias)
      end

      if using_columns
        sql << " USING ("
        Array(using_columns).each_with_index do |col, i|
          sql << ", " if i.positive?
          quote_identifier_append(sql, col)
        end
        sql << ")"
      end

    when Sequel::SQL::JoinClause
      join_type = join.join_type || :inner
      table = join.table

      join_clause = case join_type
                    when :cross
                      "CROSS JOIN"
                    when :natural
                      "NATURAL JOIN"
                    else
                      "INNER JOIN"
                    end

      sql << " #{join_clause} "
      sql << literal(table)
    end
  end
end

#select_order_sql(sql) ⇒ Object

Add ORDER BY clause to SQL (enhanced - Requirement 6.5)

Since:

  • 0.1.0



1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1587

def select_order_sql(sql)
  return unless @opts[:order]

  sql << " ORDER BY "
  sql << if @opts[:order].is_a?(Array)
           @opts[:order].map { |col| order_column_sql(col) }.join(", ")
         else
           order_column_sql(@opts[:order])
         end
end

#select_where_sql(sql) ⇒ Object

Add WHERE clause to SQL (enhanced for complex conditions - Requirement 6.4)

Since:

  • 0.1.0



1559
1560
1561
1562
1563
1564
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1559

def select_where_sql(sql)
  return unless @opts[:where]

  sql << " WHERE "
  literal_append(sql, @opts[:where])
end

#select_with_sql(sql) ⇒ Object (private)

Override the WITH clause generation to support RECURSIVE keyword

Since:

  • 0.1.0



1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1402

def select_with_sql(sql)
  return unless opts[:with]

  # Check if any WITH clause is recursive (either explicitly marked or auto-detected)
  has_recursive = opts[:with].any? { |w| w[:recursive] || cte_is_recursive?(w) }

  # Add WITH or WITH RECURSIVE prefix
  sql << (has_recursive ? "WITH RECURSIVE " : "WITH ")

  # Add each CTE
  opts[:with].each_with_index do |w, i|
    sql << ", " if i.positive?
    name_sql = String.new
    quote_identifier_append(name_sql, w[:name])
    sql << "#{name_sql} AS (#{w[:dataset].sql})"
  end

  sql << " "
end

#should_use_bulk_operations?(row_count) ⇒ Boolean (private)

Helper method to check if bulk operations should be used

Returns:

  • (Boolean)

Since:

  • 0.1.0



2143
2144
2145
2146
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2143

def should_use_bulk_operations?(row_count)
  # Use bulk operations for more than 10 rows
  row_count > 10
end

#should_use_parallel_execution?Boolean (private)

Determine if parallel execution should be used

Returns:

  • (Boolean)

Since:

  • 0.1.0



2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2307

def should_use_parallel_execution?
  # Use parallel execution for:
  # 1. Explicit parallel requests
  # 2. Complex aggregations
  # 3. Large joins
  # 4. Window functions

  return true if @opts[:parallel_execution]
  return true if @opts[:group] && @opts[:columnar_aggregation]
  return true if @opts[:join] && @opts[:join].length > 1
  return true if sql.downcase.include?("over(")

  false
end

#single_value(sql) ⇒ Object (private)

Get a single value from a SQL query (used by count)

Since:

  • 0.1.0



2133
2134
2135
2136
2137
2138
2139
2140
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2133

def single_value(sql)
  value = nil
  fetch_rows(sql) do |row|
    value = row.values.first
    break
  end
  value
end

#stream(sql = select_sql) {|Hash| ... } ⇒ Enumerator

Streaming result support where possible (Requirement 9.5)

Parameters:

  • sql (String) (defaults to: select_sql)

    SQL to execute

Yields:

  • (Hash)

    Block to process each row

Returns:

  • (Enumerator)

    If no block given, returns enumerator

Since:

  • 0.1.0



1828
1829
1830
1831
1832
1833
1834
1835
1836
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1828

def stream(sql = select_sql, &)
  if block_given?
    # Stream results by processing them one at a time
    fetch_rows(sql, &)
  else
    # Return enumerator for lazy evaluation
    enum_for(:stream, sql)
  end
end

#stream_batch_size(size) ⇒ Dataset

Set custom batch size for streaming operations (Requirement 9.5)

Parameters:

  • size (Integer)

    Batch size for streaming

Returns:

  • (Dataset)

    New dataset with custom batch size

Since:

  • 0.1.0



2047
2048
2049
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2047

def stream_batch_size(size)
  clone(stream_batch_size: size)
end

#stream_with_memory_limit(memory_limit) {|Hash| ... } ⇒ Enumerator

Stream results with memory limit enforcement (Requirement 9.5)

Parameters:

  • memory_limit (Integer)

    Maximum memory growth allowed in bytes

Yields:

  • (Hash)

    Block to process each row

Returns:

  • (Enumerator)

    If no block given

Since:

  • 0.1.0



2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2056

def stream_with_memory_limit(memory_limit, &)
  return enum_for(:stream_with_memory_limit, memory_limit) unless block_given?

  sql = select_sql

  # Check if SQL already has LIMIT/OFFSET - if so, don't add batching
  if sql.match?(/\bLIMIT\b/i) || sql.match?(/\bOFFSET\b/i)
    # SQL already has LIMIT/OFFSET, execute directly without batching
    fetch_rows(sql, &)
    return self
  end

  initial_memory = memory_usage
  batch_size = @opts[:stream_batch_size] || 500
  offset = 0

  loop do
    # Check memory usage before processing batch
    current_memory = memory_usage
    memory_growth = current_memory - initial_memory

    # Reduce batch size if memory usage is high
    batch_size = [batch_size / 2, 100].max if memory_growth > memory_limit * 0.8

    batch_sql = "#{sql} LIMIT #{batch_size} OFFSET #{offset}"
    batch_count = 0

    fetch_rows(batch_sql) do |row|
      yield row
      batch_count += 1

      # Force garbage collection periodically to manage memory
      GC.start if (batch_count % 100).zero?
    end

    break if batch_count < batch_size

    offset += batch_size
  end

  self
end

#supports_cte?Boolean

Returns:

  • (Boolean)

Since:

  • 0.1.0



1349
1350
1351
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1349

def supports_cte?
  true
end

#supports_join_using?Boolean

Returns:

  • (Boolean)

Since:

  • 0.1.0



1361
1362
1363
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1361

def supports_join_using?
  true
end

#supports_returning?(_type = nil) ⇒ Boolean

Returns:

  • (Boolean)

Since:

  • 0.1.0



1353
1354
1355
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1353

def supports_returning?(_type = nil)
  false
end

#supports_select_all_and_offset?Boolean

Returns:

  • (Boolean)

Since:

  • 0.1.0



1357
1358
1359
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1357

def supports_select_all_and_offset?
  true
end

#supports_window_functions?Boolean

DuckDB capability flags

Returns:

  • (Boolean)

Since:

  • 0.1.0



1345
1346
1347
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1345

def supports_window_functions?
  true
end

#table_name_sqlObject

Get properly quoted table name

Raises:

  • (ArgumentError)

Since:

  • 0.1.0



1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1383

def table_name_sql
  raise ArgumentError, "Table name cannot be nil or empty" if @opts[:from].nil? || @opts[:from].empty?

  # Check if the table name is nil
  table_name = @opts[:from].first
  raise ArgumentError, "Table name cannot be nil" if table_name.nil?

  table_name = table_name.to_s
  raise ArgumentError, "Table name cannot be empty" if table_name.empty?

  # Use quote_identifier_append to respect quote_identifiers? setting
  sql = String.new
  quote_identifier_append(sql, table_name)
  sql
end

#table_schema_for_conversionObject (private)

Get table schema information for type conversion

Since:

  • 0.1.0



1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1866

def table_schema_for_conversion
  return nil unless @opts[:from]&.first

  table_name = @opts[:from].first
  # Handle case where table name is wrapped in an identifier
  table_name = table_name.value if table_name.respond_to?(:value)

  begin
    schema_info = db.schema(table_name)
    schema_hash = {}
    schema_info.each do |column_name, column_info|
      schema_hash[column_name] = column_info
    end
    schema_hash
  rescue StandardError
    # If schema lookup fails, return nil to skip type conversion
    nil
  end
end

#update(values = {}) ⇒ Integer

Update records in the dataset

Parameters:

  • values (Hash) (defaults to: {})

    Column values to update

Returns:

  • (Integer)

    Number of affected rows

Since:

  • 0.1.0



1808
1809
1810
1811
1812
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1808

def update(values = {})
  sql = update_sql(values)
  # Use execute_update which properly returns the row count
  db.execute_update(sql)
end

#update_sql(values = {}) ⇒ String

Generate UPDATE SQL statement

Parameters:

  • values (Hash) (defaults to: {})

    Values to update

Returns:

  • (String)

    The UPDATE SQL statement

Since:

  • 0.1.0



1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1311

def update_sql(values = {})
  return @opts[:sql] if @opts[:sql]

  sql = "UPDATE #{table_name_sql} SET "

  # Add SET clause
  set_clauses = values.map do |column, value|
    col_sql = String.new
    quote_identifier_append(col_sql, column)
    "#{col_sql} = #{literal(value)}"
  end
  sql << set_clauses.join(", ")

  # Add WHERE clause
  select_where_sql(sql) if @opts[:where]

  sql
end

#validate_table_name_for_selectObject

Validate table name for SELECT operations

Since:

  • 0.1.0



1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1366

def validate_table_name_for_select
  return unless @opts[:from] # Skip if no FROM clause

  @opts[:from].each do |table|
    if table.nil? || (table.respond_to?(:to_s) && table.to_s.strip.empty?)
      raise ArgumentError,
            "Table name cannot be nil or empty"
    end
  end
end

#where(*cond) ⇒ Object

Override where method to add index-aware optimization hints

Since:

  • 0.1.0



2187
2188
2189
2190
2191
2192
2193
2194
# File 'lib/sequel/adapters/shared/duckdb.rb', line 2187

def where(*cond, &)
  result = super

  # Add index optimization hints based on WHERE conditions
  result = result.add_index_hints(cond.first.keys) if cond.length == 1 && cond.first.is_a?(Hash)

  result
end

#with_connection_poolingObject

Connection pooling optimization (Requirement 9.4) Enhanced connection management for better performance

Since:

  • 0.1.0



1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
# File 'lib/sequel/adapters/shared/duckdb.rb', line 1992

def with_connection_pooling
  # Ensure efficient connection reuse
  db.synchronize do |conn|
    # Verify connection is still valid before use
    unless db.valid_connection?(conn)
      # Reconnect if connection is invalid
      conn = db.connect(db.opts)
    end

    yield conn
  end
end