Class: Iceberg::Table
- Inherits: Object
- Inheritance hierarchy:
  - Object
  - Iceberg::Table
- Defined in:
- lib/iceberg/table.rb
Direct Known Subclasses
Instance Method Summary
- #append(df) ⇒ Object
- #current_schema ⇒ Object (also: #schema)
- #current_schema_id ⇒ Object (also: #schema_id)
- #current_snapshot ⇒ Object
- #current_snapshot_id ⇒ Object
- #default_partition_spec_id ⇒ Object
- #format_version ⇒ Object
- #history ⇒ Object
- #initialize(table, catalog) ⇒ Table (constructor): A new instance of Table.
- #inspect ⇒ Object: Hide internal state.
- #last_column_id ⇒ Object
- #last_partition_id ⇒ Object
- #last_sequence_number ⇒ Object
- #location ⇒ Object
- #metadata_log ⇒ Object
- #next_sequence_number ⇒ Object
- #properties ⇒ Object
- #schema_by_id(schema_id) ⇒ Object
- #schemas ⇒ Object
- #snapshot_by_id(snapshot_id) ⇒ Object
- #snapshots ⇒ Object
- #to_polars(snapshot_id: nil, storage_options: nil) ⇒ Object
- #uuid ⇒ Object
Constructor Details
#initialize(table, catalog) ⇒ Table
Returns a new instance of Table.
3 4 5 6 |
# File 'lib/iceberg/table.rb', line 3 def initialize(table, catalog) @table = table @catalog = catalog end |
Instance Method Details
#append(df) ⇒ Object
138 139 140 141 142 |
# File 'lib/iceberg/table.rb', line 138 def append(df) check_catalog @table = @table.append(df.arrow_c_stream, @catalog) nil end |
#current_schema ⇒ Object Also known as: schema
44 45 46 |
# File 'lib/iceberg/table.rb', line 44 def current_schema @table.current_schema end |
#current_schema_id ⇒ Object Also known as: schema_id
49 50 51 |
# File 'lib/iceberg/table.rb', line 49 def current_schema_id @table.current_schema_id end |
#current_snapshot ⇒ Object
74 75 76 |
# File 'lib/iceberg/table.rb', line 74 def current_snapshot @table.current_snapshot end |
#current_snapshot_id ⇒ Object
78 79 80 |
# File 'lib/iceberg/table.rb', line 78 def current_snapshot_id @table.current_snapshot_id end |
#default_partition_spec_id ⇒ Object
54 55 56 |
# File 'lib/iceberg/table.rb', line 54 def default_partition_spec_id @table.default_partition_spec_id end |
#format_version ⇒ Object
8 9 10 |
# File 'lib/iceberg/table.rb', line 8 def format_version @table.format_version end |
#history ⇒ Object
66 67 68 |
# File 'lib/iceberg/table.rb', line 66 def history @table.history end |
#inspect ⇒ Object
hide internal state
145 146 147 |
# File 'lib/iceberg/table.rb', line 145 def inspect to_s end |
#last_column_id ⇒ Object
28 29 30 |
# File 'lib/iceberg/table.rb', line 28 def last_column_id @table.last_column_id end |
#last_partition_id ⇒ Object
32 33 34 |
# File 'lib/iceberg/table.rb', line 32 def last_partition_id @table.last_partition_id end |
#last_sequence_number ⇒ Object
20 21 22 |
# File 'lib/iceberg/table.rb', line 20 def last_sequence_number @table.last_sequence_number end |
#location ⇒ Object
16 17 18 |
# File 'lib/iceberg/table.rb', line 16 def location @table.location end |
#metadata_log ⇒ Object
70 71 72 |
# File 'lib/iceberg/table.rb', line 70 def metadata_log @table.metadata_log end |
#next_sequence_number ⇒ Object
24 25 26 |
# File 'lib/iceberg/table.rb', line 24 def next_sequence_number @table.next_sequence_number end |
#properties ⇒ Object
82 83 84 |
# File 'lib/iceberg/table.rb', line 82 def properties @table.properties end |
#schema_by_id(schema_id) ⇒ Object
40 41 42 |
# File 'lib/iceberg/table.rb', line 40 def schema_by_id(schema_id) @table.schema_by_id(schema_id) end |
#schemas ⇒ Object
36 37 38 |
# File 'lib/iceberg/table.rb', line 36 def schemas @table.schemas end |
#snapshot_by_id(snapshot_id) ⇒ Object
62 63 64 |
# File 'lib/iceberg/table.rb', line 62 def snapshot_by_id(snapshot_id) @table.snapshot_by_id(snapshot_id) end |
#snapshots ⇒ Object
58 59 60 |
# File 'lib/iceberg/table.rb', line 58 def snapshots @table.snapshots end |
#to_polars(snapshot_id: nil, storage_options: nil) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/iceberg/table.rb', line 86 def to_polars(snapshot_id: nil, storage_options: nil) require "polars-df" files = @table.scan(snapshot_id).plan_files if files.empty? # TODO improve schema = # TODO use schema from snapshot_id current_schema.fields.to_h do |field| dtype = case field[:type] when "int" Polars::Int32 when "long" Polars::Int64 when "double" Polars::Float64 when "string" Polars::String when "timestamp" Polars::Datetime else raise Todo end [field[:name], dtype] end Polars::LazyFrame.new(schema: schema) else sources = files.map { |v| v[:data_file_path] } deletion_files = [ "iceberg-position-delete", files.map.with_index .select { |v, i| v[:deletes].any? } .to_h { |v, i| [i, v[:deletes].map { |d| d[:file_path] }] } ] Polars.scan_parquet( sources, storage_options: storage_options, # TODO # cast_options: Polars::ScanCastOptions._default_iceberg, # allow_missing_columns: true, # extra_columns: "ignore", # _column_mapping: column_mapping, _deletion_files: deletion_files ) end end |
#uuid ⇒ Object
12 13 14 |
# File 'lib/iceberg/table.rb', line 12 def uuid @table.uuid end |