Class: Iceberg::Table

Inherits:
Object
  • Object
show all
Defined in:
lib/iceberg/table.rb

Direct Known Subclasses

StaticTable

Instance Method Summary collapse

Constructor Details

#initialize(table, catalog) ⇒ Table

Returns a new instance of Table.



3
4
5
6
# File 'lib/iceberg/table.rb', line 3

# Stores the table handle and the catalog for later use.
#
# @param table [Object] underlying table object the instance methods delegate to
# @param catalog [Object] catalog handed to the underlying table when appending
def initialize(table, catalog)
  @table, @catalog = table, catalog
end

Instance Method Details

#append(df) ⇒ Object



138
139
140
141
142
# File 'lib/iceberg/table.rb', line 138

# Appends the rows of +df+ to the table and keeps the table object returned
# by the underlying append as the new current handle.
#
# @param df [#arrow_c_stream] data frame exposing its contents via +arrow_c_stream+
# @return [nil]
def append(df)
  check_catalog

  stream = df.arrow_c_stream
  @table = @table.append(stream, @catalog)
  nil
end

#current_schema ⇒ Object Also known as: schema



44
45
46
# File 'lib/iceberg/table.rb', line 44

# Delegates to the underlying table's +current_schema+.
#
# @return [Object] whatever the underlying table returns
def current_schema = @table.current_schema

#current_schema_id ⇒ Object Also known as: schema_id



49
50
51
# File 'lib/iceberg/table.rb', line 49

# Delegates to the underlying table's +current_schema_id+.
#
# @return [Object] whatever the underlying table returns
def current_schema_id = @table.current_schema_id

#current_snapshot ⇒ Object



74
75
76
# File 'lib/iceberg/table.rb', line 74

# Delegates to the underlying table's +current_snapshot+.
#
# @return [Object] whatever the underlying table returns
def current_snapshot = @table.current_snapshot

#current_snapshot_id ⇒ Object



78
79
80
# File 'lib/iceberg/table.rb', line 78

# Delegates to the underlying table's +current_snapshot_id+.
#
# @return [Object] whatever the underlying table returns
def current_snapshot_id = @table.current_snapshot_id

#default_partition_spec_id ⇒ Object



54
55
56
# File 'lib/iceberg/table.rb', line 54

# Delegates to the underlying table's +default_partition_spec_id+.
#
# @return [Object] whatever the underlying table returns
def default_partition_spec_id = @table.default_partition_spec_id

#format_version ⇒ Object



8
9
10
# File 'lib/iceberg/table.rb', line 8

# Delegates to the underlying table's +format_version+.
#
# @return [Object] whatever the underlying table returns
def format_version = @table.format_version

#history ⇒ Object



66
67
68
# File 'lib/iceberg/table.rb', line 66

# Delegates to the underlying table's +history+.
#
# @return [Object] whatever the underlying table returns
def history = @table.history

#inspect ⇒ Object

hide internal state



145
146
147
# File 'lib/iceberg/table.rb', line 145

# Returns the same text as +to_s+, so the instance variables are not
# dumped when the object is inspected.
#
# @return [Object] the result of +to_s+
def inspect = to_s

#last_column_id ⇒ Object



28
29
30
# File 'lib/iceberg/table.rb', line 28

# Delegates to the underlying table's +last_column_id+.
#
# @return [Object] whatever the underlying table returns
def last_column_id = @table.last_column_id

#last_partition_id ⇒ Object



32
33
34
# File 'lib/iceberg/table.rb', line 32

# Delegates to the underlying table's +last_partition_id+.
#
# @return [Object] whatever the underlying table returns
def last_partition_id = @table.last_partition_id

#last_sequence_number ⇒ Object



20
21
22
# File 'lib/iceberg/table.rb', line 20

# Delegates to the underlying table's +last_sequence_number+.
#
# @return [Object] whatever the underlying table returns
def last_sequence_number = @table.last_sequence_number

#location ⇒ Object



16
17
18
# File 'lib/iceberg/table.rb', line 16

# Delegates to the underlying table's +location+.
#
# @return [Object] whatever the underlying table returns
def location = @table.location

#metadata_log ⇒ Object



70
71
72
# File 'lib/iceberg/table.rb', line 70

# Delegates to the underlying table's +metadata_log+.
#
# @return [Object] whatever the underlying table returns
def metadata_log
  @table.metadata_log
end

#next_sequence_number ⇒ Object



24
25
26
# File 'lib/iceberg/table.rb', line 24

# Delegates to the underlying table's +next_sequence_number+.
#
# @return [Object] whatever the underlying table returns
def next_sequence_number = @table.next_sequence_number

#properties ⇒ Object



82
83
84
# File 'lib/iceberg/table.rb', line 82

# Delegates to the underlying table's +properties+.
#
# @return [Object] whatever the underlying table returns
def properties = @table.properties

#schema_by_id(schema_id) ⇒ Object



40
41
42
# File 'lib/iceberg/table.rb', line 40

# Delegates to the underlying table's +schema_by_id+.
#
# @param schema_id [Object] identifier forwarded unchanged to the underlying table
# @return [Object] whatever the underlying table returns
def schema_by_id(schema_id) = @table.schema_by_id(schema_id)

#schemas ⇒ Object



36
37
38
# File 'lib/iceberg/table.rb', line 36

# Delegates to the underlying table's +schemas+.
#
# @return [Object] whatever the underlying table returns
def schemas = @table.schemas

#snapshot_by_id(snapshot_id) ⇒ Object



62
63
64
# File 'lib/iceberg/table.rb', line 62

# Delegates to the underlying table's +snapshot_by_id+.
#
# @param snapshot_id [Object] identifier forwarded unchanged to the underlying table
# @return [Object] whatever the underlying table returns
def snapshot_by_id(snapshot_id) = @table.snapshot_by_id(snapshot_id)

#snapshots ⇒ Object



58
59
60
# File 'lib/iceberg/table.rb', line 58

# Delegates to the underlying table's +snapshots+.
#
# @return [Object] whatever the underlying table returns
def snapshots = @table.snapshots

#to_polars(snapshot_id: nil, storage_options: nil) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/iceberg/table.rb', line 86

# Builds a Polars lazy frame over the table's data.
#
# The polars-df gem is required on first use. The table is scanned for the
# given snapshot; when the plan lists data files they are handed to
# +Polars.scan_parquet+ together with their delete files, otherwise an empty
# +Polars::LazyFrame+ is built from the fields of +current_schema+.
#
# @param snapshot_id [Object, nil] forwarded to the underlying table's +scan+
# @param storage_options [Object, nil] forwarded to +Polars.scan_parquet+
# @return [Object] the value returned by +Polars.scan_parquet+ or +Polars::LazyFrame.new+
# @raise [Todo] when no files are planned and a schema field has a type other
#   than "int", "long", "double", "string" or "timestamp"
def to_polars(snapshot_id: nil, storage_options: nil)
  require "polars-df"

  planned = @table.scan(snapshot_id).plan_files

  unless planned.empty?
    data_paths = planned.map { |file| file[:data_file_path] }

    # Position of a data file in data_paths => paths of its delete files.
    # Data files without deletes get no entry.
    deletes_by_index = {}
    planned.each_with_index do |file, idx|
      next unless file[:deletes].any?

      deletes_by_index[idx] = file[:deletes].map { |del| del[:file_path] }
    end

    return Polars.scan_parquet(
      data_paths,
      storage_options: storage_options,
      # TODO
      # cast_options: Polars::ScanCastOptions._default_iceberg,
      # allow_missing_columns: true,
      # extra_columns: "ignore",
      # _column_mapping: column_mapping,
      _deletion_files: ["iceberg-position-delete", deletes_by_index]
    )
  end

  # Nothing to scan: build an empty frame that still carries the column types.
  # TODO improve
  # TODO use schema from snapshot_id
  columns =
    current_schema.fields.each_with_object({}) do |field, acc|
      polars_type =
        case field[:type]
        when "int" then Polars::Int32
        when "long" then Polars::Int64
        when "double" then Polars::Float64
        when "string" then Polars::String
        when "timestamp" then Polars::Datetime
        else raise Todo
        end

      acc[field[:name]] = polars_type
    end

  Polars::LazyFrame.new(schema: columns)
end

#uuid ⇒ Object



12
13
14
# File 'lib/iceberg/table.rb', line 12

# Delegates to the underlying table's +uuid+.
#
# @return [Object] whatever the underlying table returns
def uuid = @table.uuid