Class: Arrow::Table

Inherits:
Object
  • Object
show all
Includes:
RecordContainable
Defined in:
lib/arrow/table.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods included from RecordContainable

#each_column, #each_record, #find_column

Constructor Details

#initialize(schema_or_raw_table_or_columns, columns = nil) ⇒ Table

Returns a new instance of Table.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/arrow/table.rb', line 33

# Builds a table from one of three argument shapes.
#
# @overload initialize(columns)
#   @param columns [Array<Column>] detected by the first element being a
#     Column; the schema is built from each column's +field+.
# @overload initialize(raw_table)
#   @param raw_table [#each, #[]] yields +(name, array)+ pairs; each pair
#     becomes a Field (named +name.to_s+, typed by +array.value_data_type+)
#     and a Column wrapping that field and array.
# @overload initialize(schema, columns)
#   Both values are handed to +initialize_raw+ unchanged.
# @return [Table] a new instance of Table
def initialize(schema_or_raw_table_or_columns, columns=nil)
  source = schema_or_raw_table_or_columns
  if !columns.nil?
    # Two-argument form: the caller already supplied schema and columns.
    schema = source
  elsif source[0].is_a?(Column)
    # A list of columns: derive the schema from their fields.
    columns = source
    schema = Schema.new(columns.map(&:field))
  else
    # A raw table: build one field and one column per (name, array) pair.
    fields = []
    columns = []
    source.each do |name, array|
      field = Field.new(name.to_s, array.value_data_type)
      fields.push(field)
      columns.push(Column.new(field, array))
    end
    schema = Schema.new(fields)
  end
  initialize_raw(schema, columns)
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args, &block) ⇒ Object



302
303
304
305
306
307
308
# File 'lib/arrow/table.rb', line 302

# Lets a column be read as if it were a zero-argument method on the table.
#
# Only calls without arguments are treated as column lookups; anything else,
# and any name +find_column+ does not resolve, falls through to +super+.
#
# @param name [Symbol] the missing method name, tried as a column name
# @param args [Array] arguments of the original call
# @return [Object] the column found by +find_column+, or whatever +super+
#   returns/raises
def method_missing(name, *args, &block)
  return super unless args.empty?

  find_column(name) || super
end

Class Method Details

.load(path, options = {}) ⇒ Object



26
27
28
# File 'lib/arrow/table.rb', line 26

# Loads a table by delegating to +TableLoader.load+.
#
# @param path passed through to +TableLoader.load+ unchanged
# @param options [Hash] passed through to +TableLoader.load+ unchanged;
#   defaults to an empty Hash
# @return [Object] whatever +TableLoader.load+ returns
#   (presumably an Arrow::Table -- confirm against TableLoader)
def load(path, options={})
  TableLoader.load(path, options)
end

Instance Method Details

#columns ⇒ Object



56
57
58
# File 'lib/arrow/table.rb', line 56

# Returns every column of the table, in index order.
#
# The list is built once from +get_column(0)+ .. +get_column(n_columns - 1)+
# and memoized in +@columns+, so later calls return the same Array object.
#
# @return [Array] the memoized list of columns
def columns
  return @columns if @columns

  @columns = n_columns.times.map { |index| get_column(index) }
end

#each_record_batch ⇒ Object



60
61
62
63
64
65
66
67
# File 'lib/arrow/table.rb', line 60

# Yields each record batch read from this table by a TableBatchReader.
#
# Reading stops at the first falsy value returned by +read_next+.
#
# @yield [record_batch] each value returned by the reader's +read_next+
# @return [Enumerator] when no block is given
# @return [nil] when a block is given
def each_record_batch
  return to_enum(__method__) unless block_given?

  reader = TableBatchReader.new(self)
  batch = reader.read_next
  while batch
    yield(batch)
    batch = reader.read_next
  end
end

#group(*keys) ⇒ Object

Experimental



253
254
255
# File 'lib/arrow/table.rb', line 253

# Experimental.
#
# Builds a Group for this table from the given keys.
#
# @param keys [Array] grouping keys, collected from the argument list and
#   handed to +Group.new+ as a single Array (presumably column names --
#   confirm against Group)
# @return [Group] the new Group built from this table and +keys+
def group(*keys)
  Group.new(self, keys)
end

#inspect ⇒ Object



293
294
295
# File 'lib/arrow/table.rb', line 293

# Returns the inherited +inspect+ text followed, on a new line, by the
# table rendered with +to_s+.
#
# @return [String]
def inspect
  header = super
  "#{header}\n#{to_s}"
end

#inspect_raw ⇒ Object



292
# File 'lib/arrow/table.rb', line 292

# Keeps the +inspect+ implementation in effect at this point reachable as
# +inspect_raw+. The alias is declared on the line before this class
# redefines +inspect+, so it refers to the pre-override method.
alias_method :inspect_raw, :inspect

#merge(other) ⇒ Arrow::Table

TODO

Returns:

  • (Arrow::Table)



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/arrow/table.rb', line 150

# Builds a new table by adding, replacing and removing columns.
#
# Existing columns keep their position. A column whose name also appears in
# +other+ is replaced in place (or dropped), and names that are new to this
# table are appended at the end.
#
# @param other [Hash, Table]
#   * Hash: keys are column names (converted with +to_s+). A truthy value is
#     turned into a column with +ensure_column(name, value)+; a +nil+ or
#     +false+ value removes the column of that name.
#   * Table: every column of +other+ is added or replaces the same-named
#     column.
# @return [Arrow::Table] a new instance of this table's class
# @raise [ArgumentError] if +other+ is neither a Hash nor a Table
def merge(other)
  replacements = {}
  dropped = {}

  case other
  when Hash
    other.each do |key, value|
      key = key.to_s
      if value
        replacements[key] = ensure_column(key, value)
      else
        dropped[key] = true
      end
    end
  when Table
    other.columns.each { |column| replacements[column.name] = column }
  else
    message = "merge target must be Hash or Arrow::Table: " +
      "<#{other.inspect}>: #{inspect}"
    raise ArgumentError, message
  end

  merged = []
  columns.each do |column|
    key = column.name
    # Deleting here leaves only the brand-new columns in +replacements+.
    replacement = replacements.delete(key)
    if replacement
      merged << replacement
    elsif !dropped.key?(key)
      merged << column
    end
  end
  merged.concat(replacements.values)

  self.class.new(Schema.new(merged.collect(&:field)), merged)
end

#pack ⇒ Object



267
268
269
270
271
272
# File 'lib/arrow/table.rb', line 267

# Builds a new table of the same class from this table's schema and the
# result of calling +pack+ on every column.
#
# @return [Object] a new instance of this table's class
def pack
  self.class.new(schema, columns.map(&:pack))
end

#remove_column(name_or_index) ⇒ Object



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/arrow/table.rb', line 196

# Removes one column, identified either by name or by position.
#
# The argument is resolved to a non-negative index and handed to
# +remove_column_raw+.
#
# @param name_or_index [String, Symbol, Integer] a column name, or an index
#   where negative values count back from the last column
# @return [Object] whatever +remove_column_raw+ returns
# @raise [KeyError] if no column has the given name
# @raise [IndexError] if the index is outside +0..n_columns - 1+
def remove_column(name_or_index)
  if name_or_index.is_a?(String) || name_or_index.is_a?(Symbol)
    wanted = name_or_index.to_s
    index = columns.find_index { |column| column.name == wanted }
    unless index
      message = "unknown column: #{name_or_index.inspect}: #{inspect}"
      raise KeyError.new(message)
    end
  else
    index = name_or_index
    # Negative indexes are relative to the end, as with Array#[].
    index += n_columns if index < 0
    if index < 0 || index >= n_columns
      message = "out of index (0..#{n_columns - 1}): " +
        "#{name_or_index.inspect}: #{inspect}"
      raise IndexError.new(message)
    end
  end
  remove_column_raw(index)
end

#remove_column_raw ⇒ Object



195
# File 'lib/arrow/table.rb', line 195

# Keeps the +remove_column+ implementation in effect at this point
# reachable as +remove_column_raw+. The alias is declared on the line before
# this class redefines +remove_column+; that override resolves its argument
# to an index and then calls +remove_column_raw(index)+.
alias_method :remove_column_raw, :remove_column

#respond_to_missing?(name, include_private) ⇒ Boolean

Returns:

  • (Boolean)


297
298
299
300
# File 'lib/arrow/table.rb', line 297

# Reports column names as methods the table responds to, matching the
# column lookup done in +method_missing+.
#
# @param name [Symbol] the method name being queried
# @param include_private [Boolean] forwarded implicitly to +super+
# @return [Boolean] +true+ when +find_column(name)+ finds a column,
#   otherwise the result of +super+
def respond_to_missing?(name, include_private)
  find_column(name) ? true : super
end

#save(path, options = {}) ⇒ Object



262
263
264
265
# File 'lib/arrow/table.rb', line 262

# Saves this table by building a TableSaver for it and calling +save+.
#
# @param path passed to +TableSaver.new+ unchanged
# @param options [Hash] passed to +TableSaver.new+ unchanged; defaults to an
#   empty Hash
# @return [Object] whatever +TableSaver#save+ returns
def save(path, options={})
  TableSaver.new(self, path, options).save
end

#select_columns(*selectors, &block) ⇒ Arrow::Table

TODO

Returns:

  • (Arrow::Table)



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/arrow/table.rb', line 220

# Builds a new table that contains only the selected columns.
#
# Without selectors the block filters all columns. With selectors the
# matching columns are collected in selector order and, if a block is also
# given, filtered by it afterwards.
#
# @param selectors [Array<String, Symbol, Range, Integer>] column names
#   (looked up with +find_column+), index ranges, or single indexes; ranges
#   and indexes are resolved with +Array#[]+ on +columns+, so negative values
#   count from the end
# @yield [column] optional filter; columns for which the block returns a
#   truthy value are kept
# @return [Arrow::Table] a new instance of this table's class
# @return [Enumerator] when called with neither selectors nor a block
# @raise [KeyError] if a name selector matches no column
# @raise [IndexError] if an index selector, or the start of a Range selector,
#   is out of bounds
def select_columns(*selectors, &block)
  if selectors.empty?
    return to_enum(__method__) unless block_given?
    selected_columns = columns.select(&block)
  else
    selected_columns = []
    selectors.each do |selector|
      case selector
      when String, Symbol
        column = find_column(selector)
        if column.nil?
          message = "unknown column: #{selector.inspect}: #{inspect}"
          raise KeyError.new(message)
        end
        selected_columns << column
      when Range
        # Array#[] returns nil (not []) when the range starts past the end.
        # Report that the same way as an out-of-bounds index instead of
        # letting concat(nil) fail with an unrelated TypeError.
        columns_in_range = columns[selector]
        if columns_in_range.nil?
          message = "out of index (0..#{n_columns - 1}): " +
            "#{selector.inspect}: #{inspect}"
          raise IndexError.new(message)
        end
        selected_columns.concat(columns_in_range)
      else
        column = columns[selector]
        if column.nil?
          message = "out of index (0..#{n_columns - 1}): " +
            "#{selector.inspect}: #{inspect}"
          raise IndexError.new(message)
        end
        selected_columns << column
      end
    end
    selected_columns = selected_columns.select(&block) if block_given?
  end
  self.class.new(selected_columns)
end

#slice(*args) ⇒ Arrow::Table

TODO

Returns:

  • (Arrow::Table)



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/arrow/table.rb', line 77

# Selects rows and returns the result of +slice_by_ranges+.
#
# Accepted call shapes:
# * +slice(slicer)+ -- one slicer (see below).
# * +slice(from, to)+ -- rows +from...(from + to)+, i.e. the second argument
#   acts as a row count, not an end index.
# * +slice { |slicer| ... }+ -- the block receives +Slicer.new(self)+ and
#   returns +nil+ (ignored), one slicer, or an Array of slicers. A block may
#   be combined with the argument forms above; its slicers are added after
#   the ones from the arguments.
#
# Each slicer that responds to +evaluate+ is replaced by the result of
# calling it, then converted to +[first, last]+ row pairs:
# * Integer -- a single row; negative values have +n_rows+ added.
# * Range -- an exclusive end is reduced by one, then negative bounds have
#   +n_rows+ added.
# * Array, ChunkedArray (chunk by chunk, with a running offset) and
#   BooleanArray -- handed to +boolean_array_to_slice_ranges+.
#
# @param args [Array] zero to two positional arguments as described above
# @return [Arrow::Table] per the method signature; the value comes from
#   +slice_by_ranges+
# @raise [ArgumentError] on a wrong number of arguments or an unsupported
#   slicer type
def slice(*args)
  n_args = args.size
  if n_args > 2 || (n_args.zero? && !block_given?)
    # With a block, zero arguments are allowed, so the accepted range widens.
    expected_n_args = block_given? ? "0..2" : "1..2"
    message = "wrong number of arguments " +
      "(given #{n_args}, expected #{expected_n_args})"
    raise ArgumentError, message
  end

  slicers = []
  if n_args == 1
    slicers << args.first
  elsif n_args == 2
    start, length = args
    slicers << (start...(start + length))
  end

  if block_given?
    block_result = yield(Slicer.new(self))
    if block_result.is_a?(::Array)
      slicers.concat(block_result)
    elsif !block_result.nil?
      slicers << block_result
    end
  end

  ranges = []
  slicers.each do |slicer|
    target = slicer.respond_to?(:evaluate) ? slicer.evaluate : slicer
    case target
    when Integer
      row = target < 0 ? target + n_rows : target
      ranges << [row, row]
    when Range
      first_row = target.first
      last_row = target.last
      last_row -= 1 if target.exclude_end?
      first_row += n_rows if first_row < 0
      last_row += n_rows if last_row < 0
      ranges << [first_row, last_row]
    when ::Array
      boolean_array_to_slice_ranges(target, 0, ranges)
    when ChunkedArray
      chunk_offset = 0
      target.each_chunk do |chunk|
        boolean_array_to_slice_ranges(chunk, chunk_offset, ranges)
        chunk_offset += chunk.length
      end
    when BooleanArray
      boolean_array_to_slice_ranges(target, 0, ranges)
    else
      message = "slicer must be Integer, Range, (from, to), " +
        "Arrow::ChunkedArray of Arrow::BooleanArray, " +
        "Arrow::BooleanArray or Arrow::Slicer::Condition: #{target.inspect}"
      raise ArgumentError, message
    end
  end
  slice_by_ranges(ranges)
end

#to_s(options = {}) ⇒ Object



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/arrow/table.rb', line 275

# Renders the table as text in the requested format.
#
# @param options [Hash] formatting options; the whole Hash is also passed to
#   the formatter
# @option options [Symbol, nil] :format
#   * +:column+ -- return +to_s_raw+ directly
#   * +:list+ -- use TableListFormatter
#   * +:table+ or +nil+ (default) -- use TableTableFormatter
# @return [Object] +to_s_raw+ or the result of the formatter's +format+
# @raise [ArgumentError] if +:format+ is any other value
def to_s(options={})
  requested = options[:format]
  formatter_class =
    case requested
    when :column
      return to_s_raw
    when :list
      TableListFormatter
    when nil, :table
      TableTableFormatter
    else
      message = ":format must be :column, :list, :table or nil"
      raise ArgumentError, "#{message}: <#{requested.inspect}>"
    end
  formatter_class.new(self, options).format
end

#to_s_raw ⇒ Object



274
# File 'lib/arrow/table.rb', line 274

# Keeps the +to_s+ implementation in effect at this point reachable as
# +to_s_raw+. The alias is declared on the line before this class redefines
# +to_s+; that override returns +to_s_raw+ when called with
# +format: :column+.
alias_method :to_s_raw, :to_s

#window(size: nil) ⇒ Object

Experimental



258
259
260
# File 'lib/arrow/table.rb', line 258

# Experimental.
#
# Builds a RollingWindow over this table.
#
# @param size [Object, nil] passed to +RollingWindow.new+ unchanged;
#   defaults to +nil+ (presumably the window size in rows -- confirm against
#   RollingWindow)
# @return [RollingWindow] the new RollingWindow built from this table and
#   +size+
def window(size: nil)
  RollingWindow.new(self, size)
end