Class: Arrow::Table

Inherits:
Object
  • Object
show all
Includes:
RecordContainable
Defined in:
lib/arrow/table.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods included from RecordContainable

#each_column, #each_record, #find_column

Constructor Details

#initialize(schema_or_raw_table_or_columns, columns = nil) ⇒ Table

Returns a new instance of Table.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/arrow/table.rb', line 32

# Builds a table from one of three input shapes.
#
# * +(schema, columns)+: both values are handed to +initialize_raw+ as is.
# * +(columns)+: a collection whose element at index 0 is a +Column+; the
#   schema is built from each column's +field+.
# * +(raw_table)+: anything whose +each+ yields +name, array+ pairs; one
#   +Field+ (named +name.to_s+, typed by +array.value_data_type+) and one
#   +Column+ are created per pair.
#
# @param schema_or_raw_table_or_columns the schema, the column collection
#   or the raw name/array mapping, depending on the call shape above
# @param columns the columns when a schema is given, otherwise +nil+
def initialize(schema_or_raw_table_or_columns, columns=nil)
  # Explicit (schema, columns) form: nothing to derive.
  unless columns.nil?
    return initialize_raw(schema_or_raw_table_or_columns, columns)
  end

  source = schema_or_raw_table_or_columns
  if source[0].is_a?(Column)
    # Ready-made columns: only the fields have to be collected.
    columns = source
    fields = columns.map(&:field)
  else
    # Raw name => array pairs: create a field and a column for each pair.
    fields = []
    columns = []
    source.each do |name, array|
      field = Field.new(name.to_s, array.value_data_type)
      fields << field
      columns << Column.new(field, array)
    end
  end
  initialize_raw(Schema.new(fields), columns)
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args, &block) ⇒ Object



293
294
295
296
297
298
299
# File 'lib/arrow/table.rb', line 293

# Lets a column be read as if it were a method: a call without arguments
# whose name is resolved by +find_column+ returns that column. Every other
# call is forwarded to +super+.
#
# @param name the name of the missing method
# @param args the call arguments; a column lookup happens only when empty
# @param block the block given to the call, forwarded to +super+
# @return the matching column, or whatever +super+ does for unknown names
def method_missing(name, *args, &block)
  # Calls with arguments can never be a column access.
  return super unless args.empty?

  find_column(name) || super
end

Class Method Details

.load(path, options = {}) ⇒ Object



26
27
28
# File 'lib/arrow/table.rb', line 26

# Loads a table by delegating to +TableLoader.load+.
#
# @param path the location passed to +TableLoader.load+ unchanged
# @param options [Hash] passed to +TableLoader.load+ unchanged
#   (the supported keys are defined by TableLoader, not visible here)
# @return whatever +TableLoader.load+ returns
#   (presumably an Arrow::Table; confirm against TableLoader)
def load(path, options={})
  TableLoader.load(path, options)
end

Instance Method Details

#columns ⇒ Object



55
56
57
# File 'lib/arrow/table.rb', line 55

# Returns every column of the table, in index order.
#
# The list is built on first use by calling +get_column+ for each index
# below +n_columns+ and is then cached in +@columns+.
#
# @return [Array] the cached column list
def columns
  @columns ||= (0...n_columns).map { |index| get_column(index) }
end

#each_record_batch ⇒ Object



59
60
61
62
63
64
65
66
# File 'lib/arrow/table.rb', line 59

# Yields the table's record batches one at a time.
#
# Batches are pulled from a +TableBatchReader+ created for this table
# until +read_next+ returns a falsy value.
#
# @yield [record_batch] each value returned by +read_next+
# @return [Enumerator] when called without a block
# @return [nil] when called with a block
def each_record_batch
  return enum_for(__method__) unless block_given?

  batch_reader = TableBatchReader.new(self)
  batch = batch_reader.read_next
  while batch
    yield(batch)
    batch = batch_reader.read_next
  end
end

#group(*keys) ⇒ Object

Experimental



252
253
254
# File 'lib/arrow/table.rb', line 252

# Creates a +Group+ for this table and the given keys.
#
# @param keys the grouping keys; they are passed to +Group.new+ as one array
# @return [Group] a new Group built from this table and +keys+
def group(*keys)
  Group.new(self, keys)
end

#initialize_raw ⇒ Object



31
# File 'lib/arrow/table.rb', line 31

# Keeps the +initialize+ that exists at this point reachable under the name
# +initialize_raw+; the +initialize+ defined by this class calls it with a
# schema and a column list.
alias_method :initialize_raw, :initialize

#inspect ⇒ Object



284
285
286
# File 'lib/arrow/table.rb', line 284

# Returns the inherited +inspect+ output followed by a newline and the
# table's +to_s+ output.
#
# @return [String]
def inspect
  "#{super}\n#{to_s}"
end

#merge(other) ⇒ Arrow::Table

TODO

Returns:

  • (Arrow::Table)



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/arrow/table.rb', line 149

# Builds a new table from this one with columns added, replaced or removed.
#
# * Given a +Hash+, each key is converted with +to_s+. A truthy value is
#   turned into a column by +ensure_column+ and replaces the column of that
#   name (or is appended when no such column exists). A falsy value removes
#   the column of that name.
# * Given a +Table+, each of its columns replaces the same-named column
#   here, or is appended when there is no match.
#
# Existing columns keep their position; new columns follow in the order
# they were supplied.
#
# @param other [Hash, Table] the changes to apply
# @return a new instance of the receiver's class
# @raise [ArgumentError] if +other+ is neither a Hash nor a Table
def merge(other)
  replacements = {}
  dropped_names = {}

  case other
  when Hash
    other.each do |key, value|
      column_name = key.to_s
      if value
        replacements[column_name] = ensure_column(column_name, value)
      else
        dropped_names[column_name] = true
      end
    end
  when Table
    other.columns.each { |column| replacements[column.name] = column }
  else
    message = "merge target must be Hash or Arrow::Table: " +
      "<#{other.inspect}>: #{inspect}"
    raise ArgumentError, message
  end

  merged_columns = []
  columns.each do |column|
    current_name = column.name
    # delete() hands out a replacement only once, so what is left in
    # replacements afterwards is exactly the set of brand-new columns.
    replacement = replacements.delete(current_name)
    if replacement
      merged_columns << replacement
    elsif !dropped_names.key?(current_name)
      merged_columns << column
    end
  end
  replacements.each_value { |column| merged_columns << column }

  merged_fields = merged_columns.map { |column| column.field }
  self.class.new(Schema.new(merged_fields), merged_columns)
end

#pack ⇒ Object



266
267
268
269
270
271
# File 'lib/arrow/table.rb', line 266

# Builds a new table from the result of calling +pack+ on every column.
# The schema is reused unchanged.
#
# @return a new instance of the receiver's class holding the packed columns
def pack
  packed = columns.map(&:pack)
  self.class.new(schema, packed)
end

#remove_column(name_or_index) ⇒ Object



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/arrow/table.rb', line 195

# Removes one column, addressed either by name or by position.
#
# A +String+ or +Symbol+ is matched against each column's +name+. Any other
# value is used as an index; a negative index counts from the end. The
# resolved index is passed to +remove_column_raw+.
#
# @param name_or_index [String, Symbol, Integer] the column to remove
# @return whatever +remove_column_raw+ returns for the resolved index
# @raise [KeyError] if no column has the given name
# @raise [IndexError] if the index is outside +0...n_columns+ after the
#   negative-index adjustment
def remove_column(name_or_index)
  case name_or_index
  when String, Symbol
    target_name = name_or_index.to_s
    position = columns.index { |column| column.name == target_name }
    if position.nil?
      raise KeyError, "unknown column: #{name_or_index.inspect}: #{inspect}"
    end
  else
    position = name_or_index
    position += n_columns if position < 0
    if position < 0 || position >= n_columns
      raise IndexError,
            "out of index (0..#{n_columns - 1}): #{name_or_index.inspect}: #{inspect}"
    end
  end
  remove_column_raw(position)
end

#remove_column_raw ⇒ Object



194
# File 'lib/arrow/table.rb', line 194

# Keeps the +remove_column+ that exists at this point reachable under the
# name +remove_column_raw+; the +remove_column+ defined by this class
# resolves a name or index and then calls it with the resulting index.
alias_method :remove_column_raw, :remove_column

#respond_to_missing?(name, include_private) ⇒ Boolean

Returns:

  • (Boolean)


288
289
290
291
# File 'lib/arrow/table.rb', line 288

# Reports column names as methods the table responds to, matching the
# column lookup done by +method_missing+.
#
# @param name the method name being queried
# @param include_private forwarded to +super+ when no column matches
# @return [Boolean] +true+ when +find_column+ resolves +name+; otherwise
#   the result of +super+
def respond_to_missing?(name, include_private)
  if find_column(name)
    true
  else
    super
  end
end

#save(path, options = {}) ⇒ Object



261
262
263
264
# File 'lib/arrow/table.rb', line 261

# Saves the table by creating a +TableSaver+ for it and calling +save+ on
# that saver.
#
# @param path the destination passed to +TableSaver.new+ unchanged
# @param options [Hash] passed to +TableSaver.new+ unchanged
# @return whatever +TableSaver#save+ returns
def save(path, options={})
  TableSaver.new(self, path, options).save
end

#select_columns(*selectors, &block) ⇒ Arrow::Table

TODO

Returns:

  • (Arrow::Table)



219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# File 'lib/arrow/table.rb', line 219

# Builds a new table that contains only the selected columns.
#
# Each selector is resolved in the order given:
#
# * +String+ / +Symbol+: looked up with +find_column+.
# * +Range+: the matching slice of +columns+.
# * anything else: used as an index into +columns+.
#
# When a block is given it filters the result further: with selectors it is
# applied to the columns they picked, without selectors to all columns.
#
# @param selectors [Array<String, Symbol, Range, Integer>] the columns to keep
# @yield [column] optional filter; the column is kept when the block
#   returns a truthy value
# @return a new instance of the receiver's class built from the selected
#   columns, or an Enumerator when neither selectors nor a block are given
# @raise [KeyError] if a named column does not exist
# @raise [IndexError] if an index or the start of a range lies outside the
#   column list
def select_columns(*selectors, &block)
  if selectors.empty?
    return to_enum(__method__) unless block_given?
    selected_columns = columns.select(&block)
  else
    selected_columns = []
    selectors.each do |selector|
      case selector
      when String, Symbol
        column = find_column(selector)
        if column.nil?
          message = "unknown column: #{selector.inspect}: #{inspect}"
          raise KeyError, message
        end
        selected_columns << column
      when Range
        # Array#[] returns nil (not []) when the range starts beyond the
        # end of the array. Report that like an out-of-range integer
        # selector instead of letting concat(nil) fail with a TypeError.
        ranged_columns = columns[selector]
        if ranged_columns.nil?
          message = "out of index (0..#{n_columns - 1}): " +
            "#{selector.inspect}: #{inspect}"
          raise IndexError, message
        end
        selected_columns.concat(ranged_columns)
      else
        column = columns[selector]
        if column.nil?
          message = "out of index (0..#{n_columns - 1}): " +
            "#{selector.inspect}: #{inspect}"
          raise IndexError, message
        end
        selected_columns << column
      end
    end
    selected_columns = selected_columns.select(&block) if block_given?
  end
  self.class.new(selected_columns)
end

#slice(*args) ⇒ Arrow::Table

TODO

Returns:

  • (Arrow::Table)



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/arrow/table.rb', line 76

# Selects rows of the table.
#
# Accepted call shapes:
#
# * +slice(slicer)+: one slicer (see below).
# * +slice(from, to)+: turned into the range +from...(from + to)+, so the
#   second argument acts as a row count rather than an end position.
# * +slice { |slicer| ... }+: the block receives +Slicer.new(self)+ and
#   returns +nil+ (ignored), an Array of slicers, or a single slicer. A
#   block may be combined with the positional forms above.
#
# Each slicer that responds to +evaluate+ is evaluated first. The result is
# then handled by type: Integer (one row, negative counts from the end),
# Range (negative bounds count from the end), ::Array, ChunkedArray or
# BooleanArray (handed to +boolean_array_to_slice_ranges+). All collected
# +[from, to]+ pairs are passed to +slice_by_ranges+.
#
# @param args zero to two positional arguments as described above
# @yield [slicer] optional; receives a +Slicer+ built for this table
# @return whatever +slice_by_ranges+ returns
#   (documented as Arrow::Table; the helper is not visible here)
# @raise [ArgumentError] on a wrong number of arguments or an unsupported
#   slicer type
def slice(*args)
  slicers = []
  # Set only when the argument count is invalid; it holds the text used for
  # the "expected ..." part of the error message.
  expected_n_args = nil
  case args.size
  when 0
    # Without arguments a block is the only possible source of slicers.
    expected_n_args = "1..2" unless block_given?
  when 1
    slicers << args[0]
  when 2
    # The second argument is added to the first, i.e. it is a length.
    from, to = args
    slicers << (from...(from + to))
  else
    if block_given?
      expected_n_args = "0..2"
    else
      expected_n_args = "1..2"
    end
  end
  if expected_n_args
    message = "wrong number of arguments " +
      "(given #{args.size}, expected #{expected_n_args})"
    raise ArgumentError, message
  end

  if block_given?
    block_slicer = yield(Slicer.new(self))
    case block_slicer
    when nil
      # Ignore
    when ::Array
      slicers.concat(block_slicer)
    else
      slicers << block_slicer
    end
  end

  # Each entry is an inclusive [from, to] pair of row positions.
  ranges = []
  slicers.each do |slicer|
    slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
    case slicer
    when Integer
      slicer += n_rows if slicer < 0
      ranges << [slicer, slicer]
    when Range
      from = slicer.first
      to = slicer.last
      # Convert an exclusive end into an inclusive one.
      # NOTE(review): this runs before the negative-index adjustment below,
      # so an empty exclusive range such as 0...0 gets to == -1 and is then
      # mapped to n_rows - 1. Confirm whether that is intended.
      to -= 1 if slicer.exclude_end?
      from += n_rows if from < 0
      to += n_rows if to < 0
      ranges << [from, to]
    when ::Array
      boolean_array_to_slice_ranges(slicer, 0, ranges)
    when ChunkedArray
      # Walk the chunks with a running offset, which is passed to the helper
      # along with each chunk.
      offset = 0
      slicer.each_chunk do |array|
        boolean_array_to_slice_ranges(array, offset, ranges)
        offset += array.length
      end
    when BooleanArray
      boolean_array_to_slice_ranges(slicer, 0, ranges)
    else
      message = "slicer must be Integer, Range, (from, to), " +
        "Arrow::ChunkedArray of Arrow::BooleanArray, " +
        "Arrow::BooleanArray or Arrow::Slicer::Condition: #{slicer.inspect}"
      raise ArgumentError, message
    end
  end
  slice_by_ranges(ranges)
end

#to_s(options = {}) ⇒ Object



273
274
275
276
277
278
279
280
281
282
# File 'lib/arrow/table.rb', line 273

# Formats the table as text.
#
# +options[:format]+ selects the formatter: +:list+ uses
# +TableListFormatter+, any other value (including none) uses
# +TableTableFormatter+. The complete options hash is passed to the
# formatter.
#
# @param options [Hash] formatting options; only +:format+ is read here
# @return whatever the chosen formatter's +format+ returns
def to_s(options={})
  formatter_class = case options[:format]
                    when :list then TableListFormatter
                    else TableTableFormatter
                    end
  formatter_class.new(self, options).format
end

#window(size: nil) ⇒ Object

Experimental



257
258
259
# File 'lib/arrow/table.rb', line 257

# Creates a +RollingWindow+ for this table.
#
# @param size the window size passed to +RollingWindow.new+ unchanged;
#   defaults to +nil+ (how nil is interpreted is up to RollingWindow)
# @return [RollingWindow] a new window built from this table and +size+
def window(size: nil)
  RollingWindow.new(self, size)
end