Class: DeltaLake::TableOptimizer

Inherits:
Object
  • Object
show all
Defined in:
lib/deltalake/table_optimizer.rb

Instance Method Summary collapse

Constructor Details

#initialize(table) ⇒ TableOptimizer

Returns a new instance of TableOptimizer.



3
4
5
# File 'lib/deltalake/table_optimizer.rb', line 3

# Builds an optimizer bound to the given table.
#
# The table is stored as-is in @table; the optimizer's instance methods call
# `_table` and `update_incremental` on it.
#
# @param table [Object] the table to optimize
#   (NOTE(review): presumably a DeltaLake::Table — confirm against callers)
def initialize(table)
  @table = table
end

Instance Method Details

#compact(target_size: nil, max_concurrent_tasks: nil, min_commit_interval: nil) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/deltalake/table_optimizer.rb', line 7

# Runs the native `compact_optimize` operation on the underlying table handle,
# refreshes the table via `update_incremental`, and returns the decoded metrics.
#
# All three options are forwarded positionally, unchanged, to
# `compact_optimize`; their meaning is defined by that native call.
#
# @param target_size [Object, nil] forwarded as the first argument
# @param max_concurrent_tasks [Object, nil] forwarded as the second argument
# @param min_commit_interval [Object, nil] forwarded as the third argument
# @return [Object] the metrics parsed from JSON (string keys); the
#   "filesAdded" and "filesRemoved" entries are decoded a second time when
#   they arrive as JSON-encoded strings
def compact(
  target_size: nil,
  max_concurrent_tasks: nil,
  min_commit_interval: nil
)
  native_table = @table._table
  raw_metrics =
    native_table.compact_optimize(target_size, max_concurrent_tasks, min_commit_interval)

  # Refresh the table before decoding, so it is updated even if the metrics
  # payload turns out not to be valid JSON.
  @table.update_incremental

  # TODO return underscore symbols like delete
  JSON.parse(raw_metrics).tap do |parsed|
    %w[filesAdded filesRemoved].each do |field|
      nested = parsed[field]
      # Only entries delivered as JSON text need the second decode.
      next unless nested.is_a?(String)

      parsed[field] = JSON.parse(nested)
    end
  end
end

#z_order(columns, target_size: nil, max_concurrent_tasks: nil, max_spill_size: 20 * 1024 * 1024 * 1024, min_commit_interval: nil) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/deltalake/table_optimizer.rb', line 27

# Runs the native `z_order_optimize` operation on the underlying table handle,
# refreshes the table via `update_incremental`, and returns the decoded metrics.
#
# `columns` is coerced with `Array()`, so a single value and an array of
# values are both accepted. The remaining options are forwarded positionally,
# unchanged, to `z_order_optimize`; their meaning is defined by that native call.
#
# @param columns [Object, Array] column(s) to order by, wrapped with `Array()`
# @param target_size [Object, nil] forwarded as the second argument
# @param max_concurrent_tasks [Object, nil] forwarded as the third argument
# @param max_spill_size [Integer] forwarded as the fourth argument;
#   defaults to 20 * 1024**3 (presumably bytes, i.e. 20 GiB — confirm)
# @param min_commit_interval [Object, nil] forwarded as the fifth argument
# @return [Object] the metrics parsed from JSON (string keys); the
#   "filesAdded" and "filesRemoved" entries are decoded a second time when
#   they arrive as JSON-encoded strings
def z_order(
  columns,
  target_size: nil,
  max_concurrent_tasks: nil,
  max_spill_size: 20 * 1024 * 1024 * 1024,
  min_commit_interval: nil
)
  native_args = [
    Array(columns),
    target_size,
    max_concurrent_tasks,
    max_spill_size,
    min_commit_interval
  ]
  raw_metrics = @table._table.z_order_optimize(*native_args)

  # Refresh the table before decoding, so it is updated even if the metrics
  # payload turns out not to be valid JSON.
  @table.update_incremental

  # TODO return underscore symbols like delete
  JSON.parse(raw_metrics).tap do |parsed|
    %w[filesAdded filesRemoved].each do |field|
      nested = parsed[field]
      # Only entries delivered as JSON text need the second decode.
      next unless nested.is_a?(String)

      parsed[field] = JSON.parse(nested)
    end
  end
end