Class: Tabula::Table

Inherits:
Object
  • Object
show all
Includes:
Tabular
Defined in:
lib/tabula/entities/table.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Tabular

#page, #to_a

Methods included from AbstractInterface

included

Constructor Details

#initialize(line_count, separators) ⇒ Table

Returns a new instance of Table.



6
7
8
9
10
# File 'lib/tabula/entities/table.rb', line 6

# Builds a table pre-populated with blank lines.
#
# @param line_count [Integer] how many Line objects to create up front
# @param separators [Object] stored untouched in @separators
def initialize(line_count, separators)
  @extraction_method = "original"
  @separators = separators
  # One fresh Line per index in 0...line_count.
  @lines = (0...line_count).map { Line.new }
end

Instance Attribute Details

#extraction_method ⇒ Object (readonly)

Returns the value of attribute extraction_method.



4
5
6
# File 'lib/tabula/entities/table.rb', line 4

# Read-only accessor for the @extraction_method instance variable.
#
# @return [Object] whatever is currently stored in @extraction_method
def extraction_method; @extraction_method; end

Class Method Details

.new_from_array(array_of_rows) ⇒ Object

Creates a new Table object from an array of arrays representing a list of rows in a spreadsheet. Probably only used for testing.



45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/tabula/entities/table.rb', line 45

# Builds a Table from an array of arrays, one inner array per row.
#
# Each cell becomes a TextElement constructed with the row index and the
# column index as its first two arguments and 1 for the two that follow.
#
# @param array_of_rows [Array<Array>] cell values, row by row
# @return [Table] the populated table, with extraction_method "testing"
def self.new_from_array(array_of_rows)
  t = Table.new(array_of_rows.size, [])
  # This is a class method, so a bare `@extraction_method = ...` here would
  # set an ivar on the class object; it has to be set on the new instance.
  t.instance_variable_set(:@extraction_method, "testing")
  tlines = array_of_rows.each_with_index.map do |row, index|
    l = Line.new
    l.text_elements = row.each_with_index.map do |cell, inner_index|
      TextElement.new(index, inner_index, 1, 1, nil, nil, cell, nil)
    end
    l
  end
  t.instance_variable_set(:@lines, tlines)
  # rpad! is invoked through send, as in the original (it may be private).
  t.send(:rpad!)
  t
end

Instance Method Details

#==(other) ⇒ Object

used for testing, ignores separator locations (they’ll sometimes be nil/empty)



61
62
63
64
65
66
67
68
69
# File 'lib/tabula/entities/table.rb', line 61

# Row-by-row equality between two tables.
#
# Note: this rewrites @lines on BOTH the receiver and +other+ (left-stripped,
# then each padded with Line.new up to the other's line count) before the
# rows are compared.
#
# @param other [Table] the table to compare against
# @return [Boolean] true when every zipped pair of rows is ==
def ==(other)
  # Step 1: left-strip both sides.
  @lines = lstrip_lines
  other.instance_variable_set(:@lines, other.lstrip_lines)

  # Step 2: pad each side to the other's length (order matters: the second
  # call sees the receiver's already-padded line count).
  @lines = lines.rpad(Line.new, other.lines.size)
  other.instance_variable_set(:@lines, other.lines.rpad(Line.new, lines.size))

  rows.zip(other.rows).all? { |mine, theirs| mine == theirs }
end

#add_text_element(text_element, i, j) ⇒ Object



12
13
14
15
16
17
18
19
20
21
# File 'lib/tabula/entities/table.rb', line 12

# Places +text_element+ at row +i+, column +j+.
#
# If the cell is already occupied, the existing element is merged with the
# new one via merge!; otherwise the new element is stored in the cell.
#
# @param text_element [Object] the element to add (must be accepted by merge!)
# @param i [Integer] row (line) index
# @param j [Integer] column index within the line
# @return [Object] the result of merge!, or +text_element+ when stored fresh
def add_text_element(text_element, i, j)
  # Append Lines until index i exists. Assigning @lines[i] directly would
  # leave nil holes when i skips past the current end, and those holes break
  # any later code that calls text_elements on every line.
  @lines << Line.new while @lines.size <= i

  cells = @lines[i].text_elements
  if cells[j]
    cells[j].merge!(text_element)
  else
    cells[j] = text_element
  end
end

#cols ⇒ Object



24
25
26
# File 'lib/tabula/entities/table.rb', line 24

# Column-major view of the table: the transpose of #rows.
#
# @return [Array<Array>] one inner array per column
def cols
  row_major = rows
  row_major.transpose
end

#rows ⇒ Object

TODO: this is awful, refactor



29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/tabula/entities/table.rb', line 29

# Returns the table's rows as arrays of text elements.
#
# After calling rpad! and lstrip_lines!, each line's text_elements array is
# updated in place so that nil cells become empty-text TextElements. Rows in
# which every cell has empty text are dropped, and the remainder is sorted by
# the largest +top+ value in the row (a nil +top+ counts as 0).
#
# @return [Array<Array>] the non-blank rows in sorted order
def rows
  rpad!
  lstrip_lines!
  # map! mutates each line's own text_elements array and returns it, so the
  # rows handed back are the lines' actual arrays, not copies.
  lines
    .map { |line| line.text_elements.map! { |te| te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil) } }
    .reject { |cells| cells.all? { |te| te.text.empty? } }
    .sort_by { |cells| cells.map { |te| te.top || 0 }.max }
end

#to_csv ⇒ Object



80
81
82
83
84
85
# File 'lib/tabula/entities/table.rb', line 80

# Serializes #rows through Tabula::Writers.CSV into a UTF-8 string buffer.
#
# @return [String] everything the writer emitted
def to_csv
  StringIO.new.tap do |buffer|
    buffer.set_encoding("utf-8")
    Tabula::Writers.CSV(rows, buffer)
  end.string
end

#to_json(*a) ⇒ Object



71
72
73
74
75
76
77
78
# File 'lib/tabula/entities/table.rb', line 71

# JSON representation of the table: class name, extraction method,
# the stored separators and the row data.
#
# @param a [Array] arguments forwarded unchanged to Hash#to_json
# @return [String] the JSON document
def to_json(*a)
  payload = {
    'json_class' => self.class.name,
    'extraction_method' => @extraction_method,
    'vertical_separators' => @separators,
    'data' => rows
  }
  payload.to_json(*a)
end

#to_tsv ⇒ Object



87
88
89
90
91
92
# File 'lib/tabula/entities/table.rb', line 87

# Serializes #rows through Tabula::Writers.TSV into a UTF-8 string buffer.
#
# @return [String] everything the writer emitted
def to_tsv
  StringIO.new.tap do |buffer|
    buffer.set_encoding("utf-8")
    Tabula::Writers.TSV(rows, buffer)
  end.string
end