Module: Docp::TableRemoveMethods

Included in:
Table, Table
Defined in:
lib/docp/table_remove_methods.rb

Instance Method Summary

Instance Method Details

#colspan_join(parse_doc) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/docp/table_remove_methods.rb', line 13

# Pushes every colspan header cell down into the row that follows it.
#
# For each row returned by <tt>parse_doc.search('tr')</tt>, every child
# element carrying a +colspan+ attribute is replaced by copies of the leading
# cells of the following row. Each copy's content becomes
# "<header text> <cell text>", the copy is placed right after the header
# cell, the source cell is removed from the following row, and finally the
# header cell itself is removed.
#
# Cells are always taken from the front of the following row (indexes
# <tt>colspan - 1</tt> down to 0), independent of the header cell's own
# position. Rows that have no following element are left untouched.
#
# No attributes are stripped here; an earlier clean-up pass that deleted
# +colspan+ attributes from the row's cells is disabled.
#
# @param parse_doc [#search] presumably a Nokogiri document or node
#   (TODO confirm against callers)
# @return [Object] whatever iterating the search result returns
def colspan_join(parse_doc)
  parse_doc.search('tr').each do |row|
    row_below = row.next_element

    row.elements.each do |header|
      next if header[:colspan].nil? || row_below.nil?

      last_index = header[:colspan].to_i - 1

      # Walk from the highest index down: each copy is set as the header's
      # immediate next sibling, so reverse insertion leaves the copies in
      # their original left-to-right order.
      consumed = last_index.downto(0).map do |index|
        source = row_below.elements[index]
        next if source.nil?

        merged = source.clone
        merged.content = header.text + " " + source.text
        header.next = merged
        source
      end

      # Source cells are detached only after all copies exist, so the
      # indexes used above stay stable.
      consumed.compact.map(&:remove)
      header.remove
    end
  end
end

#doc_remove_attributes(remove_doc) ⇒ Object



3
4
5
6
7
8
9
10
11
# File 'lib/docp/table_remove_methods.rb', line 3

# Drops hidden elements and strips every attribute from table rows and cells.
#
# First removes all nodes whose +style+ attribute contains the exact text
# "display:none", then deletes each attribute of every node matched by
# <tt>search('tr', 'th', 'td')</tt>.
#
# @param remove_doc [#search] presumably a Nokogiri document or node
#   (TODO confirm against callers); it is modified in place
# @return [Object] whatever iterating the tr/th/td search result returns
def doc_remove_attributes(remove_doc)
  remove_doc.search("//*[contains(@style,'display:none')]").remove

  remove_doc.search('tr', 'th', 'td').each do |node|
    node.attributes.each_key { |attr_name| node.delete(attr_name) }
  end
end

#rowspan_flatten(parse_doc) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/docp/table_remove_methods.rb', line 66

# Folds the rows covered by a rowspan into the row that declares it.
#
# For each row returned by <tt>parse_doc.search('tr')</tt>, the largest
# +rowspan+ value among its child elements decides how many following rows
# are absorbed (<tt>rowspan - 1</tt>). For each absorbed row, its child
# elements are appended to the current row and the emptied row is removed.
# Rows with no +rowspan+ cell are skipped.
#
# @param parse_doc [#search] presumably a Nokogiri document or node
#   (TODO confirm against callers); it is modified in place
# @return [Object] whatever iterating the search result returns
def rowspan_flatten(parse_doc)
  parse_doc.search('tr').each do |row|
    declared = row.elements.map { |cell| cell[:rowspan] }.compact
    extra_rows = declared.map { |value| value.to_i - 1 }.max
    next unless extra_rows

    extra_rows.times do
      row_below = row.next_element
      # Once there is no following row, later passes cannot find one either.
      break if row_below.nil?

      row.add_child(row_below.elements)
      row_below.remove
    end
  end
end

#rowspan_join(parse_doc) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/docp/table_remove_methods.rb', line 37

# Merges the rows covered by a rowspan into the row that declares it,
# joining cell text position by position.
#
# For each row returned by <tt>parse_doc.search('tr')</tt>:
# * child elements without a +rowspan+ attribute are collected as merge
#   targets, in order;
# * the number of following rows to absorb is the LARGEST
#   <tt>rowspan - 1</tt> found in the row, matching #rowspan_flatten (taking
#   the last rowspan seen would let a trailing small rowspan cancel a larger
#   one and leave spanned rows unmerged);
# * for every absorbed row, its i-th child element has its text appended
#   (space separated) to the i-th merge target; when no such target exists
#   the element is moved into the current row. The absorbed row is then
#   removed.
#
# @param parse_doc [#search] presumably a Nokogiri document or node
#   (TODO confirm against callers); it is modified in place
# @return [Object] whatever iterating the search result returns
def rowspan_join(parse_doc)
  parse_doc.search('tr').each do |tr|
    spanning, no_rowspans = tr.elements.partition { |td| td[:rowspan] }

    # nil (no rowspan cells) becomes 0; zero or negative depths run no passes.
    row_depth = spanning.map { |td| td[:rowspan].to_i - 1 }.max.to_i

    row_depth.times do
      following = tr.next_element
      # Once there is no following row, later passes cannot find one either.
      break unless following

      following.elements.each_with_index do |td, i|
        target = no_rowspans[i]
        if target
          target.content = "#{target.text} #{td.text}"
        else
          tr.add_child(td)
        end
      end
      following.remove
    end
  end
end