Class: XMLRowFinder

Inherits:
Object
  • Object
show all
Defined in:
lib/xml_row_finder.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raws, debug: false) ⇒ XMLRowFinder

Returns a new instance of XMLRowFinder.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/xml_row_finder.rb', line 12

# Analyses a document to find its most frequently repeated element path
# (the "rows") and the element which contains all of those rows.
#
# Sets @doc (the parsed document), @doc2 (a Nokogiri copy of it), @to_a
# ([count, xpath] pairs), @xpath (the rows xpath) and @cont_xpath (the
# container xpath).
#
# @param raws [String] either a URL (a string beginning with "http"), which
#   is loaded through Nokorexi, or raw markup which is passed to Rexle.new
# @param debug [Boolean] stored in @debug; not otherwise read by this method
def initialize(raws, debug: false)

  @debug = debug

  # \A anchors at the very start of the string. The former ^ anchor matched
  # at the start of *any* line, so multi-line markup containing a line that
  # begins with "http" was wrongly treated as a URL.
  @doc = if raws =~ /\Ahttp/ then

    nki = Nokorexi.new(raws) do |doc1|

      # blank the inline onclick / onmousedown handler attributes
      %w(onclick onmousedown).each do |name|
        doc1.xpath("//*[@#{name}]").each do |e|
          e.attributes[name].value = ''
        end
      end

    end

    nki.to_doc

  else
    Rexle.new(raws)
  end

  # collect the xpath of every element, removing its attributes first
  # NOTE(review): relies on Rexle's argument-less attributes.delete;
  # presumably it removes all attributes -- confirm against Rexle
  a = []

  @doc.root.each_recursive do |e|
    e.attributes.delete
    a << e.backtrack.to_xpath
  end

  # pair each distinct xpath with its number of occurrences and pick the
  # most frequent one
  @to_a = a2 = a.map {|e| [a.count(e), e] }.uniq
  xpath = a2.max_by(&:first).last

  # build candidate xpaths by moving trailing path steps into a predicate,
  # e.g. a/b/c -> a/b[c] -> a[b/c] -> [a/b/c]
  a3 = xpath.split('/')
  a4 = [xpath]
  p1 = []

  until (a3.length < 1) do
    p1 << a3.pop; a4 << a3.join('/') + "[%s]" % p1.reverse.join('/')
  end

  # using Nokogiri since Rexle has a bug with xpath predicates
  #
  @doc2 = Nokogiri::XML(@doc.root.xml)

  # count the matches for each candidate; the last candidate has no
  # location path left (only a predicate), so it is skipped
  a5 = a4[0..-2].map do |xpath2|
    [@doc2.xpath(xpath2).length, xpath2]
  end

  # searching from the shortest location path, take the first candidate
  # which matches more than one node
  # NOTE(review): raises NoMethodError when no candidate matches more than
  # one node, since detect then returns nil
  @xpath = a5.reverse.detect {|num, xpath2| num > 1}.last

  last_row = @doc2.xpath(@xpath).last

  # find the container element: start from the rows xpath without its
  # predicate and walk up until the element's XML includes the last row
  # NOTE(review): String#include? is given a Nokogiri node here, so this
  # depends on the node converting implicitly to a String -- confirm
  xpath = @xpath[/^[^\[]+/]
  axpath = xpath.split('/')
  e = @doc.element xpath

  until (e.xml.include? last_row) do
    axpath.pop
    e = @doc.element axpath.join('/')
  end

  @cont_xpath = axpath.join('/')

end

Instance Attribute Details

#to_a ⇒ Object (readonly)

Returns the value of attribute to_a.



10
11
12
# File 'lib/xml_row_finder.rb', line 10

# Reader for @to_a, which the constructor fills with the unique
# [occurrence count, xpath] pairs gathered from every element in the
# document.
#
# @return [Array<Array>] the [count, xpath] pairs
def to_a
  @to_a
end

Instance Method Details

#body ⇒ Object

Returns the container element for all rows. Object returned: Rexle::Element



82
83
84
# File 'lib/xml_row_finder.rb', line 82

# Looks up the rows' container in the parsed document.
#
# @return [Object] whatever @doc.element yields for the container xpath
#   held in @cont_xpath
def body
  @doc.element(@cont_xpath)
end

#body_xpath ⇒ Object

returns the xpath pointing to the container element for all rows



88
89
90
# File 'lib/xml_row_finder.rb', line 88

# Gives the xpath of the element which contains all of the rows.
#
# @return [String] the container xpath stored in @cont_xpath
def body_xpath
  @cont_xpath
end

#rows ⇒ Object

Returns the rows. Object returned: an array of Nokogiri XML Element objects



96
97
98
# File 'lib/xml_row_finder.rb', line 96

# Runs the rows xpath against the Nokogiri copy of the document.
#
# @return [Object] the result of @doc2.xpath for the xpath held in @xpath
def rows
  @doc2.xpath(@xpath)
end

#to_xpath ⇒ Object Also known as: rows_xpath

returns the xpath pointing to the rows



102
103
104
# File 'lib/xml_row_finder.rb', line 102

# Gives the xpath which selects the rows.
#
# @return [String] the rows xpath stored in @xpath
def to_xpath
  @xpath
end