Class: Scrubyt::SharedUtils

Inherits:
Object
  • Object
show all
Defined in:
lib/scrubyt/utils/shared_utils.rb

Overview

Utilities shared between the other utility classes (XPathUtils, SimpleExampleLookup, ...).

Constant Summary collapse

# Named HTML entities (without the leading '&' and trailing ';') mapped to
# the literal text each one is replaced with.
# Entities to replace - need to make this more complete, or install
# htmlentities or similar package.
# Frozen so the shared lookup table cannot be modified at runtime.
ENTITIES = {
  'quot'      => '"',
  'apos'      => "'",
  'amp'       => '&',
  'lt'        => '<',
  'gt'        => '>',
  'nbsp'      => ' '
}.freeze

Class Method Summary collapse

Class Method Details

.get_backtrace ⇒ Object



48
49
50
51
52
53
54
55
56
# File 'lib/scrubyt/utils/shared_utils.rb', line 48

# Returns the current call stack as seen by the caller of this method:
# an Array of frame strings (as produced by Kernel#caller), with the frame
# for get_backtrace itself omitted.
#
# Kernel#caller is used instead of raising and rescuing a throwaway
# exception. A bare `raise` re-raises `$!` when an exception is already
# being handled, so the raise/rescue approach would return (and destructively
# trim) the pending exception's backtrace whenever this method was called
# from inside a rescue clause.
def self.get_backtrace
  caller(1)
end

.prepare_text_for_comparison(text) ⇒ Object



15
16
17
18
19
# File 'lib/scrubyt/utils/shared_utils.rb', line 15

# Normalizes +text+ in place before it is compared against an example:
# passes it through unescape_entities, then strips leading and trailing
# whitespace.
#
# The argument itself is modified (String#strip! is destructive) and the
# same object is returned.
def self.prepare_text_for_comparison(text)
  text.tap do |str|
    unescape_entities(str)
    str.strip!
  end
end

.traverse_for_match(node, regexp) ⇒ Object

Entry point for finding the elements specified by examples



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/scrubyt/utils/shared_utils.rb', line 28

# Entry point for finding the elements specified by examples.
#
# Visits +node+ and, recursively, every child that is an Hpricot::Elem.
# For each visited node, its inner HTML with tags removed is normalized via
# prepare_text_for_comparison and tested against +regexp+. On a match:
#   * the MatchData is stored on the node and exposed through a
#     per-object +match_data+ reader,
#   * the node is appended to the result list,
#   * the node's parent is removed from the result list (when the node is an
#     Hpricot::Elem), so a matching child replaces its matching parent.
#
# Returns the Array of matching nodes.
def self.traverse_for_match(node, regexp)
  matches = []
  visit = lambda do |current|
    plain_text = prepare_text_for_comparison(current.inner_html.gsub(/<.*?>/, ''))
    if plain_text =~ regexp
      # Capture the MatchData immediately, before anything else can run a match.
      found = $~
      current.instance_variable_set(:@match_data, found)
      # Define the reader on this one object only.
      class << current
        def match_data
          @match_data
        end
      end
      matches << current
      matches.delete(current.parent) if current.is_a?(Hpricot::Elem)
    end
    current.children.each do |child|
      visit.call(child) if child.is_a?(Hpricot::Elem)
    end
  end
  visit.call(node)
  matches
end

.unescape_entities(text) ⇒ Object

Unescape the entities in the HTML!



22
23
24
25
# File 'lib/scrubyt/utils/shared_utils.rb', line 22

# Unescape the entities in the HTML!
#
# Replaces every "&name;" sequence whose name is a key of ENTITIES with the
# corresponding value. +text+ is modified in place and the same object is
# returned.
#
# All entities are replaced in a single pass. Substituting one entity at a
# time lets the output of an earlier replacement be unescaped again by a
# later one (e.g. "&amp;lt;" -> "&lt;" -> "<"); a single pass yields "&lt;".
def self.unescape_entities(text)
  pattern = /&(#{Regexp.union(*ENTITIES.keys).source});/
  text.gsub!(pattern) { ENTITIES[Regexp.last_match(1)] }
  text
end