Class: Saper::Items::HTML

Inherits:
Saper::Item show all
Defined in:
lib/saper/items/html.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Saper::Item

[], exists?, inherited, #serialize, subclasses, try, #type, type

Constructor Details

#initialize(noko) ⇒ HTML


42
43
44
45
46
47
# File 'lib/saper/items/html.rb', line 42

# Wraps the given node and forces its owning document to UTF-8.
#
# @param noko [Object] node to wrap; it must respond to +document+
#   (presumably a Nokogiri node or document — confirm against callers)
def initialize(noko)
  @noko = noko
  # Force UTF-8 encoding on the owning document, see
  # https://github.com/sparklemotion/nokogiri/issues/117
  owner = @noko.document
  owner.encoding = 'UTF-8'
end

Class Method Details

.new(item) ⇒ Object


6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/saper/items/html.rb', line 6

# Builds an HTML item from any of the supported inputs.
#
# Values matching Nokogiri::XML::Element, Nokogiri::XML::Document or
# Nokogiri::HTML are handed to the inherited constructor unchanged; a
# Document, Text or String is run through .parse first.
#
# @param item [Object] value to wrap
# @return [Object] whatever the inherited +new+ returns
# @raise [Errors::InvalidItem] when +item+ matches none of the handled types
def self.new(item)
  node =
    case item
    when Nokogiri::XML::Element, Nokogiri::XML::Document, Nokogiri::HTML
      item
    when Document
      parse(item.body, item.uri, item.charset)
    when Text
      parse(item.to_s)
    when String
      parse(item)
    else
      raise(Errors::InvalidItem, item)
    end
  super(node)
end

.parse(string, uri = nil, charset = nil) ⇒ Object


26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/saper/items/html.rb', line 26

# Parses an HTML string with Nokogiri.
#
# @param string [String] markup to parse; must respond to +empty?+
# @param uri [Object, nil] forwarded as the second argument of Nokogiri::HTML.parse
# @param charset [Object, nil] forwarded as the third argument of Nokogiri::HTML.parse
# @return [Object] the value returned by Nokogiri::HTML.parse
# @raise [Errors::InvalidItem] when +string+ is empty or the parser raises
def self.parse(string, uri = nil, charset = nil)
  raise(Errors::InvalidItem, string) if string.empty?

  begin
    # Nokogiri wraps body-less strings in an extra body tag.
    # Nokogiri::HTML.fragment would probably suit that case better,
    # but it breaks the search function:
    # https://github.com/sparklemotion/nokogiri/issues/572
    Nokogiri::HTML.parse(string, uri, charset)
  rescue
    raise(Errors::InvalidItem, string)
  end
end

Instance Method Details

#==(other) ⇒ Object


55
56
57
# File 'lib/saper/items/html.rb', line 55

# Compares two items by their string representations.
#
# @param other [Object] any object responding to +to_s+
# @return [Boolean] result of comparing +to_s+ with +other.to_s+
def ==(other)
  own_text = to_s
  other_text = other.to_s
  own_text == other_text
end

#[](name) ⇒ Object


65
66
67
# File 'lib/saper/items/html.rb', line 65

# Reads an attribute of the wrapped node.
#
# @param name [Object] attribute name, passed to +has_attribute?+ and +[]+
# @return [Text, nil] the attribute value wrapped in Text, or nil when the
#   node reports no such attribute
def [](name)
  return nil unless @noko.has_attribute?(name)

  Text.new(@noko[name])
end

#dup ⇒ Object


50
51
52
# File 'lib/saper/items/html.rb', line 50

# Creates a new HTML item around a duplicate of the wrapped node.
#
# @return [Object] result of HTML.new for the duplicated node
def dup
  node_copy = @noko.dup
  HTML.new(node_copy)
end

#find(xpath) ⇒ Object


70
71
72
# File 'lib/saper/items/html.rb', line 70

# Returns the first result of #find_all for the given query.
#
# @param xpath [Object] query forwarded to #find_all
# @return [Object] the first match, or a new Nothing when there is none
def find(xpath)
  first_match = find_all(xpath).first
  first_match || Nothing.new
end

#find_all(xpath) ⇒ Object


75
76
77
# File 'lib/saper/items/html.rb', line 75

# Searches the wrapped node and wraps every match in an HTML item.
#
# The result of +map+ is always an Array and therefore always truthy, so
# no fallback value is needed: callers get an empty Array when nothing
# matches.
#
# @param xpath [Object] query forwarded to the wrapped node's +search+
# @return [Array] one HTML item per match; empty when there are no matches
def find_all(xpath)
  @noko.search(xpath).map { |element| HTML.new(element) }
end

#inner_html ⇒ Object


100
101
102
# File 'lib/saper/items/html.rb', line 100

# Returns the node's inner HTML with every newline character removed.
#
# @return [Text] the newline-free markup wrapped in Text
def inner_html
  markup = @noko.inner_html
  Text.new(markup.gsub("\n", ""))
end

#inner_text ⇒ Object


105
106
107
# File 'lib/saper/items/html.rb', line 105

# Returns the wrapped node's inner text.
#
# @return [Text] the value of +@noko.inner_text+ wrapped in Text
def inner_text
  Text.new(@noko.inner_text)
end

#name ⇒ Object


60
61
62
# File 'lib/saper/items/html.rb', line 60

# Returns the name reported by the wrapped node.
#
# @return [Object] value of +@noko.name+ (presumably the tag name — confirm)
def name
  @noko.name
end

#remove_children_preserving_content(xpath) ⇒ Object


95
96
97
# File 'lib/saper/items/html.rb', line 95

# Non-destructive counterpart of #remove_children_preserving_content!:
# the removal runs on the result of #dup instead of on this item.
#
# @param xpath [Object] query forwarded to the bang method
# @return [Object] whatever the bang method returns for the copy
def remove_children_preserving_content(xpath)
  copy = dup
  copy.remove_children_preserving_content!(xpath)
end

#remove_children_preserving_content!(xpath) ⇒ Object


90
91
92
# File 'lib/saper/items/html.rb', line 90

# Replaces every node matching the query with that node's own children,
# modifying the wrapped node in place.
#
# @param xpath [Object] query forwarded to the wrapped node's +search+
# @return [self]
def remove_children_preserving_content!(xpath)
  @noko.search(xpath).each do |wrapper|
    wrapper.replace(wrapper.children)
  end
  self
end

#remove_children_with_content(xpath) ⇒ Object


85
86
87
# File 'lib/saper/items/html.rb', line 85

# Non-destructive counterpart of #remove_children_with_content!: the
# removal runs on the result of #dup instead of on this item.
#
# @param xpath [Object] query forwarded to the bang method
# @return [Object] whatever the bang method returns for the copy
def remove_children_with_content(xpath)
  copy = dup
  copy.remove_children_with_content!(xpath)
end

#remove_children_with_content!(xpath) ⇒ Object


80
81
82
# File 'lib/saper/items/html.rb', line 80

# Calls +remove+ on every node matching the query, modifying the wrapped
# node in place.
#
# @param xpath [Object] query forwarded to the wrapped node's +search+
# @return [self]
def remove_children_with_content!(xpath)
  @noko.search(xpath).each(&:remove)
  self
end

#to_markdown ⇒ Object


110
111
112
# File 'lib/saper/items/html.rb', line 110

# Wraps this item in a Markdown item.
#
# @return [Object] result of Markdown.new called with this item
def to_markdown
  Markdown.new(self)
end

#to_native ⇒ Object


115
116
117
# File 'lib/saper/items/html.rb', line 115

# Converts this item to a native value by delegating to the result of
# #inner_html.
#
# @return [Object] whatever +inner_html.to_native+ returns
def to_native
  inner_html.to_native
end

#to_s ⇒ Object


120
121
122
# File 'lib/saper/items/html.rb', line 120

# String form of the item.
#
# A wrapped Nokogiri::HTML::Document is rendered through #to_native; any
# other node is rendered with its own +to_s+, stripped of surrounding
# whitespace.
#
# @return [Object] +to_native+ for documents, otherwise a stripped String
def to_s
  return to_native if @noko.is_a?(Nokogiri::HTML::Document)

  @noko.to_s.strip
end