Class: Saper::Items::HTML

Inherits:
Saper::Item show all
Defined in:
lib/saper/items/html.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Saper::Item

#==, [], exists?, inherited, subclasses, try, type, #type

Constructor Details

#initialize(noko) ⇒ HTML

Returns a new instance of HTML.



28
29
30
31
32
33
# File 'lib/saper/items/html.rb', line 28

# Wraps an already-parsed node in an HTML item.
#
# @param noko [Object] the node to wrap; it must respond to +document+
#   (presumably a Nokogiri element or document — confirm against callers)
def initialize(noko)
  @noko = noko
  # Nokogiri may not report UTF-8 on its own, so the owning document's
  # encoding is set explicitly here.
  # https://github.com/sparklemotion/nokogiri/issues/117
  noko.document.encoding = 'UTF-8'
end

Class Method Details

.new(item) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/saper/items/html.rb', line 5

# Builds an HTML item from any supported input, parsing raw markup first
# when necessary.
#
# @param item [Nokogiri::XML::Element, Nokogiri::HTML::Document, Text, String]
#   an already-parsed node/document, or markup to be parsed
# @return [HTML] the new item wrapping the resulting node
# @raise [InvalidItem] when +item+ is none of the supported types, or when
#   parsing the markup fails
def self.new(item)
  node =
    case item
    when Nokogiri::XML::Element
      item
    when Nokogiri::HTML::Document
      # Nokogiri::HTML itself is only a namespace module, so `when Nokogiri::HTML`
      # never matched a parsed document; match the document class instead.
      item
    when Text
      parse(item.to_s)
    when String
      parse(item)
    else
      raise(InvalidItem, item)
    end
  super(node)
end

.parse(string, uri = nil, charset = nil) ⇒ Object



20
21
22
23
24
25
26
# File 'lib/saper/items/html.rb', line 20

# Parses markup into a document via Nokogiri::HTML.parse.
#
# @param string [String] the markup to parse
# @param uri [String, nil] passed through to Nokogiri::HTML.parse
# @param charset [String, nil] passed through to Nokogiri::HTML.parse
# @return [Object] whatever Nokogiri::HTML.parse returns
# @raise [InvalidItem] when the parser raises any StandardError; the
#   offending input is passed along as the error message
def self.parse(string, uri = nil, charset = nil)
  Nokogiri::HTML.parse(string, uri, charset)
rescue StandardError
  raise(InvalidItem, string)
end

Instance Method Details

#[](name) ⇒ Object



39
40
41
# File 'lib/saper/items/html.rb', line 39

# Looks up +name+ on the wrapped node by delegating to its +[]+ method.
#
# @param name [Object] the key to look up (presumably an attribute name
#   such as "href" — confirm against callers)
# @return [Object] whatever the wrapped node returns for +name+
def [](name)
  @noko[name]
end

#find(xpath) ⇒ Object



43
44
45
# File 'lib/saper/items/html.rb', line 43

# Returns the first node matching +xpath+, wrapped as an HTML item.
#
# Only the first match is wrapped; previously every match was wrapped in
# a new HTML item and all but the first were thrown away.
#
# @param xpath [String] the query handed to the wrapped node
# @return [HTML, nil] the first match, or nil when nothing matches
def find(xpath)
  node = @noko.at(xpath)
  node && HTML.new(node)
end

#find_all(xpath) ⇒ Object



47
48
49
# File 'lib/saper/items/html.rb', line 47

# Returns every node matching +xpath+, each wrapped as an HTML item.
#
# @param xpath [String] the query handed to the wrapped node's +search+
# @return [Array<HTML>] one item per match, in the order +search+ yields them
def find_all(xpath)
  matches = @noko.search(xpath)
  matches.map { |node| HTML.new(node) }
end

#inner_html ⇒ Object



63
64
65
# File 'lib/saper/items/html.rb', line 63

# Returns the wrapped node's +inner_html+.
#
# @return [Object] whatever +@noko.inner_html+ returns (presumably the
#   markup of the node's children as a String — confirm against Nokogiri)
def inner_html
  @noko.inner_html
end

#inner_text ⇒ Object



67
68
69
# File 'lib/saper/items/html.rb', line 67

# Returns the wrapped node's +inner_text+.
#
# @return [Object] whatever +@noko.inner_text+ returns (presumably the
#   node's text content with markup stripped — confirm against Nokogiri)
def inner_text
  @noko.inner_text
end

#name ⇒ Object



35
36
37
# File 'lib/saper/items/html.rb', line 35

# Returns the name reported by the wrapped node.
#
# @return [Object] whatever +@noko.name+ returns (presumably the tag name
#   as a String — confirm against Nokogiri)
def name
  @noko.name
end

#remove(tag) ⇒ Object



59
60
61
# File 'lib/saper/items/html.rb', line 59

# Shorthand for #remove_children_preserving_content: forwards +tag+
# unchanged and returns that method's result.
#
# @param tag [String] the query forwarded to
#   #remove_children_preserving_content
# @return [Object] the result of #remove_children_preserving_content
def remove(tag)
  remove_children_preserving_content(tag)
end

#remove_children_preserving_content(xpath) ⇒ Object



55
56
57
# File 'lib/saper/items/html.rb', line 55

# Unwraps every node matching +xpath+: each match is replaced by its own
# children, so the matched node goes away while its content stays.
#
# @param xpath [String] the query handed to the wrapped node's +search+
# @return [self] the receiver, to allow chaining
def remove_children_preserving_content(xpath)
  @noko.search(xpath).each do |node|
    node.replace(node.children)
  end
  self
end

#remove_children_with_content(xpath) ⇒ Object



51
52
53
# File 'lib/saper/items/html.rb', line 51

# Calls +remove+ on every node matching +xpath+, discarding the matched
# nodes together with their content.
#
# @param xpath [String] the query handed to the wrapped node's +search+
# @return [self] the receiver, to allow chaining
def remove_children_with_content(xpath)
  @noko.search(xpath).each(&:remove)
  self
end

#to_markdown ⇒ Object



71
72
73
# File 'lib/saper/items/html.rb', line 71

# Converts this item by handing it to Markdown.new.
#
# @return [Markdown] a new Markdown object constructed from this item
def to_markdown
  Markdown.new(self)
end

#to_native ⇒ Object



75
76
77
# File 'lib/saper/items/html.rb', line 75

# Native representation of this item: the result of #inner_html.
#
# @return [Object] whatever #inner_html returns
def to_native
  inner_html
end

#to_s ⇒ Object



79
80
81
# File 'lib/saper/items/html.rb', line 79

# String form of this item: the result of #inner_html, not the node's
# own +to_s+.
#
# @return [Object] whatever #inner_html returns
def to_s
  inner_html
end