Class: Slasher::DOM

Inherits:
Object
  • Object
show all
Defined in:
lib/slasher/dom.rb

Constant Summary collapse

# Tag names that #remove_elements looks up with "//<tag>" and removes from the document.
# Frozen so the shared list cannot be mutated at runtime.
REMOVED_ELEMENTS =
%w[iframe script style noscript header footer br img].freeze
# Tag names that #strip_elements looks up with "//<tag>" and replaces with a text node
# holding the element's text. Frozen so the shared list cannot be mutated at runtime.
STRIPPED_ELEMENTS =
%w[blockquote strong a em b].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(document) ⇒ DOM



10
11
12
# File 'lib/slasher/dom.rb', line 10

# Parses the supplied HTML with Nokogiri and keeps the result in @document.
#
# @param document [Object] HTML input handed straight to Nokogiri's HTML parser
def initialize(document)
  @document = Nokogiri::HTML.parse(document)
end

Instance Attribute Details

#document ⇒ Object

Returns the value of attribute document.



8
9
10
# File 'lib/slasher/dom.rb', line 8

# Reader for the @document instance variable.
#
# @return [Object] the value assigned in #initialize, i.e. the result of Nokogiri::HTML(...)
def document
  @document
end

Instance Method Details

#get_paragraphs_content(node) ⇒ Object



28
29
30
31
32
33
# File 'lib/slasher/dom.rb', line 28

# Gathers the text of every element returned by `node > "p"` and removes
# each of those elements from its tree as it is read.
#
# @param node [#>] node whose "p" matches are harvested (a Nokogiri node in practice)
# @return [String] the paragraph texts joined by single spaces ("" when there are none)
def get_paragraphs_content(node)
  (node > "p").map do |paragraph|
    # Capture the text before removing: the block has to yield the text itself,
    # otherwise the value of #remove is what ends up being joined.
    content = paragraph.text
    paragraph.remove
    content
  end.join(" ")
end

#get_texts(node) ⇒ Object



35
36
37
38
39
# File 'lib/slasher/dom.rb', line 35

# Concatenates the text of +node+'s text children. Each piece has its newlines
# deleted and surrounding whitespace stripped; children that are not text
# contribute nothing, and the pieces are joined without a separator.
#
# @param node [#children] node whose children are inspected
# @return [String] the cleaned text pieces run together
def get_texts(node)
  text_children = node.children.select(&:text?)
  text_children.map { |text_child| text_child.text.delete("\n").strip }.join
end

#remove_elements ⇒ Object



14
15
16
17
18
# File 'lib/slasher/dom.rb', line 14

# Removes from @document every node matched by "//<tag>" for each tag name
# listed in REMOVED_ELEMENTS.
#
# @return [Array<String>] REMOVED_ELEMENTS (the receiver of #each)
def remove_elements
  REMOVED_ELEMENTS.each { |tag_name| @document.xpath("//#{tag_name}").remove }
end

#strip_elements ⇒ Object



20
21
22
23
24
25
26
# File 'lib/slasher/dom.rb', line 20

# For each tag name in STRIPPED_ELEMENTS, finds the matching nodes in @document
# via "//<tag>" and replaces each one with a text node carrying that node's text.
#
# @return [Array<String>] STRIPPED_ELEMENTS (the receiver of #each)
def strip_elements
  STRIPPED_ELEMENTS.each do |tag_name|
    matches = @document.search("//#{tag_name}")
    matches.each do |match|
      plain_text = Nokogiri::XML::Text.new(match.text, match.document)
      match.replace(plain_text)
    end
  end
end