Class: Slasher::DOM

Inherits:
Object
  • Object
show all
Defined in:
lib/slasher/dom.rb

Constant Summary collapse

# Tag names whose nodes are deleted from the document outright. Frozen so the
# shared list cannot be mutated at runtime.
REMOVED_ELEMENTS =
%w[iframe script style noscript header footer br img].freeze
# Tag names whose nodes are replaced by their own text content. Frozen for the
# same reason.
STRIPPED_ELEMENTS =
%w[blockquote strong a em b].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(document) ⇒ DOM

Returns a new instance of DOM.



8
9
10
# File 'lib/slasher/dom.rb', line 8

# Parses the given markup with Nokogiri::HTML and keeps the result in
# @document for the other methods of this class to work on.
#
# @param document the raw HTML, passed unchanged to Nokogiri::HTML
def initialize(document)
  parsed_markup = Nokogiri::HTML(document)
  @document = parsed_markup
end

Instance Attribute Details

#document ⇒ Object

Returns the value of attribute document.



6
7
8
# File 'lib/slasher/dom.rb', line 6

# Reader for the value stored in @document by #initialize (the result of
# Nokogiri::HTML on the constructor argument).
#
# @return [Object] the current value of @document
def document
  @document
end

Instance Method Details

#get_paragraphs_content(node) ⇒ Object



26
27
28
29
30
31
32
33
# File 'lib/slasher/dom.rb', line 26

# Concatenates the text of every result of `node > "p"` and detaches each
# of those results from its tree as it goes.
#
# @param node an object responding to `>` (e.g. a Nokogiri node)
# @return [String] the paragraph texts joined with no separator; "" when
#   there are no matches
def get_paragraphs_content(node)
  paragraphs = node > "p"
  paragraphs.reduce("") do |collected, paragraph|
    # Read the text before removing the paragraph, one paragraph at a time.
    combined = collected + paragraph.text
    paragraph.remove
    combined
  end
end

#get_texts(node) ⇒ Object



35
36
37
38
39
40
41
# File 'lib/slasher/dom.rb', line 35

# Collects the text of node's direct children that report themselves as
# text nodes (`text?`). Each piece has its newlines deleted and surrounding
# whitespace stripped, then the pieces are concatenated with no separator.
#
# @param node an object responding to `children`
# @return [String] the combined text; "" when no child is a text node
def get_texts(node)
  node.children.reduce("") do |joined, child|
    # Non-text children are skipped; their descendants are not visited.
    next joined unless child.text?

    joined + child.text.delete("\n").strip
  end
end

#remove_elements ⇒ Object



12
13
14
15
16
# File 'lib/slasher/dom.rb', line 12

# Deletes from @document every node matching one of the tag names in
# REMOVED_ELEMENTS, querying one tag name at a time with "//<tag>".
#
# @return [Array<String>] REMOVED_ELEMENTS (the receiver of `each`)
def remove_elements
  REMOVED_ELEMENTS.each do |tag_name|
    matches = @document.xpath("//#{tag_name}")
    matches.remove
  end
end

#strip_elements ⇒ Object



18
19
20
21
22
23
24
# File 'lib/slasher/dom.rb', line 18

# For each tag name in STRIPPED_ELEMENTS, finds the matching nodes in
# @document ("//<tag>") and replaces every one with a Nokogiri::XML::Text
# node built from that node's text, so the markup goes and the text stays.
#
# @return [Array<String>] STRIPPED_ELEMENTS (the receiver of `each`)
def strip_elements
  STRIPPED_ELEMENTS.each do |tag_name|
    matches = @document.search("//#{tag_name}")
    matches.each do |element|
      plain_text = Nokogiri::XML::Text.new(element.text, element.document)
      element.replace(plain_text)
    end
  end
end