Class: Slasher::DOM
- Inherits:
-
Object
- Object
- Slasher::DOM
- Defined in:
- lib/slasher/dom.rb
Constant Summary
- REMOVED_ELEMENTS =
['iframe', 'script', 'style', 'noscript', 'header', 'footer', 'br', 'img']
- STRIPPED_ELEMENTS =
['blockquote', 'strong', 'a', 'em', 'b']
Instance Attribute Summary
-
#document ⇒ Object
Returns the value of attribute document.
Instance Method Summary
- #get_paragraphs_content(node) ⇒ Object
- #get_texts(node) ⇒ Object
-
#initialize(document) ⇒ DOM
constructor
A new instance of DOM.
- #remove_elements ⇒ Object
- #strip_elements ⇒ Object
Constructor Details
#initialize(document) ⇒ DOM
Returns a new instance of DOM.
8 9 10 |
# File 'lib/slasher/dom.rb', line 8

# Builds a DOM wrapper by handing the given markup to Nokogiri::HTML and
# keeping the parsed result in @document.
#
# @param document the markup to parse; forwarded unchanged to Nokogiri::HTML
#   (presumably an HTML string -- confirm against callers)
def initialize(document)
  parsed = Nokogiri::HTML(document)
  @document = parsed
end
Instance Attribute Details
#document ⇒ Object
Returns the value of attribute document.
6 7 8 |
# File 'lib/slasher/dom.rb', line 6

# Reader for the parsed document held by this instance.
#
# @return [Object] the current value of @document
def document
  @document
end
Instance Method Details
#get_paragraphs_content(node) ⇒ Object
26 27 28 29 30 31 32 33 |
# File 'lib/slasher/dom.rb', line 26

# Collects the text of the `p` elements selected by `node > "p"` and removes
# each of those elements as it is consumed, so this call mutates the tree
# that +node+ belongs to.
#
# @param node the node whose `p` matches are harvested; it must respond to
#   `>` with a selector string and yield elements responding to `text` and
#   `remove`
# @return [String] the paragraph texts concatenated in iteration order, or an
#   empty string when there are no matches
def get_paragraphs_content(node)
  # Unary plus guarantees a mutable buffer even under frozen string literals.
  content = +""
  (node > "p").each do |paragraph|
    # Append in place instead of allocating a new String per paragraph.
    content << paragraph.text
    paragraph.remove
  end
  content
end
#get_texts(node) ⇒ Object
35 36 37 38 39 40 41 |
# File 'lib/slasher/dom.rb', line 35

# Joins the text of the direct children of +node+ that report themselves as
# text nodes (`text?`). Newlines are deleted from each piece and surrounding
# whitespace is stripped before it is appended; children that are not text
# nodes are skipped.
#
# @param node the node whose `children` are inspected; each child must
#   respond to `text?` and `text`
# @return [String] the cleaned text pieces concatenated without a separator,
#   or an empty string when no child is a text node
def get_texts(node)
  # Accumulate into one mutable buffer rather than rebuilding the String
  # with += on every iteration.
  node.children.each_with_object(+"") do |child, content|
    content << child.text.delete("\n").strip if child.text?
  end
end
#remove_elements ⇒ Object
12 13 14 15 16 |
# File 'lib/slasher/dom.rb', line 12

# Removes from @document every element whose tag name is listed in
# REMOVED_ELEMENTS, one XPath query (`//tag`) per name.
def remove_elements
  REMOVED_ELEMENTS.each { |tag| @document.xpath("//#{tag}").remove }
end
#strip_elements ⇒ Object
18 19 20 21 22 23 24 |
# File 'lib/slasher/dom.rb', line 18

# Replaces every element in @document whose tag name is listed in
# STRIPPED_ELEMENTS with a text node carrying that element's text, so the
# text stays in place while the wrapping markup is dropped.
def strip_elements
  STRIPPED_ELEMENTS.each do |tag|
    @document.search("//#{tag}").each do |match|
      plain_text = Nokogiri::XML::Text.new(match.text, match.document)
      match.replace(plain_text)
    end
  end
end