Class: Slasher::DOM
- Inherits: Object
- Inheritance hierarchy:
  - Object
  - Slasher::DOM
- Defined in:
- lib/slasher/dom.rb
Constant Summary collapse
- REMOVED_ELEMENTS =
['iframe', 'script', 'style', 'noscript', 'header', 'footer', 'br', 'img']
- STRIPPED_ELEMENTS =
['blockquote', 'strong', 'a', 'em', 'b']
Instance Attribute Summary collapse
-
#document ⇒ Object
Returns the value of attribute document.
Instance Method Summary collapse
- #get_paragraphs_content(node) ⇒ Object
- #get_texts(node) ⇒ Object
-
#initialize(document) ⇒ DOM
constructor
A new instance of DOM.
- #remove_elements ⇒ Object
- #strip_elements ⇒ Object
Constructor Details
#initialize(document) ⇒ DOM
10 11 12 |
# File 'lib/slasher/dom.rb', line 10
#
# Builds a new DOM wrapper by handing the given markup to Nokogiri::HTML
# and keeping the parsed result in @document.
#
# @param document the markup passed straight to Nokogiri::HTML
#   (presumably an HTML string -- confirm against callers)
def initialize(document)
  parsed = Nokogiri::HTML(document)
  @document = parsed
end
Instance Attribute Details
#document ⇒ Object
Returns the value of attribute document.
8 9 10 |
# File 'lib/slasher/dom.rb', line 8
#
# Reader for the parsed document held in @document.
#
# @return [Object] the current value of @document
def document
  @document
end
Instance Method Details
#get_paragraphs_content(node) ⇒ Object
28 29 30 31 32 33 |
# File 'lib/slasher/dom.rb', line 28
#
# Gathers the text of every "p" match returned by `node > "p"`, detaches
# each of those paragraphs from its parent, and returns the collected
# texts joined with a single space.
#
# @param node an object responding to `>` with a selector (e.g. a Nokogiri node)
# @return [String] paragraph texts separated by " " ("" when nothing matches)
def get_paragraphs_content(node)
  (node > "p").map do |paragraph|
    # Read the text BEFORE detaching: `remove` returns the node itself, so
    # letting it be the block's value would join node markup, not text.
    content = paragraph.text
    paragraph.remove
    content
  end.join(" ")
end
#get_texts(node) ⇒ Object
35 36 37 38 39 |
# File 'lib/slasher/dom.rb', line 35
#
# Concatenates the text of the direct children of +node+ that report
# themselves as text nodes (`text?`). Newlines are deleted from each piece
# and surrounding whitespace is stripped before the pieces are joined with
# no separator.
#
# @param node an object whose `children` respond to `text?` and `text`
# @return [String] the cleaned text pieces joined together
def get_texts(node)
  text_children = node.children.select(&:text?)
  text_children.map { |child| child.text.delete("\n").strip }.join
end
#remove_elements ⇒ Object
14 15 16 17 18 |
# File 'lib/slasher/dom.rb', line 14
#
# For each tag name listed in REMOVED_ELEMENTS, runs the XPath query
# "//<name>" against @document and calls `remove` on the result, so the
# matching elements are dropped from the document.
def remove_elements
  REMOVED_ELEMENTS.each do |tag_name|
    matches = @document.xpath("//#{tag_name}")
    matches.remove
  end
end
#strip_elements ⇒ Object
20 21 22 23 24 25 26 |
# File 'lib/slasher/dom.rb', line 20
#
# For each tag name listed in STRIPPED_ELEMENTS, searches @document with
# "//<name>" and replaces every match with a Nokogiri::XML::Text node built
# from the match's own text, i.e. the element wrapper is dropped while its
# text is kept.
def strip_elements
  STRIPPED_ELEMENTS.each do |tag_name|
    @document.search("//#{tag_name}").each do |wrapped|
      plain_text = Nokogiri::XML::Text.new(wrapped.text, wrapped.document)
      wrapped.replace(plain_text)
    end
  end
end