Top Level Namespace
Instance Method Summary collapse
-
#html_to_text(node) ⇒ Object
require 'open-uri'.
Instance Method Details
#html_to_text(node) ⇒ Object
require 'open-uri'
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/html_text_gem.rb', line 6

# Converts an HTML node into plain text.
#
# The work is done on a copy obtained via +node.dup+; whether that copy is
# fully independent of the original depends on the node class's +dup+.
# The following steps are applied in order:
#
# 1. Every text node has any ">", whitespace, "<" run inside its content
#    collapsed to a single space.
# 2. Each <br> is replaced by a newline and each <hr> by a 70-dash rule on
#    its own line.
# 3. A blank line ("\n\n") is inserted after every <div>.
#
# Link targets (href attributes) are not included in the output.
#
# @param node [Object] an HTML node responding to +dup+, +xpath+, +css+ and
#   +text+ — presumably a Nokogiri node; confirm against callers
# @return [String] the text content of the transformed copy
def html_to_text(node)
  # Block-level tags that get a blank line appended after them.
  block_tags = %w[div]

  # Tag name => replacement text. The original literal listed "br" twice,
  # so the plain-newline entry was silently overwritten by the dashed rule;
  # the rule is assumed to have been meant for <hr>.
  separators = {
    'br' => "\n",
    'hr' => "\n#{'-' * 70}\n"
  }

  # Named `copy` rather than `dup` so the local does not shadow the method.
  copy = node.dup

  copy.xpath('.//text()').each do |text_node|
    text_node.content = text_node.text.gsub(/>\s+</, ' ')
  end

  # The former "extract urls" step looked up the first <a> and read its href
  # without using the value, and raised NoMethodError when the document had
  # no matching anchor. It had no effect on the result, so it is gone.

  copy.css(separators.keys.join(',')).each do |separator_node|
    separator_node.replace(separators[separator_node.name])
  end

  copy.css(block_tags.join(',')).each do |block_node|
    block_node.after("\n\n")
  end

  copy.text
end