Method: WWMD::Page#html2text
Defined in: lib/wwmd/page/html2text_hpricot.rb, lib/wwmd/page/html2text_nokogiri.rb
# Translates a named character reference into the character it stands for.
#
# The first and last characters of +s+ are dropped before the lookup
# (presumably the "&" and ";" of a reference such as "&amp;" -- confirm
# against the caller), and the remainder is used as the key into
# Hpricot::NamedCharacters.
#
# @param s [String] the delimited entity reference
# @return [String, nil] the character as a UTF-8 string, or nil when the
#   name has no entry in the table
def lookup_named_char(s)
  code = Hpricot::NamedCharacters[s[1...-1]]
  # Integer#chr without an encoding raises RangeError above 255 and yields
  # a raw binary byte for 128..255; pack('U') builds a UTF-8 character for
  # any code point.
  [code].pack('U') if code
end
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/wwmd/page/html2text_hpricot.rb', line 58
#
# Produces a plain-text rendering of the page: the document obtained from
# scrape.hdoc is passed to node_to_text, whitespace is normalized, and the
# result is emitted one stripped line at a time.
#
# @return [String] every kept line followed by "\n"; an empty string when
#   no line survives the filtering
def html2text
  text = node_to_text(scrape.hdoc)
  # Named-entity substitution is currently disabled:
  # text.gsub!(NamedCharRegex){|s| "#{lookup_named_char(s)}"}

  # Clean up white space in place: carriage returns become spaces, runs of
  # spaces collapse to one, and the outer edges are trimmed.
  text.gsub!("\r", " ")
  text.squeeze!(" ")
  text.strip!

  # Drop blank lines and lines made up solely of question marks.
  kept = text.split(/\n/).map { |line| line.strip }.reject do |line|
    line == '' || line =~ /^\?+$/
  end

  kept.inject('') { |out, line| out + "#{line}\n" }
end