Module: HtmlToPlainText
- Included in:
- Premailer
- Defined in:
- lib/html_to_plain_text.rb
Overview
Support functions for Premailer
Instance Method Summary
-
#convert_to_text(html, line_length, from_charset = 'UTF-8', shorten = false) ⇒ Object
Returns the text in UTF-8 format with all HTML tags removed.
Instance Method Details
#convert_to_text(html, line_length, from_charset = 'UTF-8', shorten = false) ⇒ Object
Returns the text in UTF-8 format with all HTML tags removed.
TODO:
- add support for DL, OL
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/html_to_plain_text.rb', line 12
#
# Converts an HTML fragment to wrapped plain text: headings are underlined
# (H1/H2 also overlined), links are rendered as "text [url]", list items get
# a " * " bullet, closing paragraph tags become blank lines and every
# remaining tag is stripped.
#
# TODO:
# - add support for DL, OL
#
# @param html [String] the HTML to convert
# @param line_length [Integer] wrap width; also caps heading rule length
# @param from_charset [String] not referenced by this method; kept so
#   existing callers that pass it keep working
# @param shorten [Boolean] when true, link URLs are passed through Bitly
# @return [Object] the value produced by Text::Reform#format after the final
#   bullet fix-up (presumably a String -- confirm against Text::Reform)
def convert_to_text(html, line_length, from_charset = 'UTF-8', shorten = false)
  # NOTE(review): hard-coded Bitly credentials live in source; they should be
  # rotated and supplied through configuration instead.
  bitly = shorten ? Bitly4R.Keyed("mikedamage", "R_abb45e99634386334b7ed6c8d081e80e") : nil

  # decode HTML entities
  # NOTE(review): decoding happens before tag stripping, so escaped markup
  # such as "&lt;b&gt;" is presumably turned into a tag and removed below.
  txt = HTMLEntities.new.decode(html)

  # handle headings; the lazy (.*?) keeps several headings on one line from
  # being swallowed as a single match
  txt.gsub!(/<h([0-9]+)[^>]*>(.*?)<\/h[0-9]+>/i) do
    heading = Regexp.last_match
    hlevel  = heading[1].to_i
    htext   = heading[2].gsub(/<\/?[^>]*>/i, '') # remove tags inside headings
    hlength = [htext.length, line_length].min   # rules never exceed the wrap width

    case hlevel
    when 1 # H1
      ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength) + "\n"
    when 2 # H2
      ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength) + "\n"
    else # H3-H6 are styled the same
      htext + "\n" + ('-' * hlength) + "\n"
    end
  end

  # links; "<a" must be followed by whitespace (so <abbr>/<address> are not
  # mistaken for anchors) and both wildcards are lazy so each link on a line
  # is converted on its own
  txt.gsub!(/<a\s[^>]*?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/i) do
    url, label = Regexp.last_match.captures
    url = bitly.shorten(url) if bitly
    "#{label} [#{url}]"
  end

  txt.gsub!(/(<li[\s]+[^>]*>|<li>)/i, ' * ')      # unordered LIsts
  txt.gsub!(/<\/p>/i, "\n\n")                     # paragraphs
  txt.gsub!(/<\/?[^>]*>/, '')                     # strip remaining tags
  txt.gsub!(/\A[\s]+|[\s]+\Z|^[ \t]+/m, '')       # strip extra spaces
  txt.gsub!(/[\n]{3,}/m, "\n\n")                  # tighten line breaks

  # wrap text
  r = Text::Reform.new(:trim => true, :squeeze => false, :break => Text::Reform.break_wrap)
  txt = r.format(('[' * line_length), txt)

  # the leading-space strip above ate the bullet indent; add spaces back to lists
  txt.gsub!(/^[\*][\s]/m, ' * ')
  txt
end