Module: ActionMailer::Text::HtmlToPlainText
- Defined in:
- lib/actionmailer-text/html_to_plain_text.rb
Class Method Summary collapse
-
.convert_to_text(html, line_length = 65, _from_charset = 'UTF-8') ⇒ Object
Returns the text in UTF-8 format with all HTML tags removed.
-
.custom_word_wrap(txt, line_length) ⇒ Object
Taken from Rails’ word_wrap helper (api.rubyonrails.org/classes/ActionView/Helpers/TextHelper.html#method-i-word_wrap).
Instance Method Summary collapse
Class Method Details
.convert_to_text(html, line_length = 65, _from_charset = 'UTF-8') ⇒ Object
Returns the text in UTF-8 format with all HTML tags removed.
TODO: add support for definition lists (DL) and ordered lists (OL).
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/actionmailer-text/html_to_plain_text.rb', line 20

# Converts an HTML document or fragment into plain text.
#
# Images are replaced by their alt text, links become "text ( url )",
# headings are underlined (H1/H2 are also overlined), list items are
# prefixed with "* ", paragraphs and <br> become newlines, and every other
# tag is dropped. The result is optionally word-wrapped and has its
# whitespace normalised.
#
# TODO: add support for DL, OL
#
# @param html [String, nil] markup to convert; it is never modified
# @param line_length [Integer, nil] maximum line width; nil disables wrapping
#   and the capping of heading rules
# @param _from_charset [String] unused; kept so existing callers that pass a
#   third argument keep working
# @return [String] the stripped plain-text rendering
def self.convert_to_text(html, line_length = 65, _from_charset = 'UTF-8')
  # Work on a private UTF-8 copy: every step below mutates +txt+ in place,
  # and the caller's string (which may be frozen, or nil) must stay untouched.
  txt = html.to_s.encode(Encoding::UTF_8)

  # Ignore things that come outside the body
  txt.gsub!(/.*?(<body.+?\/body>).*?/im, '\1')

  # replace images by their alt attribute (double- and single-quoted)
  txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*\/>/i, '\1')
  txt.gsub!(/<img.+?alt='([^\']*)\'[^>]*\/>/i, '\1')

  # links become "text ( url )" (double- and single-quoted href)
  txt.gsub!(/<a.+?href=\"([^\"]*)\"[^>]*>(.+?)<\/a>/mi) do
    "#{Regexp.last_match(2).strip} ( #{Regexp.last_match(1).strip} )"
  end
  txt.gsub!(/<a.+?href='([^\']*)\'[^>]*>(.+?)<\/a>/mi) do
    "#{Regexp.last_match(2).strip} ( #{Regexp.last_match(1).strip} )"
  end

  # handle headings (H1-H6)
  # Closing tags go on their own line first, because the heading pattern
  # below has no /m flag and therefore captures a single line of text.
  txt.gsub!(/(<\/h[1-6]>)/i, "\n\\1")
  txt.gsub!(/[\s]*<h([1-6]+)[^>]*>[\s]*(.*)[\s]*<\/h[1-6]+>/i) do
    # Read both captures before running any other regexp in this block,
    # since each further match overwrites Regexp.last_match.
    hlevel = Regexp.last_match(1).to_i
    htext = Regexp.last_match(2)

    htext.gsub!(/<br[\s]*\/?>/i, "\n") # handle <br>s
    htext.gsub!(/<\/?[^>]*>/i, '')     # strip tags

    # The rule is as wide as the longest heading line, capped at line_length.
    hlength = htext.each_line.map { |l| l.strip.length }.max || 0
    hlength = line_length if line_length && hlength > line_length

    htext =
      case hlevel
      when 1 # H1, asterisks above and below
        "#{'*' * hlength}\n#{htext}\n#{'*' * hlength}"
      when 2 # H2, dashes above and below
        "#{'-' * hlength}\n#{htext}\n#{'-' * hlength}"
      else   # H3-H6, dashes below
        "#{htext}\n#{'-' * hlength}"
      end

    "\n\n#{htext}\n\n"
  end

  # wrap spans
  txt.gsub!(/(<\/span>)[\s]+(<span)/mi, '\1 \2')

  # lists -- TODO: should handle ordered lists
  txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
  # list not followed by a newline
  txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

  # paragraphs and line breaks
  txt.gsub!(/<\/p>/i, "\n\n")
  txt.gsub!(/<br[\/ ]*>/i, "\n")

  # strip remaining tags
  txt.gsub!(/<\/?[^>]*>/, '')

  # Decode HTML entities only after the markup is gone. Decoding first would
  # turn escaped text such as "&lt;b&gt;" into a real tag, which the
  # stripping above would then delete from the output.
  txt = HTMLEntities.new.decode(txt)

  txt = custom_word_wrap(txt, line_length) if line_length

  # remove linefeeds (\r\n and \r -> \n)
  txt.gsub!(/\r\n?/, "\n")

  # strip extra spaces
  txt.gsub!(/\302\240+/, ' ') # non-breaking spaces -> spaces
  txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
  txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

  # no more than two consecutive newlines
  txt.gsub!(/[\n]{3,}/, "\n\n")

  # collapse runs of spaces into a single space
  txt.gsub!(/ {2,}/, ' ')

  txt.strip
end
.custom_word_wrap(txt, line_length) ⇒ Object
Taken from Rails’ word_wrap helper (api.rubyonrails.org/classes/ActionView/Helpers/TextHelper.html#method-i-word_wrap)
107 108 109 110 111 |
# File 'lib/actionmailer-text/html_to_plain_text.rb', line 107

# Wraps every line of +txt+ that is longer than +line_length+ characters,
# breaking at whitespace. Lines that already fit are returned unchanged.
# A single word longer than +line_length+ is never split. Trailing empty
# lines of +txt+ are not preserved, because the text is split on "\n".
#
# Adapted from Rails' word_wrap helper.
#
# @param txt [String] text to wrap
# @param line_length [Integer] maximum width of a line
# @return [String] the wrapped text, lines joined with "\n"
def self.custom_word_wrap(txt, line_length)
  wrapped_lines = txt.split("\n").map do |line|
    if line.length > line_length
      # Up to line_length characters followed by whitespace or end of line;
      # the trailing whitespace is replaced by the line break.
      chunk = /(.{1,#{line_length}})(\s+|$)/
      line.gsub(chunk, "\\1\n").strip
    else
      line
    end
  end

  wrapped_lines.join("\n")
end
Instance Method Details
#convert_to_text(html, line_length = 65, from_charset = 'UTF-8') ⇒ Object
13 14 15 |
# File 'lib/actionmailer-text/html_to_plain_text.rb', line 13

# Instance-level entry point for objects that mix in this module.
# Forwards all three arguments unchanged to
# HtmlToPlainText.convert_to_text and returns its result.
#
# @param html [String] markup to convert
# @param line_length [Integer, nil] maximum line width handed to the class method
# @param from_charset [String] passed through as-is
# @return [Object] whatever HtmlToPlainText.convert_to_text returns
def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
  HtmlToPlainText.convert_to_text(html, line_length, from_charset)
end