Class: HTML2TextParser
- Inherits:
-
SGMLParser
- Object
- SGMLParser
- HTML2TextParser
- Defined in:
- lib/feed2imap/html2text-parser.rb
Overview
This class provides a simple SGML parser that strips HTML tags and produces plain text.
Constant Summary
Constants inherited from SGMLParser
SGMLParser::Attrfind, SGMLParser::Charref, SGMLParser::Commentclose, SGMLParser::Commentopen, SGMLParser::Endbracket, SGMLParser::Endtagopen, SGMLParser::Entitydefs, SGMLParser::Entityref, SGMLParser::Incomplete, SGMLParser::Interesting, SGMLParser::Special, SGMLParser::Starttagopen, SGMLParser::Tagfind
Instance Attribute Summary collapse
-
#savedata ⇒ Object
readonly
Returns the value of attribute savedata.
Instance Method Summary collapse
- #close ⇒ Object
- #handle_data(data) ⇒ Object
-
#initialize(verbose = false) ⇒ HTML2TextParser
constructor
A new instance of HTML2TextParser.
- #unknown_endtag(tag) ⇒ Object
- #unknown_starttag(tag, attrs) ⇒ Object
Methods inherited from SGMLParser
#feed, #finish_endtag, #finish_starttag, #goahead, #handle_charref, #handle_comment, #handle_endtag, #handle_entityref, #handle_special, #handle_starttag, #has_context, #parse_comment, #parse_endtag, #parse_special, #parse_starttag, #report_unbalanced, #reset, #setliteral, #setnomoretags, #unknown_charref, #unknown_entityref
Constructor Details
#initialize(verbose = false) ⇒ HTML2TextParser
Returns a new instance of HTML2TextParser.
27 28 29 30 31 32 33 |
# File 'lib/feed2imap/html2text-parser.rb', line 27
#
# Builds a parser with an empty output buffer and no link state, then
# hands the +verbose+ flag on to the superclass constructor.
#
# @param verbose [Boolean] forwarded unchanged to the superclass initializer
def initialize(verbose = false)
  @links = []      # link targets collected so far, in order of appearance
  @href = nil      # target of the anchor currently being processed, if any
  @pre = false     # true while inside a <pre> element
  @savedata = ''   # accumulated plain-text output
  super(verbose)
end
Instance Attribute Details
#savedata ⇒ Object (readonly)
Returns the value of attribute savedata.
25 26 27 |
# File 'lib/feed2imap/html2text-parser.rb', line 25
#
# Reader for the text accumulated so far.
#
# @return [Object] the current value of @savedata
def savedata
  @savedata
end
Instance Method Details
#close ⇒ Object
71 72 73 74 75 76 77 78 79 |
# File 'lib/feed2imap/html2text-parser.rb', line 71
#
# Calls the superclass +close+, then appends a numbered list of the
# collected link targets to the output, one "[n] url" entry per line
# (numbering starts at 1). Nothing is appended when no links were collected.
def close
  super
  return if @links.empty?

  # Blank line separating the body text from the link list.
  @savedata << "\n\n"
  @links.each_with_index do |url, idx|
    @savedata << "[#{idx + 1}] #{url}\n"
  end
end
#handle_data(data) ⇒ Object
35 36 37 38 39 40 |
# File 'lib/feed2imap/html2text-parser.rb', line 35
#
# Appends a chunk of character data to the accumulated output.
#
# Outside of a <pre> element every newline ("\n") is dropped from the chunk;
# inside <pre> the chunk is appended verbatim. The argument itself is never
# modified, so frozen strings and strings the caller still needs are safe
# to pass in.
#
# NOTE(review): newlines are removed rather than replaced by a space, so two
# words separated only by a line break end up joined — confirm this is the
# intended rendering.
#
# @param data [String] text found between tags
# @return [String] the output buffer after appending
def handle_data(data)
  # String#delete returns a new string, leaving the caller's object intact.
  text = @pre ? data : data.delete("\n")
  @savedata << text
end
#unknown_endtag(tag) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/feed2imap/html2text-parser.rb', line 81
#
# Emits the plain-text counterpart of a closing tag.
#
# * </b>, </u>, </i> append "*", "_" and "/" respectively.
# * </pre> appends a blank line and clears the preformatted flag.
# * </a> appends a "[n]" marker, where n is the number of links collected
#   so far, but only if the anchor being closed had an href; the pending
#   href is then cleared.
# Any other tag is ignored.
#
# @param tag [String] name of the tag being closed
def unknown_endtag(tag)
  marker = { 'b' => '*', 'u' => '_', 'i' => '/' }[tag]
  return @savedata << marker if marker

  if tag == 'pre'
    @savedata << "\n\n"
    @pre = false
  elsif tag == 'a' && @href
    @savedata << "[#{@links.length}]"
    @href = nil
  end
end
#unknown_starttag(tag, attrs) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/feed2imap/html2text-parser.rb', line 42
#
# Emits the plain-text counterpart of an opening tag.
#
# * <p> appends a blank line, <br> a single newline.
# * <b>, <u>, <i> append "*", "_" and "/" respectively.
# * <pre> appends a blank line and sets the preformatted flag.
# * <a> remembers the value of its href attribute (the last one wins if
#   several are present, nil if there is none) and, when present, records
#   it in the link list with surrounding quotes removed.
# Any other tag is ignored.
#
# @param tag [String] name of the tag being opened
# @param attrs [Array] attribute pairs; element 0 is the name, element 1 the value
def unknown_starttag(tag, attrs)
  prefix = { 'p' => "\n\n", 'br' => "\n", 'b' => '*', 'u' => '_', 'i' => '/' }[tag]
  return @savedata << prefix if prefix

  if tag == 'pre'
    @savedata << "\n\n"
    @pre = true
  elsif tag == 'a'
    # Take the last href attribute, mirroring a scan that overwrites on each hit.
    href_attr = attrs.select { |attr| attr[0] == 'href' }.last
    @href = href_attr && href_attr[1]
    @links << @href.gsub(/^("|'|)(.*)("|')$/, '\2') if @href
  end
end