Class: FeedParser::Parser
- Inherits:
-
Object
- Object
- FeedParser::Parser
- Includes:
- LogUtils::Logging
- Defined in:
- lib/feedparser/parser.rb
Class Method Summary collapse
-
.parse(text, opts = {}) ⇒ Object
convenience class/factory method.
Instance Method Summary collapse
-
#initialize(text) ⇒ Parser
constructor
Note: let's keep/use the same API as RSS::Parser for now.
- #parse ⇒ Object
- #parse_json ⇒ Object
- #parse_microformats ⇒ Object
- #parse_xml ⇒ Object
Constructor Details
#initialize(text) ⇒ Parser
Note: let's keep/use the same API as RSS::Parser for now
17 18 19 |
# File 'lib/feedparser/parser.rb', line 17
#
# Constructor; only stores the feed text for a later call to #parse.
#
# Note: the signature (a single text argument) mirrors RSS::Parser for now.
#
# @param text the feed text to parse (expected to be a String - the
#   parse methods call String methods on it)
def initialize( text )
  @text = text
end
Class Method Details
.parse(text, opts = {}) ⇒ Object
convenience class/factory method
12 13 14 |
# File 'lib/feedparser/parser.rb', line 12
#
# Convenience class/factory method: builds a parser for the text
# and returns the result of its #parse call.
#
# @param text the feed text; passed on to the constructor as is
# @param opts [Hash] accepted for call compatibility; not used by this method
# @return [Object] whatever #parse returns
def self.parse( text, opts={} )
  parser = new( text )
  parser.parse
end
Instance Method Details
#parse ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/feedparser/parser.rb', line 23
#
# Sniffs the format of the feed text and dispatches to the matching
# parse_* method.
#
# Detection order:
#   1. xml  - text starts with an xml prolog or a feed / rss root element
#   2. json - text starts with { or the head includes the jsonfeed version 1 url
#   3. html w/ microformats v2 (h-entry, h-feed) - only if the (optional)
#      Microformats constant is defined, that is, the gem got loaded
#   4. otherwise assume xml
#
# @return [Object] whatever the selected parse_* method returns
def parse
  ## note: remove leading spaces first and then sniff the first 100 or so chars;
  ##   slicing first and stripping second leaves nothing to sniff
  ##   if the text starts with a long run of whitespace
  head = @text.lstrip[0..100]

  ## note: dots are escaped to match the literal version url only
  jsonfeed_version_regex = %r{"version":\s*"https://jsonfeed\.org/version/1"}

  ## check if starts with known xml prologs
  ##  note: match the root element by name
  ##    (e.g. <feed xmlns="..."> or <rss version="2.0">);
  ##    a literal '<feed/' or '<rss/' prefix only matches an empty self-closing
  ##    element and lets real feeds (without xml prolog) fall through
  ##    to the checks below
  xml_prolog_regex = %r{\A<(?:\?xml|(?:feed|rss)\b)}

  if head =~ xml_prolog_regex
    parse_xml
  ## check if starts with { for json object/hash
  ##    or if includes jsonfeed prolog
  elsif head.start_with?( '{' ) || head =~ jsonfeed_version_regex
    parse_json
  ## note: reading/parsing microformat is for now optional
  ##    microformats gem requires nokogiri
  ##       nokogiri (uses libxml c-extensions) makes it hard to install (sometimes)
  ##       thus, if you want to use it, please opt-in to keep the install "light"
  #
  #   for now check for microformats v2 (e.g. h-entry, h-feed)
  #     check for v1 too - why? why not? (e.g. hentry, hatom ??)
  elsif defined?( Microformats ) && (@text.include?( 'h-entry' ) || @text.include?( 'h-feed' ) )
    parse_microformats
  else  ## assume xml for now
    parse_xml
  end
end
#parse_json ⇒ Object
71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/feedparser/parser.rb', line 71
#
# Parses the feed text as json (using JSON.parse) and hands the resulting
# data over to JsonFeedBuilder.build.
#
# @return [Object] the feed returned by JsonFeedBuilder.build
def parse_json
  logger.debug "using stdlib json/#{JSON::VERSION}"
  logger.debug "Parsing feed in json..."

  ## build first, log second; tap hands back the built feed as the return value
  JsonFeedBuilder.build( JSON.parse( @text ) ).tap do |normalized|
    logger.debug "== #{normalized.format} / #{normalized.title} =="
  end
end
#parse_microformats ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/feedparser/parser.rb', line 56
#
# Parses the feed text as html w/ microformats (using Microformats.parse),
# converts the collection to a hash and hands it over to HyFeedBuilder.build.
#
# Note: references the Microformats constant; #parse only calls this method
#   if the constant is defined.
#
# @return [Object] the feed returned by HyFeedBuilder.build
def parse_microformats
  logger.debug "using microformats/#{Microformats::VERSION}"
  logger.debug "Parsing feed in html (w/ microformats)..."

  items_hash = Microformats.parse( @text ).to_hash

  ## build first, log second; tap hands back the built feed as the return value
  HyFeedBuilder.build( items_hash ).tap do |normalized|
    logger.debug "== #{normalized.format} / #{normalized.title} =="
  end
end
#parse_xml ⇒ Object
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/feedparser/parser.rb', line 84
#
# Parses the feed text as xml (using RSS::Parser with validation turned off
# and unknown elements ignored) and normalizes the result:
# an RSS::Atom::Feed goes to AtomFeedBuilder.build,
# anything else goes to RssFeedBuilder.build.
#
# @return [Object] the feed returned by the selected builder
def parse_xml
  logger.debug "using stdlib rss/#{RSS::VERSION}"

  rss_parser = RSS::Parser.new( @text )
  rss_parser.do_validate            = false
  rss_parser.ignore_unknown_element = true

  logger.debug "Parsing feed in xml..."
  raw_feed = rss_parser.parse    # not yet normalized
  logger.debug " feed.class=#{raw_feed.class.name}"

  ## pick the builder by feed class; everything that is not atom is assumed to be rss
  builder    = raw_feed.is_a?( RSS::Atom::Feed ) ? AtomFeedBuilder : RssFeedBuilder
  normalized = builder.build( raw_feed )

  logger.debug "== #{normalized.format} / #{normalized.title} =="
  normalized
end