Class: FeedParser::Parser

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/feedparser/parser.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ Parser

Note: let's keep/use the same API as RSS::Parser for now


17
18
19
# File 'lib/feedparser/parser.rb', line 17

# Creates a parser for the given feed source.
#
# @param text the raw feed text; stored in @text and read later by #parse
def initialize(text)
  @text = text
end

Class Method Details

.parse(text, opts = {}) ⇒ Object

convenience class/factory method


12
13
14
# File 'lib/feedparser/parser.rb', line 12

# Convenience factory: builds a parser for +text+ and parses it in one call.
#
# @param text the raw feed text handed to the constructor
# @param opts accepted for call compatibility; not read by this method
# @return whatever the instance-level #parse returns
def self.parse(text, opts = {})
  parser = new(text)
  parser.parse
end

Instance Method Details

#parse ⇒ Object


23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/feedparser/parser.rb', line 23

# Sniffs the format from the first ~100 characters of @text and dispatches
# to the matching parser (xml, json or microformats).
#
# Order of checks:
#   1. xml prolog or an atom/rss root element  => parse_xml
#   2. leading { or a jsonfeed version marker   => parse_json
#   3. Microformats loaded and the text mentions h-entry / h-feed
#                                               => parse_microformats
#   4. anything else                            => parse_xml (assumed)
#
# @return the result of the selected parse_* method
def parse
  head = @text[0..100].strip     # note: remove leading spaces if present

  jsonfeed_version_regex = %r{"version":\s*"https://jsonfeed.org/version/1"}

  ## check if starts with known xml prologs / root elements
  ##   note: match the opening tag name only (no trailing slash), so that
  ##     <feed xmlns="..."> and <rss version="2.0"> get detected as xml
  ##     before the microformats check below gets a chance to claim them
  if head.start_with?( '<?xml' ) ||
     head.start_with?( '<feed' ) ||
     head.start_with?( '<rss' )
     parse_xml
  ## check if starts with { for json object/hash
  ##    or if includes jsonfeed prolog
  elsif head.start_with?( '{' ) ||
        head =~ jsonfeed_version_regex
     parse_json
  ##  note: reading/parsing microformat is for now optional
  ##    microformats gem requires nokogiri
  ##       nokogiri (uses libxml c-extensions) makes it hard to install (sometime)
  ##       thus, if you want to use it, please opt-in to keep the install "light"
  #
  #  for now check for microformats v2 (e.g. h-entry, h-feed)
  #    check for v1 too - why? why not? (e.g. hentry, hatom ??)
  elsif defined?( Microformats ) &&
        (@text.include?( 'h-entry' ) ||
         @text.include?( 'h-feed' )
        )
     parse_microformats
  else  ## assume xml for now
     parse_xml
  end
end

#parse_json ⇒ Object


71
72
73
74
75
76
77
78
79
80
81
# File 'lib/feedparser/parser.rb', line 71

# Parses @text as json and builds a normalized feed from the resulting hash.
#
# @return the feed built by JsonFeedBuilder.build
def parse_json
  logger.debug "using stdlib json/#{JSON::VERSION}"
  logger.debug "Parsing feed in json..."

  data = JSON.parse( @text )

  # log a one-line summary, then hand back the new (normalized) feed
  JsonFeedBuilder.build( data ).tap do |feed|
    logger.debug "== #{feed.format} / #{feed.title} =="
  end
end

#parse_microformats ⇒ Object


56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/feedparser/parser.rb', line 56

# Parses @text with Microformats and builds a normalized feed from the
# collection converted to a hash.
#
# @return the feed built by HyFeedBuilder.build
def parse_microformats
  logger.debug "using microformats/#{Microformats::VERSION}"
  logger.debug "Parsing feed in html (w/ microformats)..."

  items = Microformats.parse( @text ).to_hash

  # log a one-line summary, then hand back the new (normalized) feed
  HyFeedBuilder.build( items ).tap do |feed|
    logger.debug "== #{feed.format} / #{feed.title} =="
  end
end

#parse_xml ⇒ Object


84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/feedparser/parser.rb', line 84

# Parses @text with RSS::Parser (validation off, unknown elements ignored)
# and builds a normalized feed from the result.
#
# @return the feed built by AtomFeedBuilder.build when the parsed object
#   is an RSS::Atom::Feed, otherwise by RssFeedBuilder.build
def parse_xml
  logger.debug "using stdlib rss/#{RSS::VERSION}"

  rss_parser = RSS::Parser.new( @text )
  rss_parser.do_validate            = false
  rss_parser.ignore_unknown_element = true

  logger.debug "Parsing feed in xml..."
  raw_feed = rss_parser.parse   # not yet normalized

  logger.debug "  feed.class=#{raw_feed.class.name}"

  # anything that is not an atom feed is assumed to be rss
  builder = raw_feed.is_a?( RSS::Atom::Feed ) ? AtomFeedBuilder : RssFeedBuilder

  # log a one-line summary, then hand back the new (normalized) feed
  builder.build( raw_feed ).tap do |feed|
    logger.debug "== #{feed.format} / #{feed.title} =="
  end
end