Class: FeedParser::LooseFeedParser

Inherits:
BetterSGMLParser show all
Includes:
FeedParserMixin
Defined in:
lib/rfeedparser/loose_feed_parser.rb

Constant Summary collapse

Elements_No_End_Tag =
['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
'img', 'input', 'isindex', 'link', 'meta', 'param']
New_Declname_Re =
/[a-zA-Z][-_.a-zA-Z0-9:]*\s*/

Constants inherited from BetterSGMLParser

BetterSGMLParser::Attrfind, BetterSGMLParser::Charref, BetterSGMLParser::Commentclose, BetterSGMLParser::Commentopen, BetterSGMLParser::Declopen, BetterSGMLParser::Endbracket, BetterSGMLParser::Endtagfind, BetterSGMLParser::Endtagopen, BetterSGMLParser::Entityref, BetterSGMLParser::Incomplete, BetterSGMLParser::Interesting, BetterSGMLParser::Piclose, BetterSGMLParser::Piopenbegin, BetterSGMLParser::Shorttag, BetterSGMLParser::Shorttagopen, BetterSGMLParser::Tagfind

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from BetterSGMLParser

#error, #goahead, #handle_decl, #handle_pi, #output, #parse_comment, #parse_endtag, #parse_pi, #parse_starttag

Constructor Details

#initialize(baseuri, baselang, encoding) ⇒ LooseFeedParser

Returns a new instance of LooseFeedParser.



24
25
26
27
# File 'lib/rfeedparser/loose_feed_parser.rb', line 24

# Creates a parser: runs startup with the three arguments, then invokes the
# superclass constructor.
#
# startup is not defined in this class body as shown; presumably it comes
# from FeedParserMixin -- confirm.
#
# @param baseuri  forwarded unchanged to startup
# @param baselang forwarded unchanged to startup
# @param encoding forwarded unchanged to startup
def initialize(baseuri, baselang, encoding)
  startup(baseuri, baselang, encoding)
  # The empty parentheses call the parent initialize with NO arguments; a bare
  # `super` would implicitly forward all three of this method's arguments.
  super() # Keep the parentheses! No touchy.
end

Instance Attribute Details

#bozoObject

We write the methods that were in BaseHTMLProcessor in the python code in here directly. We do this because if we inherited from BaseHTMLProcessor but then included from FeedParserMixin, the methods of Mixin would overwrite the methods we inherited from BaseHTMLProcessor. This is exactly the opposite of what we want to happen!



11
12
13
# File 'lib/rfeedparser/loose_feed_parser.rb', line 11

# Reader for the @bozo instance variable.
#
# @return [Object, nil] the current value of @bozo (nil if never assigned)
def bozo; @bozo; end

#encodingObject

We write the methods that were in BaseHTMLProcessor in the python code in here directly. We do this because if we inherited from BaseHTMLProcessor but then included from FeedParserMixin, the methods of Mixin would overwrite the methods we inherited from BaseHTMLProcessor. This is exactly the opposite of what we want to happen!



11
12
13
# File 'lib/rfeedparser/loose_feed_parser.rb', line 11

# Reader for the @encoding instance variable.
#
# @return [Object, nil] the current value of @encoding (nil if never assigned)
def encoding; @encoding; end

#entriesObject

We write the methods that were in BaseHTMLProcessor in the python code in here directly. We do this because if we inherited from BaseHTMLProcessor but then included from FeedParserMixin, the methods of Mixin would overwrite the methods we inherited from BaseHTMLProcessor. This is exactly the opposite of what we want to happen!



11
12
13
# File 'lib/rfeedparser/loose_feed_parser.rb', line 11

# Reader for the @entries instance variable.
#
# @return [Object, nil] the current value of @entries (nil if never assigned)
def entries; @entries; end

#feeddataObject

We write the methods that were in BaseHTMLProcessor in the python code in here directly. We do this because if we inherited from BaseHTMLProcessor but then included from FeedParserMixin, the methods of Mixin would overwrite the methods we inherited from BaseHTMLProcessor. This is exactly the opposite of what we want to happen!



11
12
13
# File 'lib/rfeedparser/loose_feed_parser.rb', line 11

# Reader for the @feeddata instance variable.
#
# @return [Object, nil] the current value of @feeddata (nil if never assigned)
def feeddata; @feeddata; end

#namespacesInUseObject

We write the methods that were in BaseHTMLProcessor in the python code in here directly. We do this because if we inherited from BaseHTMLProcessor but then included from FeedParserMixin, the methods of Mixin would overwrite the methods we inherited from BaseHTMLProcessor. This is exactly the opposite of what we want to happen!



11
12
13
# File 'lib/rfeedparser/loose_feed_parser.rb', line 11

# Reader for the @namespacesInUse instance variable.
#
# @return [Object, nil] the current value of @namespacesInUse (nil if never
#   assigned)
def namespacesInUse; @namespacesInUse; end

Instance Method Details

#decodeEntities(element, data) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rfeedparser/loose_feed_parser.rb', line 54

# Normalizes character references in +data+ and, for non-XML content, decodes
# the five predefined entities into literal characters.
#
# Pass 1 (always): the decimal and lowercase-hex numeric references for
# < > & " ' are rewritten to their named forms (&lt; &gt; &amp; &quot; &apos;).
#
# Pass 2 (conditional): when @contentparams has a 'type' key and that type
# does not end in "xml" (a nil type is treated as 'xml'), the named forms are
# replaced by the literal characters.
#
# The replacements run one after another, in the order listed, so the result
# of an earlier replacement is visible to the later ones.
#
# +data+ is modified in place and is also the return value.
#
# @param element unused by this method
# @param data [String] mutable text to rewrite
# @return [String] the same string object that was passed in
def decodeEntities(element, data)
  [
    ['&#60;', '&lt;'],   ['&#x3c;', '&lt;'],
    ['&#62;', '&gt;'],   ['&#x3e;', '&gt;'],
    ['&#38;', '&amp;'],  ['&#x26;', '&amp;'],
    ['&#34;', '&quot;'], ['&#x22;', '&quot;'],
    ['&#39;', '&apos;'], ['&#x27;', '&apos;']
  ].each { |numeric_ref, named_ref| data.gsub!(numeric_ref, named_ref) }

  # Only content explicitly typed as something other than *xml gets the named
  # entities turned into real characters.
  if @contentparams.has_key?('type') && (@contentparams['type'] || 'xml') !~ /xml$/u
    [
      ['&lt;', '<'],
      ['&gt;', '>'],
      ['&amp;', '&'],
      ['&quot;', '"'],
      ['&apos;', "'"]
    ].each { |named_ref, char| data.gsub!(named_ref, char) }
  end

  data
end

#feedObject



17
18
19
# File 'lib/rfeedparser/loose_feed_parser.rb', line 17

# Returns the parsed feed data; this reads the @feeddata instance variable.
#
# @return [Object, nil] the current value of @feeddata (nil if never assigned)
def feed; @feeddata; end

#feed=(data) ⇒ Object



20
21
22
# File 'lib/rfeedparser/loose_feed_parser.rb', line 20

# Stores +data+ in the @feeddata instance variable.
#
# @param data the value to store
# @return the value that was stored
def feed=(data); @feeddata = data; end

#parse(data) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/rfeedparser/loose_feed_parser.rb', line 34

# Pre-processes raw feed text and hands it to the SGML parser via sgml_feed.
#
# Steps, in order:
# 1. Escape "<!" as "&lt;!" unless it starts a DOCTYPE, a comment ("<!--") or
#    a marked section ("<![").
# 2. Expand self-closing tags ("<x/>") into "<x></x>", except for the tag
#    names listed in Elements_No_End_Tag, which are left as written. Only tags
#    without attributes match the pattern (no whitespace inside the name).
# 3. Replace the numeric references &#39; and &#34; with the literal
#    apostrophe and double quote respectively.
# 4. If @encoding is set and non-empty, pass the text through uconvert with
#    'utf-8' and @encoding.
#
# Steps 1-3 modify +data+ in place.
#
# @param data [String] mutable feed text
# @return whatever sgml_feed returns
def parse(data)
  data.gsub!(/<!((?!DOCTYPE|--|\[))/i, '&lt;!\1')

  data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
    # Drop the leading "<" and trailing "/>" to get the bare tag name.
    name = tag[1..-3].strip
    Elements_No_End_Tag.include?(name) ? tag : "<#{name}></#{name}>"
  end

  data.gsub!(/&#39;/, "'")
  # &#34; is the double quote (ASCII 34), not the apostrophe.
  data.gsub!(/&#34;/, '"')

  if @encoding && !@encoding.empty? # FIXME unicode check type(u'')
    data = uconvert(data, 'utf-8', @encoding)
  end

  sgml_feed(data) # sgml_feed is an alias declared elsewhere in this class
end

#resetObject



29
30
31
32
# File 'lib/rfeedparser/loose_feed_parser.rb', line 29

# Resets parser state: replaces @pieces with a fresh empty array, then runs the
# superclass reset.
#
# @return whatever the superclass reset returns
def reset
  @pieces = []
  # Bare `super` forwards this method's arguments (none) to the parent reset.
  super
end

#sgml_feedObject

feed needs to be mapped to feeddata, not to the SGMLParser method feed. I think.



16
# File 'lib/rfeedparser/loose_feed_parser.rb', line 16

# Makes sgml_feed another name for the method `feed` as it is defined at the
# point this statement runs. The file markers put this alias on line 16, just
# before this class's own `def feed` (line 17), so sgml_feed presumably keeps
# pointing at the inherited parser `feed` while `feed` itself is redefined as a
# plain reader -- confirm against the source file. parse calls sgml_feed.
alias :sgml_feed :feed