Class: FeedMe::ParserBuilder

Inherits:
Object
  • Object
show all
Defined in:
lib/feedme.rb

Overview

This class is used to create promiscuous parsers.

Direct Known Subclasses

StrictParserBuilder

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ ParserBuilder

Create a new ParserBuilder. Allowed options are:

  • :empty_string_for_nil => false # return the empty string instead of a nil value

  • :error_on_missing_key => false # raise an error if a specified key or virtual method does not exist (otherwise nil is returned)



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/feedme.rb', line 68

# Create a new ParserBuilder and fill in its default configuration.
#
# Allowed options are:
# * :empty_string_for_nil => false # return the empty string instead of a nil value
# * :error_on_missing_key => false # raise an error if a specified key or
#   virtual method does not exist (otherwise nil is returned)
#
# The options hash is stored as given; it is neither copied nor validated here.
def initialize(options={})
  @options = options

  # RSS tags. :item is deliberately not listed here; all_rss_tags adds it
  # from @rss_item_tags when the full tag list is requested.
  @rss_tags = [
    {
      image:     nil,
      textinput: nil,
      skiphours: nil,
      skipdays:  nil,
      items:     [{ rdf_seq: nil }]
    }
  ]
  @rss_item_tags = [{}]

  # Atom tags. :entry is likewise added later, by all_atom_tags.
  @atom_tags = [
    {
      author:      nil,
      contributor: nil
    }
  ]
  @atom_entry_tags = [
    {
      author:      nil,
      contributor: nil
    }
  ]

  # Tags whose value is a date.
  @date_tags = %i[pubdate lastbuilddate published updated dc_date expirationdate]

  # Tags that can be used as the default value for a mixed element.
  @value_tags = { media_content: :url }
  @default_value_tags = [CONTENT_KEY, :href, :url]

  # Tag/attribute aliases.
  @aliases = {
    items:       :item_array,
    item_array:  :entry_array,
    entries:     :entry_array,
    entry_array: :item_array,
    link:        :'link+self'
  }

  # Transformations. The HTML helper library is only required when one of
  # the HTML procs below is actually called, and @html_helper_lib is read at
  # that moment, so changing it after construction takes effect.
  @html_helper_lib = HPRICOT_HELPER
  @default_transformation = [:cleanHtml]
  @transformations = {}
  @transformation_fns = {
    # Remove all HTML tags.
    stripHtml: proc { |str|
      require @html_helper_lib
      FeedMe.html_helper.strip_html(str)
    },

    # Clean HTML content using FeedNormalizer's HtmlCleaner class.
    cleanHtml: proc { |str|
      require @html_helper_lib
      FeedMe.html_helper.clean_html(str)
    },

    # Wrap text at a certain number of characters (respecting word boundaries).
    wrap: proc { |str, col|
      wrapped = str.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n")
      wrapped.strip
    },

    # Truncate text, respecting word boundaries.
    trunc: proc { |str, wordcount|
      str.trunc(wordcount.to_i)
    },

    # Truncate HTML and leave enclosing HTML tags.
    truncHtml: proc { |str, wordcount|
      require @html_helper_lib
      FeedMe.html_helper.truncate_html(str, wordcount.to_i)
    },

    # Return the first capture group of regexp matched against str,
    # or nil when there is no match.
    regexp: proc { |str, regexp|
      found = Regexp.new(regexp).match(str)
      found[1] if found
    },

    # This shouldn't be necessary since all text is automatically
    # unescaped, but some feeds double-escape HTML.
    esc: proc { |str| CGI.unescapeHTML(str) }
  }
end

Instance Attribute Details

#aliases ⇒ Object

A hash of attribute/tag name aliases.



50
51
52
# File 'lib/feedme.rb', line 50

# A hash of attribute/tag name aliases.
#
# Returns whatever is currently held in @aliases (the constructor sets it to
# a Hash); nil if it has not been assigned.
def aliases; @aliases; end

#atom_entry_tags ⇒ Object

The subtags of entry elements that are parsed for Atom feeds.



41
42
43
# File 'lib/feedme.rb', line 41

# The subtags of entry elements that are parsed for Atom feeds.
#
# Returns whatever is currently held in @atom_entry_tags (the constructor
# sets it to an Array containing one Hash); nil if it has not been assigned.
def atom_entry_tags
  @atom_entry_tags
end

#atom_tags ⇒ Object

The tags that are parsed for Atom feeds.



39
40
41
# File 'lib/feedme.rb', line 39

# The tags that are parsed for Atom feeds.
#
# Returns whatever is currently held in @atom_tags (the constructor sets it
# to an Array containing one Hash); nil if it has not been assigned.
def atom_tags; @atom_tags; end

#date_tags ⇒ Object

The names of tags that should be parsed as date values.



43
44
45
# File 'lib/feedme.rb', line 43

# The names of tags that should be parsed as date values.
#
# Returns whatever is currently held in @date_tags (the constructor sets it
# to an Array of Symbols); nil if it has not been assigned.
def date_tags; @date_tags; end

#default_transformation ⇒ Object

An array of the transformation functions applied when the ! suffix is added to the attribute/tag name.



53
54
55
# File 'lib/feedme.rb', line 53

# An array of the transformation functions applied when the ! suffix is
# added to the attribute/tag name.
#
# Returns whatever is currently held in @default_transformation (the
# constructor sets it to [:cleanHtml]); nil if it has not been assigned.
def default_transformation; @default_transformation; end

#default_value_tags ⇒ Object

Tags to use for an element's value when a specific tag isn’t specified.



48
49
50
# File 'lib/feedme.rb', line 48

# Tags to use for an element's value when a specific tag isn't specified.
#
# Returns whatever is currently held in @default_value_tags (the constructor
# sets it to an Array); nil if it has not been assigned.
def default_value_tags; @default_value_tags; end

#html_helper_lib ⇒ Object

The helper library used for HTML transformations.



62
63
64
# File 'lib/feedme.rb', line 62

# The helper library used for HTML transformations.
#
# Returns whatever is currently held in @html_helper_lib (the constructor
# sets it to HPRICOT_HELPER); nil if it has not been assigned.
def html_helper_lib; @html_helper_lib; end

#options ⇒ Object (readonly)

The options passed to this ParserBuilder’s constructor.



33
34
35
# File 'lib/feedme.rb', line 33

# The options passed to this ParserBuilder's constructor.
#
# Returns whatever is currently held in @options; nil if it has not been
# assigned.
def options; @options; end

#rss_item_tags ⇒ Object

The subtags of item elements that are parsed for RSS feeds.



37
38
39
# File 'lib/feedme.rb', line 37

# The subtags of item elements that are parsed for RSS feeds.
#
# Returns whatever is currently held in @rss_item_tags (the constructor sets
# it to an Array containing one empty Hash); nil if it has not been assigned.
def rss_item_tags; @rss_item_tags; end

#rss_tags ⇒ Object

The tags that are parsed for RSS feeds.



35
36
37
# File 'lib/feedme.rb', line 35

# The tags that are parsed for RSS feeds.
#
# Returns whatever is currently held in @rss_tags (the constructor sets it
# to an Array containing one Hash); nil if it has not been assigned.
def rss_tags; @rss_tags; end

#transformation_fns ⇒ Object

Mapping of transformation function names to Procs.



60
61
62
# File 'lib/feedme.rb', line 60

# Mapping of transformation function names to Procs.
#
# Returns whatever is currently held in @transformation_fns (the constructor
# sets it to a Hash of Symbol => Proc); nil if it has not been assigned.
def transformation_fns; @transformation_fns; end

#transformations ⇒ Object

Mapping of transformation names to functions. Each key is a suffix that can be appended to an attribute/tag name, and the value is an array of transformation function names that are applied when that transformation is used.



58
59
60
# File 'lib/feedme.rb', line 58

# Mapping of transformation names to functions. Each key is a suffix that
# can be appended to an attribute/tag name, and the value is an array of
# transformation function names that are applied when that transformation
# is used.
#
# Returns whatever is currently held in @transformations (the constructor
# sets it to an empty Hash); nil if it has not been assigned.
def transformations; @transformations; end

#value_tags ⇒ Object

A hash mapping the name of a mixed element to the name of the attribute/subtag whose value is used as that element's default value (by default, :media_content => :url).



46
47
48
# File 'lib/feedme.rb', line 46

# Per-element choice of the attribute/subtag used as a mixed element's
# default value.
#
# Returns whatever is currently held in @value_tags (the constructor sets it
# to the Hash { :media_content => :url }); nil if it has not been assigned.
def value_tags; @value_tags; end

Instance Method Details

#all_atom_tags ⇒ Object

Prepare tag list for an Atom feed.



168
169
170
171
172
# File 'lib/feedme.rb', line 168

# Prepare tag list for an Atom feed.
#
# Returns a copy of atom_tags whose first element additionally maps :entry
# to a copy of atom_entry_tags. Neither atom_tags nor its first element is
# modified.
def all_atom_tags
  all_tags = atom_tags.dup
  # Array#dup is shallow: without copying the first hash as well, the :entry
  # assignment below would write into the hash held by atom_tags.
  all_tags[0] = all_tags[0].dup
  all_tags[0][:entry] = atom_entry_tags.dup
  all_tags
end

#all_rss_tags ⇒ Object

Prepare tag list for an RSS feed.



161
162
163
164
165
# File 'lib/feedme.rb', line 161

# Prepare tag list for an RSS feed.
#
# Returns a copy of rss_tags whose first element additionally maps :item to
# a copy of rss_item_tags. Neither rss_tags nor its first element is
# modified.
def all_rss_tags
  all_tags = rss_tags.dup
  # Array#dup is shallow: without copying the first hash as well, the :item
  # assignment below would write into the hash held by rss_tags.
  all_tags[0] = all_tags[0].dup
  all_tags[0][:item] = rss_item_tags.dup
  all_tags
end

#emulate_atom! ⇒ Object

Add aliases so that RSS feed elements can be accessed using the names of their Atom counterparts.



191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/feedme.rb', line 191

# Add aliases so that RSS feed elements can be accessed using the names of
# their Atom counterparts.
#
# The mappings are merged into +aliases+ in place, replacing any existing
# entries for the same keys; the value of that merge! call is returned.
# An Array value lists several candidate names (presumably tried in order;
# confirm against the alias lookup code).
def emulate_atom!
  aliases.merge!({
    :rights       => :copyright,
    :content      => :description,
    :contributor  => :author,
    :id           => [ :guid_value, :link ],
    :author       => [ :managingeditor, :webmaster ],
    :updated      => [ :lastbuilddate, :pubdate ],
    # Lower-case :pubdate, matching how this tag is spelled everywhere else
    # in this class (@date_tags, the :updated alias above, emulate_rss!).
    :published    => [ :pubdate, :lastbuilddate ],
    :icon         => :'image/url',
    :logo         => :'image/url',
    :summary      => :'description_trunc'
  })
end

#emulate_rss! ⇒ Object

Add aliases so that Atom feed elements can be accessed using the names of their RSS counterparts.



176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/feedme.rb', line 176

# Add aliases so that Atom feed elements can be accessed using the names of
# their RSS counterparts.
#
# The mappings are merged into +aliases+ in place, replacing any existing
# entries for the same keys; the value of that merge! call is returned.
def emulate_rss!
  rss_names = {
    guid:           :id, # this alias never actually gets used; see FeedData#id
    copyright:      :rights,
    pubdate:        %i[published updated],
    lastbuilddate:  %i[updated published],
    description:    %i[content summary],
    managingeditor: %i[author/name contributor/name],
    webmaster:      %i[author/name contributor/name],
    image:          %i[icon logo]
  }
  aliases.merge!(rss_names)
end

#parse(source) ⇒ Object

Parse source using a Parser created from this ParserBuilder.



207
208
209
# File 'lib/feedme.rb', line 207

# Parse +source+ using a Parser created from this ParserBuilder.
#
# The builder itself, +source+ and the builder's +options+ are handed to
# Parser.new unchanged; whatever Parser.new returns is the result.
def parse(source)
  builder = self
  Parser.new(builder, source, options)
end