Module: TruffleHog
- Defined in:
- lib/truffle-hog.rb
Constant Summary collapse
- VERSION =
"0.0.3"
Class Method Summary collapse
- .collect(tags, type) ⇒ Object
- .feed?(html, type) ⇒ Boolean
- .parse_feed_urls(html, favor = :all) ⇒ Object
- .scan_for_tag(html, type) ⇒ Object
- .urls(html, tag, type) ⇒ Object
Class Method Details
.collect(tags, type) ⇒ Object
36 37 38 |
# File 'lib/truffle-hog.rb', line 36 def self.collect(tags, type) tags.collect {|t| t if feed?(t, type)}.compact end |
.feed?(html, type) ⇒ Boolean
40 41 42 |
# File 'lib/truffle-hog.rb', line 40 def self.feed?(html, type) html =~ /.*type=['"]application\/#{type}\+xml['"].*/ end |
.parse_feed_urls(html, favor = :all) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/truffle-hog.rb', line 4 def self.parse_feed_urls(html, favor = :all) rss_links = scan_for_tag(html, "rss") atom_links = scan_for_tag(html, "atom") case favor when :all (rss_links + atom_links).uniq when :rss rss_links.empty? ? atom_links : rss_links when :atom atom_links.empty? ? rss_links : atom_links end end |
.scan_for_tag(html, type) ⇒ Object
18 19 20 |
# File 'lib/truffle-hog.rb', line 18 def self.scan_for_tag(html, type) urls(html, "link", type) + urls(html, "a", type) end |
.urls(html, tag, type) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/truffle-hog.rb', line 22 def self.urls(html, tag, type) tags = html.scan(/(<#{tag}.*?>)/).flatten tags = collect(tags, type) tags.map do |tag| matches = tag.match(/.*href=['"](.*?)['"].*/) if matches.nil? url = "" else url = matches[1] end url =~ /^http.*/ ? url : nil end.compact end |