Module: TruffleHog

Defined in:
lib/truffle-hog.rb

Constant Summary

VERSION =
"0.0.3"

Class Method Summary

Class Method Details

.collect(tags, type) ⇒ Object



36
37
38
# File 'lib/truffle-hog.rb', line 36

# Keeps only the tags that +feed?+ accepts for the given feed type.
#
# @param tags [Array<String>] candidate tag strings
# @param type [String] feed flavour forwarded to +feed?+
# @return [Array<String>] the accepted tags, in their original order
def self.collect(tags, type)
  tags.select { |candidate| feed?(candidate, type) }
end

.feed?(html, type) ⇒ Boolean

Returns:

  • (Boolean)


40
41
42
# File 'lib/truffle-hog.rb', line 40

# Reports whether a piece of markup carries a +type+ attribute announcing a
# feed of the requested flavour, i.e. a single- or double-quoted value of
# "application/<type>+xml".
#
# @param html [String] markup to inspect
# @param type [String] feed flavour, e.g. "rss" or "atom"; it is interpolated
#   into the pattern as-is (not escaped)
# @return [Boolean] true when the attribute is present, false otherwise
def self.feed?(html, type)
  # =~ yields a match offset (or nil); convert it so this predicate hands
  # callers a real true/false instead of an Integer.
  !(html =~ /type=['"]application\/#{type}\+xml['"]/).nil?
end

.parse_feed_urls(html, favor = :all) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/truffle-hog.rb', line 4

# Collects the feed URLs advertised in +html+, optionally preferring one
# feed flavour over the other.
#
# @param html  [String] markup to search
# @param favor [Symbol] selection policy:
#   * +:all+  - RSS URLs followed by Atom URLs
#   * +:rss+  - RSS URLs, falling back to Atom URLs when there are none
#   * +:atom+ - Atom URLs, falling back to RSS URLs when there are none
# @return [Array<String>, nil] de-duplicated URLs; nil when +favor+ is not
#   one of the symbols listed above
def self.parse_feed_urls(html, favor = :all)
  # De-duplicate up front so every policy returns unique URLs, not just :all
  # (scan_for_tag can report the same URL more than once).
  rss_links  = scan_for_tag(html, "rss").uniq
  atom_links = scan_for_tag(html, "atom").uniq

  case favor
  when :all
    # A URL may be advertised under both flavours, so de-duplicate again.
    (rss_links + atom_links).uniq
  when :rss
    rss_links.empty? ? atom_links : rss_links
  when :atom
    atom_links.empty? ? rss_links : atom_links
  end
end

.scan_for_tag(html, type) ⇒ Object



18
19
20
# File 'lib/truffle-hog.rb', line 18

# Gathers feed URLs of the given type from both <link> and <a> elements.
#
# @param html [String] markup to search
# @param type [String] feed flavour forwarded to +urls+
# @return [Array<String>] URLs found in "link" elements followed by those
#   found in "a" elements
def self.scan_for_tag(html, type)
  from_links   = urls(html, "link", type)
  from_anchors = urls(html, "a", type)
  from_links + from_anchors
end

.urls(html, tag, type) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/truffle-hog.rb', line 22

# Extracts the href of every +tag+ element in +html+ that +collect+ accepts
# for the given feed +type+, keeping only hrefs that begin with "http".
#
# @param html [String] markup to search
# @param tag  [String] element name to look for, e.g. "link" or "a"
# @param type [String] feed flavour handed to +collect+, e.g. "rss"
# @return [Array<String>] matching href values, in document order
def self.urls(html, tag, type)
  # /m lets an opening tag span several lines (attributes are often wrapped);
  # the lookahead requires the element name to end right after +tag+, so "a"
  # no longer matches elements such as <abbr> or <article>.
  elements = html.scan(/<#{tag}(?=[\s\/>]).*?>/m)

  collect(elements, type).map do |element|
    # String#[] with a capture index returns nil when there is no href.
    href = element[/.*href=['"](.*?)['"]/, 1]
    # Relative or missing hrefs are dropped by the trailing compact.
    href if href =~ /\Ahttp/
  end.compact
end