Class: ArchiveOrgLinkGrabber

Inherits:
Object
  • Object
show all
Defined in:
lib/utils/extract_urls.rb

Overview

Defined Under Namespace

Classes: Links

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(urls) ⇒ ArchiveOrgLinkGrabber

Returns a new instance of ArchiveOrgLinkGrabber.



55
56
57
# File 'lib/utils/extract_urls.rb', line 55

# Builds a grabber and immediately processes the given URLs.
#
# @param urls [String, Array<String>] one URL or a list of URLs; the value
#   is coerced with Kernel#Array before being handed to #process_urls
def initialize(urls)
  url_list = Array(urls)
  process_urls(url_list)
end

Instance Attribute Details

#links ⇒ Object

Returns the value of attribute links.



53
54
55
# File 'lib/utils/extract_urls.rb', line 53

# Reader for the @links instance variable.
#
# @return [Object] the current value of @links (#process_urls assigns a
#   Links instance through the matching writer)
def links
  @links
end

Instance Method Details

#extract_links(doc) ⇒ Object


68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/utils/extract_urls.rb', line 68

# Collects the URLs of the MP3 links found in a parsed page.
#
# Every <a> element whose href ends in "mp3" is kept, except hrefs ending in
# "_flat.mp3" or "_CT_EQ.mp3". Each kept href is prefixed with the href of
# the page's first <base> element.
#
# @param doc [#css] parsed document; must answer `css(selector)` with a list
#   of nodes that support `node['href']`
# @return [Array<String>] base URL + href for each accepted link, in
#   document order
def extract_links(doc)
  base_tag = doc.css('base').first
  # A page without a <base> element has no prefix to add; use an empty one
  # instead of failing on nil, so the hrefs are returned as written.
  base_url = base_tag ? base_tag['href'].to_s : ''

  # Compared as literal suffixes so that the dot only matches a real dot.
  skipped_suffixes = ['_flat.mp3', '_CT_EQ.mp3']

  hrefs = doc.css('a').map { |link| link['href'] }
  wanted = hrefs.select do |href|
    # Anchors without an href yield nil and are ignored.
    href && href.end_with?('mp3') && !href.end_with?(*skipped_suffixes)
  end
  wanted.map { |href| base_url + href }
end

#process_urls(urls) ⇒ Object



59
60
61
62
63
64
65
66
# File 'lib/utils/extract_urls.rb', line 59

# Fetches each URL, parses the response as HTML, and stores every link that
# #extract_links returns for those pages.
#
# @param urls [Array<String>] locations to fetch, passed one by one to `open`
# @return [Links] the collection assigned to `links`
def process_urls(urls)
  # NOTE(review): `open` only fetches URLs when open-uri has patched
  # Kernel#open; Ruby 3.0 dropped that in favour of URI.open — confirm the
  # Ruby version this file targets before switching.
  found = urls.flat_map do |url|
    # Block form closes the handle once the page has been parsed.
    doc = open(url) { |io| Nokogiri::HTML(io) }
    extract_links(doc)
  end
  self.links = Links.new(found)
end

#yaml_formatted ⇒ Object



81
82
83
# File 'lib/utils/extract_urls.rb', line 81

# Delegates to the `yaml_formatted` method of the object returned by #links.
#
# @return [Object] whatever `links.yaml_formatted` returns
def yaml_formatted
  collection = links
  collection.yaml_formatted
end