Class: Fans::Fetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/fans.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Fetcher

Returns a new instance of Fetcher.



11
12
13
14
15
# File 'lib/fans.rb', line 11

# Sets up empty crawl state.
#
# @pages assigns the next sequential integer id (starting at 1) to any key
# looked up for the first time. @links is a two-level Hash whose leaves are
# Sets, created lazily on first access: @links[from][to] => Set.
def initialize
  @page_id = 0
  @pages = Hash.new { |ids, page| ids[page] = (@page_id += 1) }
  @links = Hash.new do |by_source, from|
    by_source[from] = Hash.new { |by_target, to| by_target[to] = Set.new }
  end
end

Instance Attribute Details

#links ⇒ Object (readonly)

Returns the value of attribute links.



9
10
11
# File 'lib/fans.rb', line 9

# Read accessor for the link graph held in @links.
#
# As built by #initialize and filled by #fetch, this is a nested Hash:
# source page id => target page id => Set of link labels.
def links
  @links
end

#pages ⇒ Object (readonly)

Returns the value of attribute pages.



9
10
11
# File 'lib/fans.rb', line 9

# Read accessor for the page registry held in @pages.
#
# As built by #initialize, this is a Hash whose default block assigns the next
# sequential integer id (starting at 1) to any key looked up for the first
# time; #fetch uses page URLs as keys.
def pages
  @pages
end

Instance Method Details

#fetch(url) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/fans.rb', line 24

# Downloads +url+, walks every anchor that carries an +href+ and records one
# edge per anchor in #links, using the ids that #pages assigns to the source
# and target pages.
#
# Each href is resolved against +url+ itself, so the port of the source page
# is kept and root-relative, path-relative and protocol-relative hrefs all end
# up as absolute URLs. An href that cannot be parsed is registered verbatim.
#
# The label of an edge is the anchor text, or the +title+ attribute when the
# text is blank. An anchor with neither still creates the edge, but adds no
# label (in particular, +nil+ is never stored in the label Set).
#
# @param url [String] URL of the page to download
# @return [Object] the value of iterating +html.css('a')+ with +each+
# @raise [URI::InvalidURIError] if +url+ itself cannot be parsed
def fetch(url)
  base = URI.parse(url)

  from_id = pages[url]
  resp = HTTP.get(url)
  html = Nokogiri::HTML(resp.body.to_s)

  html.css('a').each do |a|
    href = a.attributes['href']&.value
    next unless href

    to_id = pages[resolve_href(base, href)]
    # Touch the edge first so a link without any label is still recorded.
    labels = links[from_id][to_id]
    label = link_label(a)
    labels << label if label
  end
end

# Turns +href+ into an absolute URL string relative to the +base+ URI.
# Falls back to the raw href when it (or the combination) is not a valid URI.
private def resolve_href(base, href)
  # Handled by hand so the target host never inherits the base page's port.
  return "#{base.scheme}:#{href}" if base.scheme && href.start_with?('//')

  base.merge(href).to_s
rescue URI::Error
  href
end

# Returns the first non-blank candidate among the anchor text and its +title+
# attribute, with whitespace runs collapsed to one space; nil if both are blank.
private def link_label(anchor)
  [anchor.text, anchor.attributes['title']&.value].each do |raw|
    label = raw.to_s.strip.gsub(/\s+/, ' ')
    return label unless label.empty?
  end
  nil
end

#page_by_id(id) ⇒ Object



17
18
19
20
21
22
# File 'lib/fans.rb', line 17

# Reverse lookup: returns the key of #pages whose value is +id+, or nil when
# no page has that id.
#
# The inverted Hash is cached in @id_to_page and rebuilt whenever its size no
# longer matches the size of #pages.
def page_by_id(id)
  unless @id_to_page && @id_to_page.size == pages.size
    @id_to_page = pages.invert
  end

  @id_to_page[id]
end