Class: Fans::Fetcher
- Inherits:
- Object
- Inheritance chain:
- Object
- Fans::Fetcher
- Defined in:
- lib/fans.rb
Instance Attribute Summary collapse
-
#links ⇒ Object
readonly
Returns the value of attribute links.
-
#pages ⇒ Object
readonly
Returns the value of attribute pages.
Instance Method Summary collapse
- #fetch(url) ⇒ Object
-
#initialize ⇒ Fetcher
constructor
A new instance of Fetcher.
- #page_by_id(id) ⇒ Object
Constructor Details
#initialize ⇒ Fetcher
Returns a new instance of Fetcher.
11 12 13 14 15 |
# File 'lib/fans.rb', line 11
#
# Sets up the empty state of the fetcher.
#
# * @page_id - counter holding the most recently issued page id; starts at 0.
# * @pages   - Hash whose default block assigns the next id (1, 2, 3, ...) to
#              any key that is read for the first time and stores it.
# * @links   - two-level Hash; reading links[from][to] for the first time
#              creates the inner Hash and an empty Set at that position.
def initialize
  @page_id = 0
  @pages = Hash.new { |ids, page| ids[page] = (@page_id += 1) }
  # Distinct block parameter names per level so the inner block does not
  # shadow the outer one.
  @links = Hash.new do |by_source, from|
    by_source[from] = Hash.new { |by_target, to| by_target[to] = Set.new }
  end
end
Instance Attribute Details
#links ⇒ Object (readonly)
Returns the value of attribute links.
9 10 11 |
# File 'lib/fans.rb', line 9
#
# Read-only accessor.
#
# @return [Object] the current value of the @links instance variable
def links
  @links
end
#pages ⇒ Object (readonly)
Returns the value of attribute pages.
9 10 11 |
# File 'lib/fans.rb', line 9
#
# Read-only accessor.
#
# @return [Object] the current value of the @pages instance variable
def pages
  @pages
end
Instance Method Details
#fetch(url) ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/fans.rb', line 24
#
# Downloads +url+, parses the response body as HTML and records every
# <a href> found in it.
#
# For each anchor that has an href attribute:
# * the target is looked up in +pages+ (which assigns an id on first sight),
# * a label is derived from the anchor text, or from its title attribute when
#   the text is blank, with runs of whitespace collapsed to a single space,
# * the label is added to links[from_id][to_id]. The entry is created even
#   when the anchor has no usable label, so the edge itself is still recorded;
#   nil / empty labels are not stored.
#
# Href handling:
# * "//host/path" (protocol-relative) is completed with the scheme of +url+,
# * "/path" (root-relative) is prefixed with the origin of +url+, including
#   a non-default port,
# * anything else is recorded verbatim (path-relative hrefs are NOT resolved).
#
# NOTE(review): HTTP.get is presumably the http.rb gem client; network and
# parse errors are not rescued here and propagate to the caller.
#
# @param url [String] address of the page to fetch
# @return [Object] whatever +each+ returns for the collection of anchors
def fetch(url)
  uri = URI.parse(url)
  origin = "#{uri.scheme}://#{uri.host}"
  # Keep an explicit port; dropping it would point root-relative links at the
  # wrong server.
  origin += ":#{uri.port}" if uri.port && uri.port != uri.default_port

  from_id = pages[url]
  resp = HTTP.get(url)
  html = Nokogiri::HTML(resp.body.to_s)

  html.css('a').each do |a|
    to_page = a.attributes['href']&.value
    next unless to_page

    if to_page.start_with?('//')
      # Protocol-relative: already carries its own host.
      to_page = "#{uri.scheme}:#{to_page}"
    elsif to_page.start_with?('/')
      to_page = "#{origin}#{to_page}"
    end
    to_id = pages[to_page]

    text = a.text.strip.gsub(/\s+/, ' ')
    if text.empty?
      title = a.attributes['title']&.value
      text = title.to_s.strip.gsub(/\s+/, ' ')
    end

    labels = links[from_id][to_id]
    labels << text unless text.empty?
  end
end
#page_by_id(id) ⇒ Object
17 18 19 20 21 22 |
# File 'lib/fans.rb', line 17
#
# Reverse lookup: returns the key of +pages+ whose value is +id+.
#
# The inverted Hash is cached in @id_to_page and rebuilt whenever its size
# differs from +pages.size+, i.e. after new entries were added to +pages+.
#
# @param id [Object] value to look up among the values of +pages+
# @return [Object, nil] the matching key, or nil when no entry has that value
def page_by_id(id)
  cache_stale = @id_to_page.nil? || @id_to_page.size != pages.size
  @id_to_page = pages.invert if cache_stale
  @id_to_page[id]
end