Class: W3map::Neurons::SimpleCrawler

Inherits:
Neuron
  • Object
show all
Defined in:
lib/w3map/neurons/simple_crawler.rb

Instance Method Summary collapse

Methods inherited from Neuron

#initialize

Constructor Details

This class inherits a constructor from W3map::Neurons::Neuron

Instance Method Details

#process(url, response, data) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/w3map/neurons/simple_crawler.rb', line 5

# Extracts the page title and the outgoing links from an HTML response.
#
# Responses whose Content-Type header does not mention text/html are ignored.
# For HTML responses the text of the head/title element is stored under
# data['page-title']; then every <a> element that carries an href is made
# absolute against +url+ and normalized via HtmlHelpers. A link that is
# internal and not yet processed by the session queue is pushed onto that
# queue; every other link (external, or internal but already processed) is
# handed to the queue's add_to_processing_urls.
#
# @param url [Object] address of the fetched page, used as the base for its links
# @param response [#header, #body] the fetched response
# @param data [Hash] per-page result store; 'page-title' is written here
# @return [Object, nil] nil for non-HTML responses, otherwise the result of
#   iterating over the anchors
def process(url, response, data)
  # Media types are case-insensitive, so "Text/HTML; charset=..." must match too.
  return unless response.header['Content-Type'] =~ %r{text/html}i

  doc = Nokogiri::HTML(response.body)
  # NodeSet#text yields a String even when nothing matched, so no fallback is needed.
  data['page-title'] = doc.search('head/title').text

  doc.css('a').each do |anchor|
    href = anchor[:href]
    next if href.nil?

    found_url = HtmlHelpers.normalize(HtmlHelpers.make_absolute(url, href))
    queue = @bot.session.queue
    if HtmlHelpers.is_internal?(url, found_url) && !queue.already_processed?(found_url)
      queue.push(found_url)
    else
      queue.add_to_processing_urls(found_url)
    end
  end
end