Class: Ubi::Aranea

Inherits:
Object
  • Object
show all
Defined in:
lib/ubi/aranea.rb

Overview

Base class for araneas (spiders).

Constant Summary collapse

# Default crawler settings. #crawl! merges the per-instance opts over these
# (instance values win on key collisions) and passes the result to
# Polipus.crawler, so the keys are presumably Polipus option names — confirm
# against the Polipus documentation.
#
# Frozen so the defaults shared by every instance cannot be mutated in place;
# Hash#merge returns a new hash, so merging is unaffected.
OPTIONS = {
  workers: 3,
  user_agent: "Ubi v#{Ubi::VERSION}",
  depth_limit: 1

  # storage: MemoryStore
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(thema, url, opts = {}) ⇒ Aranea

Returns a new instance of Aranea.



15
16
17
18
19
# File 'lib/ubi/aranea.rb', line 15

# Builds a new Aranea, storing all three arguments unchanged.
#
# @param thema value kept in @thema and exposed by the thema reader
# @param url   value kept in @url and exposed by the url reader
# @param opts  per-instance options kept in @opts; defaults to an empty Hash
def initialize(thema, url, opts = {})
  @thema, @url, @opts = thema, url, opts
end

Instance Attribute Details

#datum ⇒ Object

Returns the current value of the datum attribute.



13
14
15
# File 'lib/ubi/aranea.rb', line 13

# Reader for the datum attribute.
#
# @return [Object] the current value of @datum (nil while it is unassigned)
def datum; @datum; end

#thema ⇒ Object

Returns the thema this aranea was constructed with.



13
14
15
# File 'lib/ubi/aranea.rb', line 13

# Reader for the thema attribute.
#
# @return [Object] the current value of @thema (set by the constructor)
def thema; @thema; end

#url ⇒ Object

Returns the URL this aranea was constructed with; #crawl! passes it to Polipus.crawler.



13
14
15
# File 'lib/ubi/aranea.rb', line 13

# Reader for the url attribute.
#
# @return [Object] the current value of @url (set by the constructor)
def url; @url; end

Instance Method Details

#crawl! ⇒ Object



23
24
25
26
27
28
29
30
31
# File 'lib/ubi/aranea.rb', line 23

# Runs a Polipus crawl for url, using OPTIONS overridden by the options given
# to the constructor, and prints one line per downloaded page: the text of
# the page's <title> element in single quotes, followed by the page URL in
# parentheses.
#
# NOTE(review): `name` is not defined in the part of the class shown here;
# presumably a subclass provides it — confirm.
#
# @return whatever Polipus.crawler returns
def crawl!
  Polipus.crawler(name, url, OPTIONS.merge(@opts)) do |crawler|
    # In-place page processing
    crawler.on_page_downloaded do |page|
      # A nokogiri object. page.doc can be nil (Polipus appears to build it
      # only for HTML responses), so fall back to an empty title instead of
      # raising NoMethodError on nil.
      doc = page.doc
      title = doc ? doc.css('title').text : ''
      puts "'#{title}' (#{page.url})"
    end
  end
end

#parser(chunk) ⇒ Object



33
34
35
# File 'lib/ubi/aranea.rb', line 33

# Parses a chunk of markup as HTML with Nokogiri.
#
# @param chunk the markup to parse; handed to Nokogiri unchanged
# @return the object returned by Nokogiri.HTML for that input
def parser(chunk)
  Nokogiri.HTML(chunk)
end