Class: Elsmore::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/elsmore/scraper.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(initial_url) ⇒ Scraper

Returns a new instance of Scraper.



5
6
7
8
9
10
11
12
13
14
15
# File 'lib/elsmore/scraper.rb', line 5

# Sets up a scraper whose work queue starts with a single document built
# from +initial_url+.
#
# @param initial_url the value passed straight to Elsmore::Document.new
#   (its accepted type is defined by that class, not visible here)
def initialize(initial_url)
  root_document = Elsmore::Document.new(initial_url)

  # Seed the domain list with the host of the starting document.
  self.valid_domains = [root_document.url.host]

  # The starting document is the only queued item; its canonical URL is
  # tracked in a parallel list.
  self.unprocessed = [root_document]
  self.unprocessed_urls = [root_document.url.canonical_url]

  # Nothing has been handled yet, so both result lists begin empty.
  self.processed = []
  self.invalid = []
end

Instance Attribute Details

#emitter ⇒ Object

Returns the value of attribute emitter.



3
4
5
# File 'lib/elsmore/scraper.rb', line 3

# Reader for the +@emitter+ instance variable.
#
# The constructor shown in this file does not assign it, so the value is
# nil until something else sets it.
# NOTE(review): the emitter's role is not visible here — confirm against
# the code that assigns and uses it.
#
# @return [Object, nil] whatever is currently stored in +@emitter+
def emitter
  @emitter
end

#invalid ⇒ Object

Returns the value of attribute invalid.



3
4
5
# File 'lib/elsmore/scraper.rb', line 3

# Reader for the +@invalid+ instance variable, which the constructor
# initializes to an empty array.
# NOTE(review): presumably collects entries rejected during processing —
# confirm against #process, which is not visible here.
#
# @return [Array] the current contents of +@invalid+
def invalid
  @invalid
end

#processed ⇒ Object

Returns the value of attribute processed.



3
4
5
# File 'lib/elsmore/scraper.rb', line 3

# Reader for the +@processed+ instance variable, which the constructor
# initializes to an empty array.
# NOTE(review): presumably accumulates documents once they have been
# handled — confirm against #process, which is not visible here.
#
# @return [Array] the current contents of +@processed+
def processed
  @processed
end

#unprocessed ⇒ Object

Returns the value of attribute unprocessed.



3
4
5
# File 'lib/elsmore/scraper.rb', line 3

# Reader for the +@unprocessed+ instance variable: the queue of documents
# still waiting to be handled. The constructor seeds it with the initial
# document, and #run removes entries from the front until it is empty.
#
# @return [Array] the pending documents, in queue order
def unprocessed
  @unprocessed
end

#unprocessed_urls ⇒ Object

Returns the value of attribute unprocessed_urls.



3
4
5
# File 'lib/elsmore/scraper.rb', line 3

# Reader for the +@unprocessed_urls+ instance variable. The constructor
# seeds it with the canonical URL of the initial document.
# NOTE(review): presumably used to avoid queueing the same URL twice —
# confirm against the code that appends to it.
#
# @return [Array] the canonical URLs recorded so far
def unprocessed_urls
  @unprocessed_urls
end

#valid_domains ⇒ Object

Returns the value of attribute valid_domains.



3
4
5
# File 'lib/elsmore/scraper.rb', line 3

# Reader for the +@valid_domains+ instance variable. The constructor seeds
# it with the host of the initial document's URL.
# NOTE(review): presumably limits which links are followed — confirm
# against the code that consults it.
#
# @return [Array] the hosts recorded so far
def valid_domains
  @valid_domains
end

Instance Method Details

#run ⇒ Object



17
18
19
20
21
22
23
# File 'lib/elsmore/scraper.rb', line 17

# Drains the work queue: repeatedly removes the document at the front of
# +unprocessed+ and hands it to +process+, stopping once the queue is
# empty. The emptiness check is re-evaluated before every iteration, so
# anything +process+ adds to the queue is handled in the same call.
#
# @return [self] the receiver, allowing calls to be chained
def run
  process(unprocessed.shift) until unprocessed.empty?
  self
end