Class: Pagedump::Driver

Inherits:
Object
  • Object
show all
Defined in:
lib/pagedump/driver.rb

Overview

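WARNING: Not thread-safe. A Driver instance keeps the state of a scrape in instance variables (@wlinks, @data), so do not share one instance between threads.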

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Driver

Returns a new instance of Driver.



11
12
13
14
# File 'lib/pagedump/driver.rb', line 11

# Sets up a driver with empty per-instance state.
#
# @data collects values grouped by key (see #data) and @wlinks maps
# absolute link strings to their weight (see #link).
def initialize
  @data = {}
  @wlinks = {}
end

Class Method Details

.inherited(subclass) ⇒ Object



7
8
9
# File 'lib/pagedump/driver.rb', line 7

# Hook that Ruby invokes whenever a class inherits from this one.
# Registers the new subclass with Pagedump.
#
# @param subclass [Class] the class that has just been defined
def self.inherited(subclass)
  # Keep the hook chain intact: other `inherited` hooks further up the
  # ancestor chain must still run.
  super
  Pagedump.register_driver(subclass)
end

Instance Method Details

#check(page) ⇒ Object



44
45
# File 'lib/pagedump/driver.rb', line 44

# Hook called by #scrap with the fetched page, after #links has run.
# The base implementation does nothing and returns nil; presumably
# subclasses override it to inspect the page (verify against drivers).
#
# @param page the page object returned by the HTTP agent
# @return [nil]
def check(page)
  # Intentionally empty: the base driver performs no checks.
end

#data(key, value) ⇒ Object



16
17
18
19
# File 'lib/pagedump/driver.rb', line 16

# Appends +value+ to the list of values stored under +key+ in @data,
# creating the list on first use.
#
# @param key the key to file the value under
# @param value the value to append
# @return [Array] the list stored under +key+, including the new value
def data(key, value)
  bucket = (@data[key] ||= [])
  bucket << value
end


#link(weight, href) ⇒ Object



21
22
23
24
25
26
27
28
29
# File 'lib/pagedump/driver.rb', line 21

# Records a weighted link. +href+ is resolved against the driver's #url
# and the resulting absolute URL string becomes the key in @wlinks.
# An href that cannot be parsed is logged as a warning and skipped.
#
# @param weight the weight to associate with the link
# @param href [String] the link target, absolute or relative to #url
# @return the weight on success, otherwise whatever the logger returns
def link(weight, href)
  target = URI.parse(href)
  absolute = URI.parse(url).merge(target).to_s
  @wlinks[absolute] = weight
rescue URI::InvalidURIError
  Pagedump.logger.warn "[Driver #{name}] Error parsing href \"#{href}\". Ignoring link (weight was #{weight})"
end

#name ⇒ Object



51
52
53
# File 'lib/pagedump/driver.rb', line 51

# The driver's name, i.e. the name of the receiver's class.
#
# @return [String, nil] the class name (nil for an anonymous class)
def name
  driver_class = self.class
  driver_class.name
end

#scrap ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/pagedump/driver.rb', line 31

# Fetches the driver's #url with Mechanize, runs the #links and #check
# hooks on the fetched page, and returns what they collected.
#
# #links is not defined in this class as shown here; presumably each
# concrete driver supplies it (verify against the subclasses).
#
# @return [OpenStruct] +links+ is the hash filled through #link and
#   +data+ is the hash filled through #data during this run
def scrap
  # Start every run from a clean slate. Resetting only @wlinks would let
  # @data keep growing across runs and would make every returned result
  # share (and later mutate) the same data hash.
  @wlinks = {}
  @data = {}
  Pagedump.logger.info "Getting headlines for url #{url}"
  page = Mechanize.new.get(url)
  links(page)
  check(page)
  OpenStruct.new(links: @wlinks, data: @data)
end

#url ⇒ Object



47
48
49
# File 'lib/pagedump/driver.rb', line 47

# The URL this driver scrapes, read from the +URL+ constant of the
# receiver's class.
#
# @return the value of the class's +URL+ constant
# @raise [NameError] if the class does not provide a +URL+ constant
def url
  driver_class = self.class
  driver_class::URL
end