Class: Tjcrawler::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/tjcrawler/parser.rb

Constant Summary collapse

@@semaphore =
Mutex.new

Instance Method Summary collapse

Constructor Details

#initialize {|'Block required'| ... } ⇒ Parser

A Nokogiri document will be yielded to the block.

Yields:

  • ('Block required')


7
8
9
10
# File 'lib/tjcrawler/parser.rb', line 7

# Builds a parser around the given block.
#
# The block is kept as the parsing strategy; #parse later invokes it with the
# Nokogiri document built from the content it receives.
#
# @yield [doc] the strategy block, stored here and called by #parse
# @raise [ArgumentError] when no block is supplied
def initialize(&block)
  # The previous guard tried to `yield` precisely when no block existed, which
  # can only surface as a LocalJumpError; raise an explicit error instead.
  raise ArgumentError, 'Block required' unless block_given?

  @strategy = block
end

Instance Method Details

#parse(content) ⇒ Object



12
13
14
15
16
17
18
# File 'lib/tjcrawler/parser.rb', line 12

# Parses +content+ as HTML and runs the stored strategy block on the result.
#
# The strategy call is wrapped in the class-level mutex (@@semaphore), so only
# one strategy invocation runs at a time. A single dot is printed after each
# parse as a progress marker.
#
# @param content the markup handed to Nokogiri::HTML
# @return [Object] whatever the strategy block returns
def parse(content)
  document = Nokogiri::HTML(content)
  # Mutex#synchronize returns the value of its block.
  outcome = @@semaphore.synchronize { @strategy.call(document) }
  print :'.'
  outcome
end

#start ⇒ Object



20
21
22
23
24
25
26
# File 'lib/tjcrawler/parser.rb', line 20

# Runs the parse loop indefinitely.
#
# Each iteration asks find_next for a page. When nothing truthy comes back,
# it sleeps for one second and asks again. Otherwise the page content is parsed
# and page.touch(:parsed_at) is called afterwards
# (presumably recording the parse time; confirm against the page model).
def start
  loop do
    page = find_next

    # Nothing to do yet: back off briefly, then poll again.
    unless page
      sleep 1
      next
    end

    parse(page.content)
    page.touch(:parsed_at)
  end
end