Module: Tjcrawler

Defined in:
lib/tjcrawler.rb,
lib/tjcrawler/page.rb,
lib/tjcrawler/config.rb,
lib/tjcrawler/parser.rb,
lib/tjcrawler/crawler.rb,
lib/tjcrawler/version.rb,
lib/tjcrawler/scheduler.rb,
lib/tjcrawler/crawler/result.rb

Defined Under Namespace

Classes: Config, Crawler, Page, Parser, Scheduler

Constant Summary collapse

VERSION =
"0.0.2"

Class Method Summary collapse

Class Method Details

.set_db(config) ⇒ Object



30
31
32
# File 'lib/tjcrawler.rb', line 30

# Opens the ActiveRecord connection described by the given configuration.
#
# @param config [#db_setting] object whose `db_setting` is handed to
#   ActiveRecord unchanged
# @return [Object] whatever `ActiveRecord::Base.establish_connection` returns
def set_db(config)
  ActiveRecord::Base.establish_connection(config.db_setting)
end

.start_crawler {|config| ... } ⇒ Object

Yields:

  • (config)


8
9
10
11
12
13
14
15
16
17
# File 'lib/tjcrawler.rb', line 8

# Configures and runs the crawler across a pool of worker threads.
#
# A fresh Config is yielded to the caller for setup and is then handed to
# set_db before any worker starts. `config.threads` workers are spawned; each
# one builds its own Tjcrawler::Crawler from `config.css_selector` and calls
# `start` on it. The call blocks until every worker has been joined.
#
# @yield [config] lets the caller populate the settings before crawling begins
# @yieldparam config [Config] the newly created configuration object
# @return [Array<Thread>] the worker threads, all already joined
def start_crawler
  config = Config.new
  yield config
  set_db(config)

  # Build the worker list with map instead of appending inside an each loop.
  workers = config.threads.times.map do
    Thread.new { Tjcrawler::Crawler.new(config.css_selector).start }
  end
  workers.each(&:join)
end

.start_parser {|config| ... } ⇒ Object

Yields:

  • (config)


19
20
21
22
23
24
25
26
27
28
# File 'lib/tjcrawler.rb', line 19

# Configures and runs the parser across a pool of worker threads.
#
# A fresh Config is yielded to the caller for setup and is then handed to
# set_db before any worker starts. `config.threads` workers are spawned; each
# one builds its own Tjcrawler::Parser, passing `config.proc` as the block,
# and calls `start` on it. The call blocks until every worker has been joined.
#
# @yield [config] lets the caller populate the settings before parsing begins
# @yieldparam config [Config] the newly created configuration object
# @return [Array<Thread>] the worker threads, all already joined
def start_parser
  config = Config.new
  yield config
  set_db(config)

  # Build the worker list with map instead of appending inside an each loop.
  workers = config.threads.times.map do
    Thread.new { Tjcrawler::Parser.new(&config.proc).start }
  end
  workers.each(&:join)
end