Module: Tjcrawler
- Defined in:
- lib/tjcrawler.rb,
lib/tjcrawler/page.rb,
lib/tjcrawler/config.rb,
lib/tjcrawler/parser.rb,
lib/tjcrawler/crawler.rb,
lib/tjcrawler/version.rb,
lib/tjcrawler/scheduler.rb,
lib/tjcrawler/crawler/result.rb
Defined Under Namespace
Classes: Config, Crawler, Page, Parser, Scheduler
Constant Summary
collapse
- VERSION =
"0.0.2"
Class Method Summary
collapse
Class Method Details
.set_db(config) ⇒ Object
30
31
32
|
# File 'lib/tjcrawler.rb', line 30
# Opens the ActiveRecord connection described by the given configuration.
#
# @param config [#db_setting] object whose +db_setting+ value is handed
#   unchanged to +ActiveRecord::Base.establish_connection+
# @return [Object] whatever +establish_connection+ returns
def set_db(config)
  ActiveRecord::Base.establish_connection(config.db_setting)
end
|
.start_crawler {|config| ... } ⇒ Object
8
9
10
11
12
13
14
15
16
17
|
# File 'lib/tjcrawler.rb', line 8
# Builds a Config, lets the caller fill it in, connects the database and then
# runs +config.threads+ crawler workers, waiting for all of them to finish.
#
# @yieldparam config [Config] fresh configuration object for the caller to populate
# @return [Array<Thread>] the worker threads, all already joined
def start_crawler
  settings = Config.new.tap { |fresh| yield fresh }
  set_db(settings)

  # One thread per configured worker; each builds its own Crawler instance.
  workers = settings.threads.times.map do
    Thread.new { Tjcrawler::Crawler.new(settings.css_selector).start }
  end

  # Wait for every worker before returning.
  workers.each(&:join)
end
|
.start_parser {|config| ... } ⇒ Object
19
20
21
22
23
24
25
26
27
28
|
# File 'lib/tjcrawler.rb', line 19
# Builds a Config, lets the caller fill it in, connects the database and then
# runs +config.threads+ parser workers, waiting for all of them to finish.
#
# @yieldparam config [Config] fresh configuration object for the caller to populate
# @return [Array<Thread>] the worker threads, all already joined
def start_parser
  settings = Config.new.tap { |fresh| yield fresh }
  set_db(settings)

  # One thread per configured worker; each Parser receives config.proc as its block.
  workers = settings.threads.times.map do
    Thread.new { Tjcrawler::Parser.new(&settings.proc).start }
  end

  # Wait for every worker before returning.
  workers.each(&:join)
end
|