Class: WebCrawler::Application

Inherits:
CLI
  • Object
show all
Defined in:
lib/web_crawler/application.rb

Instance Method Summary collapse

Methods inherited from CLI

#help

Methods included from Thor::InheritedOptions

included

Methods included from Thor::Hooks

included

Instance Method Details

#factory(pattern, *params) ⇒ Object



67
68
69
70
71
72
73
74
75
76
# File 'lib/web_crawler/application.rb', line 67

# Expands a URL pattern into concrete URLs, then either prints or fetches them.
#
# Every entry of +params+ is a string of Ruby source; each one is evaluated and
# the resulting values are passed to FactoryUrl together with +pattern+.
#
# @param pattern URL template handed to FactoryUrl
# @param params [Array<String>] Ruby expressions, evaluated in place
# @return nil when the URLs are printed (the value of +puts+), otherwise
#   whatever #get returns
def factory(pattern, *params)
  # SECURITY: eval executes arbitrary Ruby taken straight from the arguments.
  # It is kept because the command's contract is that each param is Ruby
  # source; never feed this method untrusted input.
  params.map! { |param| eval(param) }
  generated = FactoryUrl.new(pattern, params).factory

  if options[:output] || options[:list]
    # One URL per line for :list, a single space-separated line otherwise.
    separator = options[:list] ? "\n" : ' '
    # inspect wraps each URL in double quotes; swap them for single quotes.
    puts generated.map(&:inspect).join(separator).tr('"', "'")
  else
    get(*generated)
  end
end

#get(url, *urls) ⇒ Object



43
44
45
46
47
48
# File 'lib/web_crawler/application.rb', line 43

# Requests one or more URLs as a single batch.
#
# @param url first URL to request (at least one is required)
# @param urls any further URLs
# @return whatever BatchRequest#process returns
def get(url, *urls)
  targets = [url, *urls]
  BatchRequest.new(*targets, symbolized_options).process
end

#runner(name) ⇒ Object



30
31
32
33
34
35
36
37
# File 'lib/web_crawler/application.rb', line 30

# Loads the file for +name+ and runs the class it names.
#
# The current directory and every entry of @options[:lib] are pushed onto the
# front of the load path before the file is required.
#
# @param name runner name; its underscored form is required and its
#   classified form is resolved to the class whose +run+ is called
# @return whatever the resolved class's +run+ returns
def runner(name)
  $LOAD_PATH.unshift('./')
  Array.wrap(@options[:lib]).each { |dir| $LOAD_PATH.unshift(dir) }
  require(name.underscore)

  runner_class = name.classify.constantize
  runner_class.run(allow_format(:json, :yaml))
end

#show_urls(url, *urls) ⇒ Object



55
56
57
58
59
60
# File 'lib/web_crawler/application.rb', line 55

# Requests the given URLs and returns the collected links arranged in groups.
#
# @param url first URL to request (at least one is required)
# @param urls any further URLs
# @return the first element of Follower#collect, passed through
#   +in_groups_of+ with options[:cols] (defaulted to 1) and "" as fill value
def show_urls(url, *urls)
  batch = BatchRequest.new(url, *urls, symbolized_options)
  # Fall back to a single column when the option was not supplied.
  options[:cols] ||= 1

  responses = batch.process
  follower  = Follower.new(responses, same_host: options['same-host'])
  follower.collect.first.in_groups_of(options[:cols], "")
end

#test ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/web_crawler/application.rb', line 9

# Hard-coded trial run: builds URLs from a fixed pattern, requests them,
# collects through Follower the URLs matching the vacancy pattern, requests
# those as well and prints the inspected result.
#
# @return [String] always an empty string
def test
  pattern   = 'http://www.superjob.ru/rabota/554/veb-programmist/?from=$1'
  seed_urls = FactoryUrl.new(pattern, [[140]]).factory
  logger.info "start requests with #{seed_urls.join(' ')} in 4 processes"

  targets = BatchRequest.new(seed_urls).process
  logger.info "#{targets.size} targets collected"

  follower     = Follower.new(targets, same_host: false)
  vacancy_urls = follower.collect { |url| url =~ /vacancy\/\?id=\d+/ }
  logger.info "#{vacancy_urls.size} urls collected"
  logger.info "start requests with in 4 processes"

  responses = BatchRequest.new(vacancy_urls).process
  puts responses.inspect

  ""
end

#version ⇒ Object



79
80
81
# File 'lib/web_crawler/application.rb', line 79

# Returns the value of the WebCrawler::VERSION::STRING constant.
def version
  WebCrawler::VERSION::STRING
end