Class: SpeedSpider::Cli
- Inherits:
-
Object
- Object
- SpeedSpider::Cli
- Defined in:
- lib/speed_spider/cli.rb
Instance Attribute Summary collapse
-
#option_parser ⇒ Object
readonly
Returns the value of attribute option_parser.
-
#options ⇒ Object
readonly
Returns the value of attribute options.
Instance Method Summary collapse
-
#initialize ⇒ Cli
constructor
A new instance of Cli.
- #parse! ⇒ Object
Constructor Details
#initialize ⇒ Cli
Returns a new instance of Cli.
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/speed_spider/cli.rb', line 9

# Creates a Cli whose @options hash holds the default crawl settings.
# Nothing is read from the command line here; #parse! overwrites individual
# keys afterwards.
def initialize
  @options = {
    # only URLs that start with base_url are saved locally
    :base_url => '',
    # directory the downloaded files are saved to
    :dir => 'download',
    # number of threads used to fetch pages
    :threads => 4,
    # verbose output
    :verbose => true,
    # keep the page response body after it has been scanned for links
    :discard_page_bodies => false,
    # identify self as SpeedSpider/VERSION
    :user_agent => "SpeedSpider/#{SpeedSpider::VERSION}",
    # no delay between requests
    :delay => 0,
    # don't obey the robots exclusion protocol
    :obey_robots_txt => false,
    # by default, don't limit the depth of the crawl
    :depth_limit => false,
    # number of times HTTP redirects will be followed
    :redirect_limit => 5,
    # no storage engine chosen here
    # NOTE(review): presumably the crawler falls back to its own default
    # storage when this is nil -- confirm against the consumer of @options
    :storage => nil,
    # Hash of cookie name => value to send with HTTP requests
    :cookies => nil,
    # don't accept cookies from the server and send them back
    :accept_cookies => false,
    # don't skip links with a query string, e.g. http://foo.com/?u=user
    :skip_query_strings => false,
    # proxy server hostname
    :proxy_host => nil,
    # proxy server port number
    :proxy_port => false,
    # HTTP read timeout in seconds
    :read_timeout => nil
  }
end
Instance Attribute Details
#option_parser ⇒ Object (readonly)
Returns the value of attribute option_parser.
7 8 9 |
# File 'lib/speed_spider/cli.rb', line 7

# Reader for @option_parser.
# The instance variable is only assigned inside #parse!, so this returns nil
# until #parse! has been called.
def option_parser
  @option_parser
end
#options ⇒ Object (readonly)
Returns the value of attribute options.
7 8 9 |
# File 'lib/speed_spider/cli.rb', line 7

# Reader for @options, the Hash of crawl settings that #initialize fills with
# defaults and #parse! updates from the command line.
# The same Hash object is returned each time (no copy is made).
def options
  @options
end
Instance Method Details
#parse! ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/speed_spider/cli.rb', line 48

# Builds the command-line parser, stores it in @option_parser and runs it.
# Every recognised switch overwrites the matching key in @options.
#
# OptionParser#parse! is called without arguments, so it consumes ARGV and
# leaves only the non-option arguments (the start_url) behind. Nothing is
# rescued here: errors raised by OptionParser for unknown switches or bad
# Integer values propagate to the caller. -V/--version prints the version and
# exits the process.
#
# Returns self.
def parse!
  @option_parser = OptionParser.new do |opts|
    opts.banner = "Usage: spider [options] start_url"
    opts.separator ""
    opts.separator "options:"

    # '--slient' is the historical (misspelled) name; kept so existing
    # invocations keep working alongside the correctly spelled alias.
    opts.on('-S', '--silent', '--slient', 'silent output') do
      @options[:verbose] = false
    end

    opts.on('-D', '--dir String', 'directory for download files to save to. "download" by default') do |value|
      @options[:dir] = value
    end

    opts.on('-b', '--base_url String', 'any url not starts with base_url will not be saved') do |value|
      # normalise to a trailing slash so the stored prefix always ends in '/'
      value += '/' unless value.end_with? '/'
      @options[:base_url] = value
    end

    opts.on('-t', '--threads Integer', Integer, 'threads to run for fetching pages, 4 by default') do |value|
      @options[:threads] = value
    end

    opts.on('-u', '--user_agent String', 'words for request header USER_AGENT') do |value|
      @options[:user_agent] = value
    end

    opts.on('-d', '--delay Integer', Integer, 'delay between requests in seconds') do |value|
      @options[:delay] = value
    end

    opts.on('-o', '--obey_robots_text', 'obey robots exclusion protocol') do
      @options[:obey_robots_txt] = true
    end

    # Writes :depth_limit; the previous code stored true under :delay, which
    # clobbered the request delay and never enabled the depth limit.
    # NOTE(review): the switch takes no argument, so it can only store true;
    # if the crawler expects a numeric depth this should accept an Integer --
    # confirm against the consumer of @options.
    opts.on('-l', '--depth_limit', 'limit the depth of the crawl') do
      @options[:depth_limit] = true
    end

    opts.on('-r', '--redirect_limit Integer', Integer, 'number of times HTTP redirects will be followed') do |value|
      @options[:redirect_limit] = value
    end

    opts.on('-a', '--accept_cookies', 'accept cookies from the server and send them back?') do
      @options[:accept_cookies] = true
    end

    opts.on('-s', '--skip_query_strings', 'skip any link with a query string? e.g. http://foo.com/?u=user') do
      @options[:skip_query_strings] = true
    end

    opts.on('-H', '--proxy_host String', 'proxy server hostname') do |value|
      @options[:proxy_host] = value
    end

    opts.on('-P', '--proxy_port Integer', Integer, 'proxy server port number') do |value|
      @options[:proxy_port] = value
    end

    opts.on('-T', '--read_timeout Integer', Integer, 'HTTP read timeout in seconds') do |value|
      @options[:read_timeout] = value
    end

    # print the version.
    opts.on_tail("-V", "--version", "Show version") do
      puts SpeedSpider::VERSION
      exit
    end
  end

  @option_parser.parse!
  self
end