Class: RegexpCrawler::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/regexp_crawler/crawler.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Crawler

Returns a new instance of Crawler.



5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/regexp_crawler/crawler.rb', line 5

# Builds a crawler from an options hash.
#
# Each recognised key is copied into the instance variable of the same name;
# a key that is missing from +options+ leaves that variable holding whatever
# +options[key]+ returns (nil for a plain Hash).
#
# @param options [Hash] settings keyed by symbol: :start_page,
#   :continue_regexp, :capture_regexp, :named_captures, :model,
#   :save_method, :headers, :encoding, :need_parse and :logger
# @return [Crawler] a new instance of Crawler
def initialize(options = {})
  [
    :start_page, :continue_regexp, :capture_regexp, :named_captures,
    :model, :save_method, :headers, :encoding, :need_parse
  ].each do |key|
    instance_variable_set("@#{key}", options[key])
  end

  # The literal +true+ is shorthand for "log to STDOUT"; anything else
  # (including nil) is stored exactly as given.
  if options[:logger] == true
    @logger = Logger.new(STDOUT)
  else
    @logger = options[:logger]
  end
end

Instance Attribute Details

#continue_regexp ⇒ Object

Returns the value of attribute continue_regexp.



3
4
5
# File 'lib/regexp_crawler/crawler.rb', line 3

# Reader for the continue_regexp attribute.
#
# @return [Object] the value currently held in @continue_regexp
#   (presumably a Regexp — confirm against callers)
def continue_regexp
  @continue_regexp
end

#encoding ⇒ Object

Returns the value of attribute encoding.



3
4
5
# File 'lib/regexp_crawler/crawler.rb', line 3

# Reader for the encoding attribute.
#
# @return [Object] the value currently held in @encoding
def encoding
  @encoding
end

#headers ⇒ Object

Returns the value of attribute headers.



3
4
5
# File 'lib/regexp_crawler/crawler.rb', line 3

# Reader for the headers attribute.
#
# @return [Object] the value currently held in @headers
def headers
  @headers
end

#model ⇒ Object

Returns the value of attribute model.



3
4
5
# File 'lib/regexp_crawler/crawler.rb', line 3

# Reader for the model attribute.
#
# @return [Object] the value currently held in @model
def model
  @model
end

#named_captures ⇒ Object

Returns the value of attribute named_captures.



3
4
5
# File 'lib/regexp_crawler/crawler.rb', line 3

# Reader for the named_captures attribute.
#
# @return [Object] the value currently held in @named_captures
def named_captures
  @named_captures
end

#need_parse ⇒ Object

Returns the value of attribute need_parse.



3
4
5
# File 'lib/regexp_crawler/crawler.rb', line 3

# Reader for the need_parse attribute.
#
# @return [Object] the value currently held in @need_parse
def need_parse
  @need_parse
end

#save_method ⇒ Object

Returns the value of attribute save_method.



3
4
5
# File 'lib/regexp_crawler/crawler.rb', line 3

# Reader for the save_method attribute.
#
# @return [Object] the value currently held in @save_method
def save_method
  @save_method
end

#start_page ⇒ Object

Returns the value of attribute start_page.



3
4
5
# File 'lib/regexp_crawler/crawler.rb', line 3

# Reader for the start_page attribute.
#
# @return [Object] the value currently held in @start_page; #start passes
#   it to URI.parse, so it is expected to be a URI string
def start_page
  @start_page
end

Instance Method Details

#capture_regexp=(regexp) ⇒ Object



18
19
20
# File 'lib/regexp_crawler/crawler.rb', line 18

# Stores a copy of +regexp+ that always has the MULTILINE flag set, so that
# "." also matches newlines. All flags already present on +regexp+ are kept.
#
# @param regexp [Regexp] the pattern to store; must respond to #source
#   and #options
# @return [Regexp] the rebuilt pattern that was stored in @capture_regexp
def capture_regexp=(regexp)
  pattern = regexp.source
  flags = regexp.options | Regexp::MULTILINE
  @capture_regexp = Regexp.new(pattern, flags)
end

#logger=(logger) ⇒ Object



22
23
24
# File 'lib/regexp_crawler/crawler.rb', line 22

# Sets the logger.
#
# @param logger [true, Object] pass the literal +true+ to get a new Logger
#   writing to STDOUT; any other value (including nil or false) is stored
#   unchanged
# @return [Object] the value stored in @logger
def logger=(logger)
  @logger =
    if logger == true
      Logger.new(STDOUT)
    else
      logger
    end
end

#start ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
# File 'lib/regexp_crawler/crawler.rb', line 26

# Runs the crawl, beginning at @start_page.
#
# Resets @results and @captured_pages, seeds the @pages queue with the parsed
# start URI, then takes pages off the front of the queue one at a time and
# hands each to parse_page until the queue is empty or @stop is truthy.
# parse_page is defined outside this method; presumably it is what fills
# @results, appends to @pages and sets @stop — confirm there.
#
# NOTE(review): @stop is not reset here, so if it is still truthy from an
# earlier run the loop body never executes — verify that is intended.
#
# @return [Array] the contents of @results after the loop finishes
def start
  @results = []
  @captured_pages = []
  @pages = [URI.parse(@start_page)]
  until @pages.empty? || @stop
    page_uri = @pages.shift
    @captured_pages << page_uri
    parse_page(page_uri)
  end
  @results
end