Class: WebCrawler::FactoryUrl

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/web_crawler/factory_url.rb

Overview

p = FactoryUrl.new "http://www.somehost.com/:second/:first/", :first => 0..10, :second => "a".."z"
p.urls #=> ["http://www.somehost.com/a/1",

#    "http://www.somehost.com/b/1",
#    "http://www.somehost.com/c/1",
#    ...
#    "http://www.somehost.com/x/10",
#    "http://www.somehost.com/y/10",
#    "http://www.somehost.com/z/10/"]

p = FactoryUrl.new "http://www.somehost.com/$1/$2?param=$3", 0..10, "a".."z", [3,7,34,876,92]
p.urls #=> ["http://www.somehost.com/1/a?param=3",

#    "http://www.somehost.com/1/b?param=7",
#    "http://www.somehost.com/1/c?param=34",
#    ...
#    "http://www.somehost.com/10/x?param=34",
#    "http://www.somehost.com/10/y?param=876",
#    "http://www.somehost.com/10/z?param=92"]

p = FactoryUrl.new 0..10, "a".."z", [3,7,34,876,92] do |first, second, third|
  "http://www.somehost.com/#{first}/#{second}?param=#{third}"
end

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args, &block) ⇒ FactoryUrl

Returns a new instance of FactoryUrl.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/web_crawler/factory_url.rb', line 30

# Sets up the factory from either a URL pattern or a block.
#
# A trailing options hash (taken off +args+ by extract_options!, presumably
# the ActiveSupport Array extension - confirm) supplies named parameters;
# without it the remaining positional arguments are passed through
# normalize_arguments.
#
# @param args [Array] pattern String (when no block is given) followed by
#   the value collections, optionally ending with a Hash of named collections
# @param block [Proc] optional URL builder stored in @block
# @raise [ArgumentError] when no block is given and the first argument is
#   not a String
def initialize(*args, &block)
  @options = args.extract_options!

  if block_given?
    @block = block
  else
    @pattern = args.shift
    unless pattern.is_a?(String)
      raise ArgumentError, "first argument must be an url pattern(String)"
    end
  end

  @params =
    if @options.empty?
      normalize_arguments(args)
    else
      names = @options.keys
      # Cartesian product of every named collection, one Hash per combination.
      head, *tail = @options.values.map(&:to_a)
      head.product(*tail).map { |combo| Hash[names.zip(combo)] }
    end
end

Instance Attribute Details

#params ⇒ Object (readonly)

Returns the value of attribute params.



28
29
30
# File 'lib/web_crawler/factory_url.rb', line 28

# Reader for the parameter sets built by the constructor: either the result
# of normalize_arguments(args) or, when an options hash was given, one Hash
# per combination of the option values.
def params
  @params
end

#pattern ⇒ Object (readonly)

Returns the value of attribute pattern.



28
29
30
# File 'lib/web_crawler/factory_url.rb', line 28

# Reader for the URL template. The constructor assigns it from the first
# positional argument only when no block is given, so it is nil for
# block-based factories.
def pattern
  @pattern
end

#urls ⇒ Object (readonly)

Returns the value of attribute urls.



28
29
30
# File 'lib/web_crawler/factory_url.rb', line 28

# Returns the generated URL list.
#
# Delegates to #factory so the list is built on first access rather than
# being nil until #factory or #each has been called. #factory memoizes its
# result in @urls, so repeated calls return the cached list.
#
# @return [Array] the URLs produced by #factory
def urls
  factory
end

Instance Method Details

#each ⇒ Object



58
59
60
61
62
63
# File 'lib/web_crawler/factory_url.rb', line 58

# Yields each generated URL in turn.
#
# The cached list in @urls is cleared first, so every iteration rebuilds the
# URLs through #factory. Without a block an Enumerator is returned instead of
# failing on the inner yield.
#
# @yield [url] one generated URL at a time
# @return [Enumerator] when no block is given
# @return [Array] the list returned by #factory otherwise
def each
  return enum_for(:each) unless block_given?

  @urls = nil
  factory.each { |url| yield url }
end

#factory ⇒ Object



48
49
50
51
52
53
54
55
56
# File 'lib/web_crawler/factory_url.rb', line 48

# Builds the URL list once and caches it in @urls.
#
# Three modes, chosen from the stored state:
# * no pattern          - each parameter set is splatted into @block
# * pattern, Hash sets  - ":name" placeholders are looked up by symbol
# * pattern, other sets - "$N" placeholders are looked up by 1-based index
#
# @return [Array] one entry per parameter set
def factory
  return @urls if @urls

  @urls =
    if !pattern
      params.map { |values| @block.call(*values) }
    elsif params.first.is_a?(Hash)
      params.map do |named|
        pattern.gsub(/:([a-z_]+)/) { named[Regexp.last_match(1).to_sym] }
      end
    else
      params.map do |positional|
        pattern.gsub(/\$(\d+)/) { positional[Regexp.last_match(1).to_i - 1] }
      end
    end
end