Class: Robots::ParsedRobots
- Inherits: Object
- Inheritance chain: Object → Robots::ParsedRobots
- Defined in:
- lib/robots.rb
Instance Method Summary
- #allowed?(uri, user_agent) ⇒ Boolean
- #initialize(uri, user_agent) ⇒ ParsedRobots (constructor): Returns a new instance of ParsedRobots.
- #other_values ⇒ Object
Constructor Details
#initialize(uri, user_agent) ⇒ ParsedRobots
Returns a new instance of ParsedRobots.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/robots.rb', line 12 def initialize(uri, user_agent) @last_accessed = Time.at(1) io = Robots.get_robots_txt(uri, user_agent) if !io || io.content_type != "text/plain" || io.status != ["200", "OK"] io = StringIO.new("User-agent: *\nAllow: /\n") end @other = {} @disallows = {} @allows = {} @delays = {} # added delays to make it work agent = /.*/ io.each do |line| next if line =~ /^\s*(#.*|$)/ arr = line.split(":") key = arr.shift value = arr.join(":").strip value.strip! case key when "User-agent" agent = to_regex(value) when "Allow" @allows[agent] ||= [] @allows[agent] << to_regex(value) when "Disallow" @disallows[agent] ||= [] @disallows[agent] << to_regex(value) when "Crawl-delay" @delays[agent] = value.to_i else @other[key] ||= [] @other[key] << value end end @parsed = true end |
Instance Method Details
#allowed?(uri, user_agent) ⇒ Boolean
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/robots.rb', line 52

# Decides whether +user_agent+ may fetch +uri+ under the parsed rules.
#
# A path is refused when any Disallow pattern belonging to an agent pattern
# that matches +user_agent+ matches it, unless an Allow pattern of a matching
# agent also matches it. Returns true when nothing has been parsed.
#
# When access is allowed and a Crawl-delay is recorded for a matching agent,
# this call blocks until that many seconds have passed since the previous
# allowed access, then records the current time. @delays is keyed by agent
# pattern, so it is searched by matching rather than looked up by the raw
# user-agent string; if several agent patterns match, the largest delay is
# used.
#
# @param uri        object responding to #request_uri (e.g. URI::HTTP)
# @param user_agent [String] user-agent name to test against the rules
# @return [Boolean]
def allowed?(uri, user_agent)
  return true unless @parsed

  path = uri.request_uri

  # True when any rule of any agent group matching user_agent matches path.
  matches = lambda do |rule_sets|
    rule_sets.any? do |agent, rules|
      user_agent =~ agent && rules.any? { |rule| path =~ rule }
    end
  end

  allowed = !matches.call(@disallows) || matches.call(@allows)

  if allowed
    delay = @delays.select { |agent, _| user_agent =~ agent }.values.max
    if delay
      remaining = delay - (Time.now - @last_accessed)
      # Kernel#sleep raises on a negative interval, so only wait if time is left.
      sleep(remaining) if remaining > 0
      @last_accessed = Time.now
    end
  end

  allowed
end
#other_values ⇒ Object
87 88 89 |
# File 'lib/robots.rb', line 87

# Returns the @other hash: directive name => array of values, for every
# robots.txt line that #initialize did not handle as a User-agent, Allow,
# Disallow or Crawl-delay directive. The hash itself is returned, not a copy.
def other_values
  @other
end