Class: Robots

Inherits:
Object
  • Object
show all
Defined in:
lib/robots.rb

Defined Under Namespace

Classes: ParsedRobots

Constant Summary collapse

DEFAULT_TIMEOUT =
3

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(user_agent) ⇒ Robots

Returns a new instance of Robots.



119
120
121
122
# File 'lib/robots.rb', line 119

# Builds a checker bound to a single user agent.
#
# @param user_agent [Object] stored as-is and later handed to
#   ParsedRobots when a host's rules are looked up
def initialize(user_agent)
  # Per-host cache; the lookup methods key it by +uri.host+.
  @parsed = {}
  @user_agent = user_agent
end

Class Method Details

.get_robots_txt(uri, user_agent) ⇒ Object



101
102
103
104
105
106
107
108
109
# File 'lib/robots.rb', line 101

# Tries to open "/robots.txt" relative to +uri+, sending +user_agent+
# as the "User-Agent" option, bounded by Robots.timeout seconds.
#
# This is best-effort: any StandardError raised while building or opening
# the URL is swallowed and yields +nil+. A timeout is reported on STDERR
# and also yields +nil+.
#
# @param uri [#to_s] any URI on the target site
# @param user_agent [String] value passed as the "User-Agent" option
# @return [Object, nil] whatever +open+ returns on success, otherwise +nil+
#   (NOTE(review): +open+ on a URI presumably comes from open-uri — confirm)
def self.get_robots_txt(uri, user_agent)
  Timeout.timeout(Robots.timeout) do
    begin
      robots_location = URI.join(uri.to_s, "/robots.txt")
      robots_location.open("User-Agent" => user_agent)
    rescue StandardError
      # Deliberately silent: an unreachable or malformed target yields nil.
      nil
    end
  end
rescue Timeout::Error
  # STDERR.puts returns nil, so the caller sees nil on timeout as well.
  STDERR.puts "robots.txt request timed out"
end

.timeout ⇒ Object



115
116
117
# File 'lib/robots.rb', line 115

# Timeout currently in effect for robots.txt requests.
#
# @return [Object] the value assigned through +timeout=+ when it is truthy,
#   otherwise DEFAULT_TIMEOUT
def self.timeout
  return @timeout if @timeout

  DEFAULT_TIMEOUT
end

.timeout=(t) ⇒ Object



111
112
113
# File 'lib/robots.rb', line 111

# Overrides the timeout reported by +timeout+.
#
# @param t [Object] new timeout value; +nil+ or +false+ makes +timeout+
#   fall back to DEFAULT_TIMEOUT again
# @return [Object] the value that was stored
def self.timeout=(t)
  instance_variable_set(:@timeout, t)
end

Instance Method Details

#allowed?(uri) ⇒ Boolean

Returns:

  • (Boolean)


124
125
126
127
128
129
# File 'lib/robots.rb', line 124

# Asks the cached rules for +uri+'s host whether +uri+ is allowed for this
# instance's user agent.
#
# A ParsedRobots is created the first time a host is seen and reused for
# later calls with the same host.
#
# @param uri [URI, #to_s] target; anything that is not a URI is parsed
#   from its string form
# @return [Boolean] the result of ParsedRobots#allowed? (presumably a
#   boolean, as the page's signature states)
def allowed?(uri)
  target = uri.is_a?(URI) ? uri : URI.parse(uri.to_s)
  rules = (@parsed[target.host] ||= ParsedRobots.new(target, @user_agent))
  rules.allowed?(target, @user_agent)
end

#other_values(uri) ⇒ Object



131
132
133
134
135
136
# File 'lib/robots.rb', line 131

# Returns ParsedRobots#other_values for the host of +uri+.
#
# A ParsedRobots is created the first time a host is seen and reused for
# later calls with the same host.
#
# @param uri [URI, #to_s] target; anything that is not a URI is parsed
#   from its string form
# @return [Object] whatever ParsedRobots#other_values returns
def other_values(uri)
  target = uri.is_a?(URI) ? uri : URI.parse(uri.to_s)
  rules = (@parsed[target.host] ||= ParsedRobots.new(target, @user_agent))
  rules.other_values
end