Class: Robots
- Inherits: Object
- Inheritance hierarchy:
  - Object
    - Robots
- Defined in:
- lib/robots.rb
Overview
Robots retrieves and processes the robots.txt file from the target server
Instance Method Summary
- #allowed?(url) ⇒ Boolean
- #contents ⇒ Object
- #initialize(options) ⇒ Robots (constructor): Processes the robots.txt file.
- #user_agent_settings ⇒ Object
Constructor Details
#initialize(options) ⇒ Robots
Processes the robots.txt file
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/robots.rb', line 5
#
# Fetches the robots file from the target server, parses it and selects the
# rule set that applies to the configured user agent.
#
# @param options [Hash] settings for the lookup
#   :url        - (required) a URL on the target server; only its scheme,
#                 host and port are used to build the request
#   :file       - file requested from the server root (default "robots.txt")
#   :user_agent - user agent whose rules are selected (default "cobweb")
# @raise [RuntimeError] when options is not a Hash, when :url is missing, when
#   the response mime type does not start with "text/", or when neither the
#   requested user agent nor the wildcard (*) entry is in the parsed data
def initialize(options)
  raise "options should be a hash" unless options.kind_of?(Hash)
  raise ":url is required" unless options.has_key?(:url)

  # Merge into a fresh hash so the defaults never leak into the caller's hash.
  @options = { :file => "robots.txt", :user_agent => "cobweb" }.merge(options)

  uri = URI.parse(@options[:url])
  robots_url = [uri.scheme, "://", uri.host, ":", uri.port, "/", @options[:file]].join
  fetcher = Cobweb.new(
    :cache => nil,
    :text_mime_types => ["text/html", "application/xhtml+xml", "text/plain"]
  )
  content = fetcher.get(robots_url)

  # to_s keeps a missing mime type on the "Invalid mime type" path instead of
  # failing with NoMethodError on nil. The message reports the value checked.
  unless content[:mime_type].to_s.start_with?("text/")
    raise "Invalid mime type: #{content[:mime_type]}"
  end

  @raw_data = parse_data(content[:body])

  # Rule sets are looked up by the downcased, symbolized user agent name.
  agent_key = @options[:user_agent].to_s.downcase.to_sym
  if @raw_data.has_key?(agent_key)
    @params = @raw_data[agent_key]
  else
    # No section for this agent: fall back to the wildcard section.
    raise "Wildcard user-agent is not present" unless @raw_data.has_key?(:*)
    @params = @raw_data[:*]
  end
end
Instance Method Details
#allowed?(url) ⇒ Boolean
28 29 30 31 32 33 34 35 36 37 |
# File 'lib/robots.rb', line 28
#
# Checks whether the path of the given URL may be fetched under the rule set
# held in @params.
#
# Allow patterns are tested first, so a matching allow pattern wins over any
# disallow pattern. A path that matches no pattern at all is allowed.
#
# @param url [String] URL to check; only its path component is matched
# @return [Boolean] false only when a disallow pattern matches and no allow
#   pattern does
def allowed?(url)
  # Opaque URIs (e.g. mailto:) have no path; treat that as an empty path.
  path = URI.parse(url).path.to_s
  matches = lambda { |pattern| path.match(Cobweb.escape_pattern_for_regex(pattern)) }

  # Array() turns a missing (nil) rule list into an empty one.
  return true if Array(@params[:allow]).any?(&matches)

  !Array(@params[:disallow]).any?(&matches)
end
#contents ⇒ Object
43 44 45 |
# File 'lib/robots.rb', line 43
#
# Reader for @raw_data, which the constructor fills with the result of
# parse_data for the fetched robots file.
#
# @return [Object] the current value of @raw_data
def contents
  @raw_data
end
#user_agent_settings ⇒ Object
39 40 41 |
# File 'lib/robots.rb', line 39
#
# Reader for @params, the rule set the constructor selected for the
# configured user agent (or the wildcard entry when that agent has none).
#
# @return [Object] the current value of @params
def user_agent_settings
  @params
end