Class: SiteMapper::Robots

Inherits:
Object
  • Object
show all
Defined in:
lib/site_mapper/robots.rb

Overview

Based on: rubygems.org/gems/robots, v0.10.1. Provided a base URL, it checks whether a given URL is allowed to be crawled according to /robots.txt.

Defined Under Namespace

Classes: ParsedRobots

Instance Method Summary collapse

Constructor Details

#initialize(robots_txt, hostname, user_agent) ⇒ Robots


120
121
122
123
124
125
# File 'lib/site_mapper/robots.rb', line 120

# Create a new Robots instance.
# @param robots_txt [String] contents of the robots.txt file (passed on to ParsedRobots)
# @param hostname [String] host used as the cache key for #sitemaps and #other_values
# @param user_agent [String] user agent the robots.txt rules are checked against
def initialize(robots_txt, hostname, user_agent)
  # Cache of ParsedRobots instances, keyed by hostname.
  @parsed = {}
  @robots_txt = robots_txt
  @hostname = hostname
  @user_agent = user_agent
end

Instance Method Details

#allowed?(uri) ⇒ Boolean

Returns true if uri is allowed to be crawled

Examples:

Check if www.google.com/googlesites is allowed to be crawled

robots = Robots.new(robots_txt, 'google.com', 'SiteMapper')
robots.allowed?('http://www.google.com/googlesites') # => false (as of 2014-10-22)

132
133
134
135
136
137
# File 'lib/site_mapper/robots.rb', line 132

# Check whether a URI may be crawled according to robots.txt.
# @param uri [String, URI] URI to check
# @return [Boolean] true if uri is allowed to be crawled
def allowed?(uri)
  parsed_uri = to_uri(uri)
  # Parse robots.txt once per host and memoize the result.
  robots = (@parsed[parsed_uri.host] ||= ParsedRobots.new(@robots_txt, @user_agent))
  robots.allowed?(parsed_uri, @user_agent)
end

#other_values ⇒ Hash

Returns key/value pairs from robots.txt

Examples:

Get other values for google.com

robots = Robots.new(robots_txt, 'google.com', 'SiteMapper')
robots.other_values

154
155
156
157
158
# File 'lib/site_mapper/robots.rb', line 154

# Key/value pairs from robots.txt other than the standard directives.
# @return [Hash] key/value pairs from robots.txt
def other_values
  # Reuse (or build and cache) the parsed robots.txt for this instance's host.
  @parsed[@hostname] ||= ParsedRobots.new(@robots_txt, @user_agent)
  @parsed[@hostname].other_values
end

#sitemaps ⇒ Array

Returns array of sitemaps defined in robots.txt

Examples:

Get sitemap for google.com

robots = Robots.new(robots_txt, 'google.com', 'SiteMapper')
robots.sitemaps

143
144
145
146
147
# File 'lib/site_mapper/robots.rb', line 143

# Sitemap URLs listed in robots.txt.
# @return [Array] sitemaps defined in robots.txt
def sitemaps
  # Reuse (or build and cache) the parsed robots.txt for this instance's host.
  @parsed[@hostname] ||= ParsedRobots.new(@robots_txt, @user_agent)
  @parsed[@hostname].sitemaps
end