18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
# File 'lib/robot_rules.rb', line 18
# Parses the text of a robots.txt file and stores the Disallow paths that
# apply to this robot for the site the file was served from.
#
# Rules found under a User-Agent line whose value contains @user_agent take
# precedence; if none were collected, the rules listed under "*" (or before
# any User-Agent line) are used instead. Whatever was previously stored in
# @rules for the same "host:port" key is replaced.
#
# NOTE(review): the agent value is downcased before the substring test, so
# this presumably expects @user_agent to already be lowercase — confirm
# where it is assigned.
#
# @param text_uri [String] URI the robots.txt was fetched from; its scheme,
#   host and port are used to filter absolute Disallow values
# @param robots_data [String] raw robots.txt content
# @return [Array<String>] the disallowed paths stored under @rules["host:port"]
# @raise [URI::InvalidURIError] if text_uri itself cannot be parsed
def parse(text_uri, robots_data)
  uri = URI.parse(text_uri)
  location = "#{uri.host}:#{uri.port}"
  @rules.delete(location)

  # Split on any run of CR/LF and strip trailing "# ..." comments.
  lines = robots_data.split(/[\015\012]+/).map { |line| line.sub(/\s*#.*$/, "") }
  anon_rules = []
  my_rules = []
  current = anon_rules

  lines.each do |line|
    case line
    when /^\s*User-Agent\s*:\s*(.+?)\s*$/i
      # Rules addressed to this robot were already collected; the next
      # User-Agent line starts an unrelated record, so stop reading.
      break unless my_rules.empty?

      agent = Regexp.last_match(1)
      # nil means "record for some other robot": its Disallow lines are ignored.
      current = if agent == "*"
                  anon_rules
                elsif agent.downcase.index(@user_agent)
                  my_rules
                end
    when /^\s*Disallow\s*:\s*(.*?)\s*$/i
      next if current.nil?

      value = Regexp.last_match(1)
      if value.empty?
        # An empty Disallow is kept as a nil placeholder and compacted away below.
        current << nil
        next
      end

      begin
        disallow = URI.parse(value)
      rescue URI::InvalidURIError
        # A malformed value (e.g. an unescaped space) must not abort the whole
        # file; skip just this line and keep the remaining rules.
        next
      end

      # Absolute Disallow values only apply when they point at this very site.
      next unless disallow.scheme.nil? || disallow.scheme == uri.scheme
      next unless disallow.port.nil? || disallow.port == uri.port
      next unless disallow.host.nil? || disallow.host.downcase == uri.host.to_s.downcase

      path = disallow.path
      path = "/" if path.empty?
      path = "/#{path}" unless path.start_with?("/")
      current << path
    end
  end

  @rules[location] = my_rules.empty? ? anon_rules.compact : my_rules.compact
end
|