Class: Kudzu::Agent::Robots

Inherits: Object
Defined in:
lib/kudzu/agent/robots.rb,
lib/kudzu/agent/robots/txt.rb,
lib/kudzu/agent/robots/parser.rb

Defined Under Namespace

Classes: Parser, Rule, RuleSet, Txt

Instance Method Summary

Constructor Details

#initialize(config) ⇒ Robots

Returns a new instance of Robots.


4
5
6
7
8
# File 'lib/kudzu/agent/robots.rb', line 4

# Builds a Robots instance around the given configuration.
#
# @param config [Object] configuration object, stored as-is in @config
def initialize(config)
  # Starts out empty; presumably filled lazily with per-site robots.txt
  # data by code outside this listing -- TODO confirm.
  @txt = {}
  # NOTE(review): presumably guards access to @txt -- confirm against
  # the lookup helpers, which are not shown here.
  @monitor = Monitor.new
  @config = config
end

Instance Method Details

#allowed?(uri) ⇒ Boolean

Returns:

  • (Boolean)

10
11
12
13
14
15
# File 'lib/kudzu/agent/robots.rb', line 10

# Tells whether the given URI may be accessed according to the rule set
# that find_set returns for it.
#
# @param uri [String, Object] a String is parsed with Addressable::URI.parse;
#   any other value is used unchanged
# @return [Boolean] true when find_set yields no rule set; otherwise
#   whatever the rule set's allowed_path? answers for the URI
def allowed?(uri)
  target = uri.is_a?(String) ? Addressable::URI.parse(uri) : uri
  rules = find_set(target)
  # No applicable rule set means nothing forbids the request.
  rules ? rules.allowed_path?(target) : true
end

#crawl_delay(uri) ⇒ Object


17
18
19
20
21
22
# File 'lib/kudzu/agent/robots.rb', line 17

# Looks up the crawl delay of the rule set that find_set returns for the
# given URI.
#
# @param uri [String, Object] a String is parsed with Addressable::URI.parse;
#   any other value is used unchanged
# @return [Object, nil] the rule set's crawl_delay, or nil when find_set
#   yields no rule set
def crawl_delay(uri)
  target = uri.is_a?(String) ? Addressable::URI.parse(uri) : uri
  rules = find_set(target)
  # Modifier-if evaluates to nil when there is no rule set.
  rules.crawl_delay if rules
end

#sitemaps(uri) ⇒ Object


24
25
26
27
28
29
# File 'lib/kudzu/agent/robots.rb', line 24

# Returns the sitemaps reported by the object that find_txt returns for
# the given URI.
#
# @param uri [String, Object] a String is parsed with Addressable::URI.parse;
#   any other value is used unchanged
# @return [Object] the result of calling sitemaps on the found object, or
#   an empty Array when find_txt yields nothing
def sitemaps(uri)
  target = uri.is_a?(String) ? Addressable::URI.parse(uri) : uri
  robots_txt = find_txt(target)
  robots_txt ? robots_txt.sitemaps : []
end