Class: Kudzu::Agent::Robots

Inherits:
Object
  • Object
show all
Defined in:
lib/kudzu/agent/robots.rb,
lib/kudzu/agent/robots/txt.rb,
lib/kudzu/agent/robots/parser.rb

Defined Under Namespace

Classes: Parser, Rule, RuleSet, Txt

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ Robots

Returns a new instance of Robots.



6
7
8
9
10
# File 'lib/kudzu/agent/robots.rb', line 6

# Sets up the initial state of a Robots instance.
#
# @param config [Object] stored unchanged in @config; how it is consumed
#   is not visible in this snippet
def initialize(config)
  # Starts as an empty Hash; presumably filled elsewhere with per-site
  # robots.txt data -- confirm against find_txt.
  @txt = {}
  # NOTE(review): presumably guards access to @txt; no use of it is visible here.
  @monitor = Monitor.new
  @config = config
end

Instance Method Details

#allowed?(uri) ⇒ Boolean

Returns:

  • (Boolean)


12
13
14
15
16
17
# File 'lib/kudzu/agent/robots.rb', line 12

# Tells whether the given URI is permitted by the rule set found for it.
#
# @param uri [String, Addressable::URI] a String is first run through
#   Addressable::URI.parse; any other value is used as given
# @return [Boolean] true when find_set returns no rule set, otherwise
#   whatever the rule set's allowed_path? reports for the URI
def allowed?(uri)
  target = uri.is_a?(String) ? Addressable::URI.parse(uri) : uri
  rule_set = find_set(target)
  # No applicable rule set means nothing forbids the URI.
  rule_set ? rule_set.allowed_path?(target) : true
end

#crawl_delay(uri) ⇒ Object



19
20
21
22
23
24
# File 'lib/kudzu/agent/robots.rb', line 19

# Looks up the crawl delay of the rule set found for the given URI.
#
# @param uri [String, Addressable::URI] a String is first run through
#   Addressable::URI.parse; any other value is used as given
# @return [Object, nil] nil when find_set returns no rule set, otherwise
#   the rule set's crawl_delay value
def crawl_delay(uri)
  target = uri.is_a?(String) ? Addressable::URI.parse(uri) : uri
  rule_set = find_set(target)
  # A modifier-if evaluates to nil when there is no rule set.
  rule_set.crawl_delay if rule_set
end

#sitemaps(uri) ⇒ Object



26
27
28
29
30
31
# File 'lib/kudzu/agent/robots.rb', line 26

# Returns the sitemaps of the robots.txt object found for the given URI.
#
# @param uri [String, Addressable::URI] a String is first run through
#   Addressable::URI.parse; any other value is used as given
# @return [Object] an empty Array when find_txt returns nothing, otherwise
#   the value of sitemaps on the object find_txt returned
def sitemaps(uri)
  target = uri.is_a?(String) ? Addressable::URI.parse(uri) : uri
  robots_txt = find_txt(target)
  robots_txt ? robots_txt.sitemaps : []
end