Class: WebRobots::RobotsTxt
- Inherits:
-
Object
- Object
- WebRobots::RobotsTxt
- Defined in:
- lib/webrobots/robotstxt.rb
Defined Under Namespace
Classes: AccessControlLine, AgentLine, AllowLine, CrawlDelayLine, DisallowLine, ExtentionLine, Line, Parser, Record
Constant Summary collapse
- DISALLOW_ALL =
<<-TXT User-Agent: * Disallow: / TXT
Instance Attribute Summary collapse
-
#error ⇒ Object
Returns the value of attribute error.
-
#site ⇒ Object
readonly
Returns the value of attribute site.
-
#sitemaps ⇒ Object
readonly
Returns the value of attribute sitemaps.
-
#timestamp ⇒ Object
readonly
Returns the value of attribute timestamp.
Class Method Summary collapse
- .unfetchable(site, reason, target = nil) ⇒ Object
Instance Method Summary collapse
- #allow?(request_uri, user_agent = nil) ⇒ Boolean
- #crawl_delay(user_agent = nil) ⇒ Object
- #error! ⇒ Object
-
#initialize(site, records, options = nil) ⇒ RobotsTxt
constructor
class Parser.
- #options(user_agent = nil) ⇒ Object
Constructor Details
#initialize(site, records, options = nil) ⇒ RobotsTxt
class Parser
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 |
# File 'lib/webrobots/robotstxt.rb', line 533 def initialize(site, records, options = nil) @timestamp = Time.now @site = site @options = options || {} @last_checked_at = nil @error = @options[:error] @target = @options[:target] @sitemaps = @options[:sitemaps] || [] @crawl_delay_handler = @options[:crawl_delay_handler] if records && !records.empty? @records, defaults = [], [] records.each { |record| if record.default? defaults << record elsif !@target || record.match?(@target) @records << record end } @records.concat(defaults) else @records = [] end end |
Instance Attribute Details
#error ⇒ Object
Returns the value of attribute error.
560 561 562 |
# File 'lib/webrobots/robotstxt.rb', line 560 def error @error end |
#site ⇒ Object (readonly)
Returns the value of attribute site.
559 560 561 |
# File 'lib/webrobots/robotstxt.rb', line 559 def site @site end |
#sitemaps ⇒ Object (readonly)
Returns the value of attribute sitemaps.
559 560 561 |
# File 'lib/webrobots/robotstxt.rb', line 559 def sitemaps @sitemaps end |
#timestamp ⇒ Object (readonly)
Returns the value of attribute timestamp.
559 560 561 |
# File 'lib/webrobots/robotstxt.rb', line 559 def timestamp @timestamp end |
Class Method Details
.unfetchable(site, reason, target = nil) ⇒ Object
610 611 612 613 614 |
# File 'lib/webrobots/robotstxt.rb', line 610 def self.unfetchable(site, reason, target = nil) Parser.new(target).parse(DISALLOW_ALL, site).tap { |robots_txt| robots_txt.error = reason } end |
Instance Method Details
#allow?(request_uri, user_agent = nil) ⇒ Boolean
585 586 587 588 589 590 591 592 593 |
# File 'lib/webrobots/robotstxt.rb', line 585 def allow?(request_uri, user_agent = nil) record = find_record(user_agent) or return true allow = record.allow?(request_uri) if delay = record.delay and @crawl_delay_handler @crawl_delay_handler.call(delay, @last_checked_at) end @last_checked_at = Time.now return allow end |
#crawl_delay(user_agent = nil) ⇒ Object
595 596 597 598 |
# File 'lib/webrobots/robotstxt.rb', line 595 def crawl_delay(user_agent = nil) record = find_record(user_agent) or return 0 record.delay or return 0 end |
#error! ⇒ Object
562 563 564 |
# File 'lib/webrobots/robotstxt.rb', line 562 def error! raise @error if @error end |
#options(user_agent = nil) ⇒ Object
600 601 602 603 |
# File 'lib/webrobots/robotstxt.rb', line 600 def options(user_agent = nil) record = find_record(user_agent) or return {} record.options end |