Class: WebRobots::RobotsTxt

Inherits:
Object
  • Object
show all
Defined in:
lib/webrobots/robotstxt.rb

Defined Under Namespace

Classes: AccessControlLine, AgentLine, AllowLine, CrawlDelayLine, DisallowLine, ExtentionLine, Line, Parser, Record

Constant Summary collapse

# robots.txt body that forbids every user agent from the entire site.
# RobotsTxt.unfetchable feeds this text to the parser.
DISALLOW_ALL = "User-Agent: *\n" \
               "Disallow: /\n"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(site, records, options = nil) ⇒ RobotsTxt

class Parser



533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
# File 'lib/webrobots/robotstxt.rb', line 533

# Builds the rule set for one site from already-parsed records.
#
# Records that answer true to +default?+ are always kept and are placed
# after every other record. A non-default record is kept only when no
# :target option was given or when the record matches that target.
#
# @param site [Object] the site these rules belong to (stored as-is)
# @param records [#each, nil] parsed records; nil or empty yields no rules
# @param options [Hash, nil] optional settings; the keys read here are
#   :error, :target, :sitemaps and :crawl_delay_handler
def initialize(site, records, options = nil)
  @site = site
  @timestamp = Time.now
  @last_checked_at = nil
  @options = options || {}

  @crawl_delay_handler = @options[:crawl_delay_handler]
  @sitemaps = @options[:sitemaps] || []
  @target = @options[:target]
  @error = @options[:error]

  specific = []
  fallback = []
  if records && !records.empty?
    records.each do |rec|
      if rec.default?
        fallback << rec
      elsif !@target || rec.match?(@target)
        specific << rec
      end
    end
  end
  # Default records go last, after the more specific ones.
  @records = specific + fallback
end

Instance Attribute Details

#error ⇒ Object

Returns the value of attribute error.



560
561
562
# File 'lib/webrobots/robotstxt.rb', line 560

# Returns the error currently recorded on this object.
# The constructor seeds it from options[:error]; RobotsTxt.unfetchable
# later assigns the failure reason through the matching writer.
def error
  @error
end

#site ⇒ Object (readonly)

Returns the value of attribute site.



559
560
561
# File 'lib/webrobots/robotstxt.rb', line 559

# Returns the site value that was passed to the constructor, unchanged.
def site
  @site
end

#sitemaps ⇒ Object (readonly)

Returns the value of attribute sitemaps.



559
560
561
# File 'lib/webrobots/robotstxt.rb', line 559

# Returns the sitemaps supplied via options[:sitemaps] at construction,
# or the empty array the constructor substitutes when none were given.
def sitemaps
  @sitemaps
end

#timestamp ⇒ Object (readonly)

Returns the value of attribute timestamp.



559
560
561
# File 'lib/webrobots/robotstxt.rb', line 559

# Returns the Time captured (via Time.now) when this object was constructed.
def timestamp
  @timestamp
end

Class Method Details

.unfetchable(site, reason, target = nil) ⇒ Object



610
611
612
613
614
# File 'lib/webrobots/robotstxt.rb', line 610

# Builds the stand-in result used when a site's robots.txt could not be
# fetched: the DISALLOW_ALL text is parsed for +site+ and the failure
# +reason+ is recorded on the parsed result.
#
# @param site [Object] the site whose robots.txt was unavailable
# @param reason [Object] the failure to record as the result's error
# @param target [Object, nil] handed to Parser.new unchanged
# @return [Object] whatever Parser#parse returns, with +error+ set
def self.unfetchable(site, reason, target = nil)
  parser = Parser.new(target)
  robots_txt = parser.parse(DISALLOW_ALL, site)
  robots_txt.error = reason
  robots_txt
end

Instance Method Details

#allow?(request_uri, user_agent = nil) ⇒ Boolean

Returns:

  • (Boolean)


585
586
587
588
589
590
591
592
593
# File 'lib/webrobots/robotstxt.rb', line 585

# Reports whether +request_uri+ may be fetched by +user_agent+.
#
# When no record applies to the agent the request is allowed and nothing
# else happens. Otherwise the record decides; if it carries a delay and a
# crawl-delay handler is configured, the handler is called with the delay
# and the time of the previous check, after which that time is refreshed.
#
# @param request_uri [Object] passed to the record's +allow?+
# @param user_agent [Object, nil] passed to +find_record+
# @return [Boolean] the record's verdict, or true when no record applies
def allow?(request_uri, user_agent = nil)
  record = find_record(user_agent)
  return true unless record

  permitted = record.allow?(request_uri)
  delay = record.delay
  @crawl_delay_handler.call(delay, @last_checked_at) if delay && @crawl_delay_handler
  @last_checked_at = Time.now
  permitted
end

#crawl_delay(user_agent = nil) ⇒ Object



595
596
597
598
# File 'lib/webrobots/robotstxt.rb', line 595

# Returns the delay of the record that applies to +user_agent+, or 0 when
# no record applies or the record has no delay.
#
# @param user_agent [Object, nil] passed to +find_record+
# @return [Object] the record's delay, or 0
def crawl_delay(user_agent = nil)
  record = find_record(user_agent)
  (record && record.delay) || 0
end

#error! ⇒ Object

Raises:



562
563
564
# File 'lib/webrobots/robotstxt.rb', line 562

# Raises the recorded error, if there is one; otherwise does nothing and
# returns nil.
#
# @raise [Exception] whatever is stored in @error
# @return [nil] when no error is recorded
def error!
  return unless @error

  raise @error
end

#options(user_agent = nil) ⇒ Object



600
601
602
603
# File 'lib/webrobots/robotstxt.rb', line 600

# Returns the options of the record that applies to +user_agent+, or an
# empty hash when no record applies.
#
# @param user_agent [Object, nil] passed to +find_record+
# @return [Object] the record's options, or {}
def options(user_agent = nil)
  record = find_record(user_agent)
  record ? record.options : {}
end