Class: Kudzu::Agent::PageFilterer

Inherits:
Object
Defined in:
lib/kudzu/agent/page_filterer.rb

Instance Method Summary

Constructor Details

#initialize(config) ⇒ PageFilterer

Returns a new instance of PageFilterer.


4
5
6
# File 'lib/kudzu/agent/page_filterer.rb', line 4

# Stores the configuration this filterer consults.
#
# @param config [#find_filter] kept in @config; the predicate methods of this
#   class call +find_filter+ on it with a URL to look up the filter to apply.
def initialize(config)
  @config = config
end

Instance Method Details

#allowed?(response) ⇒ Boolean

Returns:

  • (Boolean)

8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/kudzu/agent/page_filterer.rb', line 8

# Decides whether a fetched page passes the filter found for its URL and
# logs the outcome at :info level.
#
# A page passes when no filter is found for its URL, or when the mime type,
# size and index checks all succeed. The checks run in that order and stop
# at the first one that fails.
#
# @param response [#url, #mime_type, #size] the response to check
# @return [Boolean] true when the page passed, false when it was dropped
def allowed?(response)
  rule = @config.find_filter(response.url)

  passes = rule.nil? ||
           (allowed_mime_type?(response.mime_type, rule) &&
            allowed_size?(response.size, rule) &&
            allowed_index?(response))

  unless passes
    Kudzu.log :info, "dropped page: #{response.url}"
    return false
  end

  Kudzu.log :info, "passed page: #{response.url}"
  true
end

#allowed_response_header?(url, response_header) ⇒ Boolean

Returns:

  • (Boolean)

22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/kudzu/agent/page_filterer.rb', line 22

# Checks a response header against the filter found for +url+, using only
# the 'content-type' and 'content-length' entries.
#
# When no filter is found for the URL the header is accepted immediately and
# is not parsed at all. Otherwise the mime type check runs first and the
# size check runs only if it succeeds. A missing header entry is passed to
# the corresponding check as nil.
#
# @param url [String] URL used to look up the filter
# @param response_header [#[]] header collection indexed by the lowercase
#   string keys 'content-type' and 'content-length'
# @return [Boolean] true when there is no filter; otherwise the combined
#   result of the mime type and size checks
def allowed_response_header?(url, response_header)
  filter = @config.find_filter(url)
  # Nothing to check without a filter, so skip header parsing entirely.
  return true if filter.nil?

  content_type = response_header['content-type']
  mime_type = Util::ContentTypeParser.parse(content_type).first if content_type

  content_length = response_header['content-length']
  # String#to_i is lenient: a non-numeric value becomes 0 rather than raising.
  size = content_length.to_i if content_length

  allowed_mime_type?(mime_type, filter) && allowed_size?(size, filter)
end