Class: Xapian::Indexer::Loaders::HTTP

Inherits:
Object
  • Object
show all
Defined in:
lib/xapian/indexer/loaders/http.rb

Constant Summary collapse

UserAgent =
"Xapian-Spider #{Xapian::Indexer::VERSION}"

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ HTTP

Returns a new instance of HTTP.



25
26
27
28
29
# File 'lib/xapian/indexer/loaders/http.rb', line 25

# Build a loader from an options hash.
#
# The hash is kept as given in @options. The :logger entry, when truthy,
# becomes the loader's logger; otherwise a Logger writing to $stderr is used.
def initialize(options = {})
	@logger = options[:logger]
	@logger ||= Logger.new($stderr)
	
	@options = options
end

Instance Method Details

#call(name, &block) ⇒ Object

Fetch the resource named by an absolute URI and yield its status code, response headers, and a lambda that loads the body on demand.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/xapian/indexer/loaders/http.rb', line 32

# Load the resource identified by +name+ over HTTP or HTTPS.
#
# A HEAD request is issued first; the block is then yielded three values:
# the numeric status code, the HEAD response (which exposes the headers),
# and a lambda that performs a GET and returns the body when called.
# The lambda reuses the connection opened here, so it should be invoked
# from inside the block, before this method returns.
#
# @param name [String] the candidate URI.
# @yield [status, headers, body] Integer status, the HEAD response, and a
#   lambda returning the body String.
# @return [Boolean] true if +name+ was an HTTP(S) URI and the block was
#   yielded to; false if this loader does not handle +name+.
# @raise [URI::InvalidURIError] if +name+ cannot be parsed by URI.parse.
def call(name, &block)
	uri = URI.parse(name)
	
	# Relative names and non-HTTP schemes (mailto:, ftp:, file:, ...) cannot
	# be fetched with Net::HTTP, so they are reported as "not handled".
	# URI::HTTPS is a subclass of URI::HTTP, so both are accepted here.
	return false unless uri.is_a?(URI::HTTP)
	
	# request_uri keeps the query string and yields "/" for an empty path,
	# whereas uri.path alone would drop the query and may be empty.
	path = uri.request_uri
	headers = {'User-Agent' => UserAgent}
	
	# TLS must be enabled explicitly, otherwise https:// URIs are spoken to
	# in plain HTTP.
	Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.is_a?(URI::HTTPS)) do |http|
		head = http.request_head(path, headers)
		
		# Deferred so the caller can decide from the headers whether the
		# body is worth downloading.
		body = lambda do
			http.request_get(path, headers).body
		end
		
		@logger.info "Loading external URI: #{name.inspect}"
		
		# Net::HTTPResponse#header is an obsolete alias that returns the
		# response itself, so the response is yielded directly.
		yield head.code.to_i, head, body
	end
	
	return true
end