Class: Apollo::Fetcher::BaseFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/apollo_crawler/fetcher/base_fetcher.rb

Direct Known Subclasses

SimpleFetcher, SmartFetcher

Class Method Summary collapse

Class Method Details

.fetch(url, options = {}) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/apollo_crawler/fetcher/base_fetcher.rb', line 76

# Fetches +url+ with a Mechanize agent and returns the response as a plain hash.
#
# @param url [#to_s] address to fetch; converted with +to_s+ and parsed by URI.parse
# @param options [Hash] accepted for interface compatibility; not read by this method
# @return [Hash, nil] hash with +:status+ (page.code), +:headers+
#   (page.header.to_hash) and +:body+ (page.content), or nil when the URL
#   cannot be parsed
#
# Errors raised by the request itself (agent.get) are not rescued here and
# propagate to the caller.
def self.fetch(url, options = {})
	begin
		uri = URI.parse(url.to_s)
	rescue StandardError => e
		# Only StandardError is rescued: a blanket `rescue Exception` would also
		# swallow Interrupt, SystemExit and NoMemoryError.
		puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e}'"
		return nil
	end

	# Block parameter is named differently from the outer local to avoid shadowing.
	agent = Mechanize.new do |mechanize|
		mechanize.user_agent = 'Apollo Crawler'
	end

	page = agent.get(uri)

	{
		:status => page.code,
		:headers => page.header.to_hash,
		:body => page.content
	}
end

.fetch_old(url, options = {}) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/apollo_crawler/fetcher/base_fetcher.rb', line 47

# Older fetch implementation built on Faraday instead of Mechanize.
#
# @param url [#to_s] address to fetch; converted with +to_s+ and parsed by URI.parse
# @param options [Hash] accepted for interface compatibility; not read by this method
# @return [Object, nil] whatever conn.get returns (presumably a Faraday
#   response - confirm against the Faraday version in use), or nil when the
#   URL cannot be parsed or the request raises a StandardError
def self.fetch_old(url, options = {})
	begin
		uri = URI.parse(url.to_s)
	rescue StandardError => e
		# Only StandardError is rescued: a blanket `rescue Exception` would also
		# swallow Interrupt, SystemExit and NoMemoryError.
		puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e}'"
		return nil
	end

	# See https://github.com/lostisland/faraday
	conn = Faraday.new(:url => url) do |faraday|
		# faraday.request  :url_encoded             # form-encode POST params
		# faraday.response :logger                  # log requests to STDOUT
		faraday.adapter  Faraday.default_adapter  # make requests with Net::HTTP
	end

	# Make request; headers come from BaseFetcher.get_fake_headers
	begin
		res = conn.get(uri) do |request|
			request.headers = BaseFetcher.get_fake_headers(uri)
		end
	rescue StandardError => e
		# Request failures are logged and reported as nil; signals and exit
		# requests are deliberately left to propagate.
		puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e}'"
		return nil
	end

	# Return result
	res
end

.get_fake_headers(url) ⇒ Object



39
40
41
42
43
44
45
# File 'lib/apollo_crawler/fetcher/base_fetcher.rb', line 39

# Builds a header hash carrying a randomly generated IPv4 address.
#
# @param url [Object] accepted but not read by this method
# @return [Hash{String => String}] single-entry hash mapping
#   "X-Forwarded-For" to the dotted string form of a random 32-bit address
def self.get_fake_headers(url)
	# Any integer in 0...2**32 is a valid AF_INET address value.
	random_address = IPAddr.new(rand(2**32), Socket::AF_INET)

	{ "X-Forwarded-For" => random_address.to_s }
end