Class: ProxyCrawl::StorageAPI

Inherits:
Object
  • Object
show all
Defined in:
lib/proxycrawl/storage_api.rb

Constant Summary collapse

INVALID_TOKEN =
'Token is required'
INVALID_RID =
'RID is required'
INVALID_RID_ARRAY =
'One or more RIDs are required'
INVALID_URL_OR_RID =
'Either URL or RID is required'
BASE_URL =
'https://api.proxycrawl.com/storage'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ StorageAPI

Returns a new instance of StorageAPI.

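Raises:

  • (RuntimeError) — with the INVALID_TOKEN message ('Token is required') when options[:token] is nil or empty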



17
18
19
20
21
22
# File 'lib/proxycrawl/storage_api.rb', line 17

# Builds a Storage API client bound to a single account token.
#
# @param options [Hash] client settings
# @option options [String] :token API token; required
# @option options [Integer] :timeout stored as the client timeout; 120 is used
#   when the key is absent or falsy
# @raise [RuntimeError] with the INVALID_TOKEN message when :token is nil or empty
def initialize(options = {})
  supplied_token = options[:token]
  token_missing = supplied_token.nil? || supplied_token.empty?
  raise INVALID_TOKEN if token_missing

  @token = supplied_token
  @timeout = options[:timeout] || 120
end

Instance Attribute Details

#body ⇒ Object (readonly)

Returns the value of attribute body.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# Body recorded by the most recent API call.
#
# After #get this is the raw response body; after #bulk, #delete, #rids and
# #total_count it is the JSON-parsed response body.
#
# @return [Object, nil] nil until one of those calls has stored a body
def body
  @body
end

#original_status ⇒ Object (readonly)

Returns the value of attribute original_status.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# 'original_status' value(s) recorded by the most recent API call.
#
# #get stores a single Integer (via to_i); #bulk stores an Array of Integers,
# one per returned item; #delete, #rids and #total_count reset it to nil.
#
# @return [Integer, Array<Integer>, nil]
def original_status
  @original_status
end

#pc_status ⇒ Object (readonly)

Returns the value of attribute pc_status.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# 'pc_status' value(s) recorded by the most recent API call.
#
# #get stores a single Integer (via to_i); #bulk stores an Array of Integers,
# one per returned item; #delete, #rids and #total_count reset it to nil.
#
# @return [Integer, Array<Integer>, nil]
def pc_status
  @pc_status
end

#rid ⇒ Object (readonly)

Returns the value of attribute rid.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# RID information recorded by the most recent API call.
#
# #get stores the response's 'rid' value; #bulk stores an Array with each
# item's 'rid'; #delete stores the RID it was asked to delete; #rids stores the
# whole parsed response body; #total_count leaves the previous value in place.
#
# @return [Object, nil] nil until one of those calls has stored a value
def rid
  @rid
end

#status_code ⇒ Object (readonly)

Returns the value of attribute status_code.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# HTTP status code of the most recent API response, converted with to_i.
#
# Set by #get, #bulk, #delete, #rids and #total_count.
#
# @return [Integer, nil] nil until a request has been made
def status_code
  @status_code
end

#stored_at ⇒ Object (readonly)

Returns the value of attribute stored_at.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# 'stored_at' value(s) recorded by the most recent API call.
#
# #get stores the response's 'stored_at' value as-is; #bulk stores an Array
# with each item's 'stored_at'; #delete, #rids and #total_count reset it to nil.
#
# @return [Object, Array, nil]
def stored_at
  @stored_at
end

#timeout ⇒ Object (readonly)

Returns the value of attribute timeout.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# Timeout chosen at construction: options[:timeout], or 120 when that is
# absent or falsy. #get passes it to Net::HTTP as read_timeout.
#
# @return [Object] the configured timeout value
def timeout
  @timeout
end

#token ⇒ Object (readonly)

Returns the value of attribute token.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# API token supplied as options[:token] at construction; the request methods
# send it as the 'token' query parameter.
#
# @return [String]
def token
  @token
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



9
10
11
# File 'lib/proxycrawl/storage_api.rb', line 9

# 'url' value(s) recorded by the most recent API call.
#
# #get stores the response's 'url' value; #bulk stores an Array with each
# item's 'url'; #delete, #rids and #total_count reset it to nil.
#
# @return [Object, Array, nil]
def url
  @url
end

Instance Method Details

#bulk(rids_array = []) ⇒ Object

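Raises:

  • (RuntimeError) — with the INVALID_RID_ARRAY message ('One or more RIDs are required') when rids_array is empty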



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/proxycrawl/storage_api.rb', line 71

# Retrieves several stored documents in one request by POSTing their RIDs as
# a JSON body to "#{BASE_URL}/bulk".
#
# The parsed response is expected to be an array of item hashes; their fields
# are exposed as parallel arrays through the readers (original_status,
# pc_status, url, rid, stored_at), while body holds the parsed array itself.
#
# @param rids_array [Array<String>] RIDs to fetch
# @return [StorageAPI] self, with the reader attributes refreshed
# @raise [RuntimeError] with the INVALID_RID_ARRAY message when rids_array is
#   nil or empty
def bulk(rids_array = [])
  raise INVALID_RID_ARRAY if rids_array.nil? || rids_array.empty?

  uri = URI("#{BASE_URL}/bulk")
  uri.query = URI.encode_www_form(token: token)

  request = Net::HTTP::Post.new(uri.request_uri, { 'Content-Type' => 'application/json' })
  request.body = { rids: rids_array }.to_json

  # Net::HTTP.new(host) on its own speaks plain HTTP on port 80 regardless of
  # the URI scheme, which would send the token unencrypted. Take the port and
  # TLS setting from the URI instead, and honour the configured timeout.
  connection_options = { use_ssl: uri.scheme == 'https', read_timeout: timeout }
  response = Net::HTTP.start(uri.hostname, uri.port, connection_options) do |http|
    http.request(request)
  end

  @body = JSON.parse(response.body)
  @status_code = response.code.to_i
  @original_status = @body.map { |item| item['original_status'].to_i }
  @pc_status = @body.map { |item| item['pc_status'].to_i }
  @url = @body.map { |item| item['url'] }
  @rid = @body.map { |item| item['rid'] }
  @stored_at = @body.map { |item| item['stored_at'] }

  self
end

#delete(rid) ⇒ Object

Raises:

  • (RuntimeError) — with the INVALID_RID message ('RID is required') when rid is nil or empty



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/proxycrawl/storage_api.rb', line 54

# Deletes one stored document by sending an HTTP DELETE to BASE_URL with the
# token and RID as query parameters.
#
# Afterwards url, original_status, pc_status and stored_at are nil, rid is the
# RID passed in, and body is the JSON-parsed response.
#
# @param rid [String] RID of the stored document to remove
# @return [Boolean] true when the parsed response contains a 'success' key
# @raise [RuntimeError] with the INVALID_RID message when rid is nil or empty
def delete(rid)
  raise INVALID_RID if rid.nil? || rid.empty?

  uri = URI(BASE_URL)
  uri.query = URI.encode_www_form(token: token, rid: rid)
  request = Net::HTTP::Delete.new(uri.request_uri)

  # Net::HTTP.new(host) on its own speaks plain HTTP on port 80 regardless of
  # the URI scheme, which would send the token unencrypted. Take the port and
  # TLS setting from the URI instead, and honour the configured timeout.
  connection_options = { use_ssl: uri.scheme == 'https', read_timeout: timeout }
  response = Net::HTTP.start(uri.hostname, uri.port, connection_options) do |http|
    http.request(request)
  end

  @url = @original_status = @pc_status = @stored_at = nil
  @status_code = response.code.to_i
  @rid = rid
  @body = JSON.parse(response.body)

  @body.key?('success')
end

#get(url_or_rid, format = 'html') ⇒ Object

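Raises:

  • (RuntimeError) — with the INVALID_URL_OR_RID message ('Either URL or RID is required') when url_or_rid is nil or empty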



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/proxycrawl/storage_api.rb', line 24

# Fetches one stored document, identified by its URL or its RID.
#
# With format 'json' the metadata (original_status, pc_status, url, rid,
# stored_at) is read from the JSON-parsed response body; with any other format
# it is looked up on the response object itself via [] (the response headers
# for a Net::HTTP response). In both cases body keeps the raw response body.
#
# @param url_or_rid [String] URL or RID of the stored document
# @param format [String] sent as the 'format' query parameter; 'json' also
#   switches metadata extraction to the parsed body
# @return [StorageAPI] self, with the reader attributes refreshed
# @raise [RuntimeError] with the INVALID_URL_OR_RID message when url_or_rid is
#   nil or empty
def get(url_or_rid, format = 'html')
  raise INVALID_URL_OR_RID if url_or_rid.nil? || url_or_rid.empty?

  # decide_url_or_rid is defined outside this view; presumably it returns a
  # one-entry hash naming the value as url or rid — verify against the helper.
  query = { token: token, format: format }.merge(decide_url_or_rid(url_or_rid))

  uri = URI(BASE_URL)
  uri.query = URI.encode_www_form(query)
  request = Net::HTTP::Get.new(uri)

  # No verify_mode override: forcing VERIFY_NONE would accept any certificate
  # and expose the token to interception. Net::HTTP's default verification of
  # the peer certificate is kept instead.
  connection_options = {
    read_timeout: timeout,
    use_ssl: uri.scheme == 'https'
  }

  response = Net::HTTP.start(uri.hostname, uri.port, connection_options) do |http|
    http.request(request)
  end

  metadata = format == 'json' ? JSON.parse(response.body) : response

  @original_status = metadata['original_status'].to_i
  @pc_status = metadata['pc_status'].to_i
  @url = metadata['url']
  @rid = metadata['rid']
  @stored_at = metadata['stored_at']

  @status_code = response.code.to_i
  @body = response.body

  self
end

#rids(limit = -1) ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/proxycrawl/storage_api.rb', line 92

# Lists stored RIDs by issuing a GET to "#{BASE_URL}/rids".
#
# Afterwards url, original_status, pc_status and stored_at are nil, and both
# body and rid hold the JSON-parsed response.
#
# @param limit [Integer] sent as the 'limit' query parameter when it is zero
#   or greater; the default of -1 omits the parameter
# @return [Object] the JSON-parsed response body
def rids(limit = -1)
  endpoint = URI("#{BASE_URL}/rids")
  params = { token: token }
  params[:limit] = limit if limit >= 0
  endpoint.query = URI.encode_www_form(params)

  reply = Net::HTTP.get_response(endpoint)

  @url = @original_status = @pc_status = @stored_at = nil
  @status_code = reply.code.to_i
  @rid = @body = JSON.parse(reply.body)

  @body
end

#total_count ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/proxycrawl/storage_api.rb', line 107

# Asks the API how many documents are stored, via a GET to
# "#{BASE_URL}/total_count".
#
# Afterwards url, original_status, pc_status and stored_at are nil and body
# holds the JSON-parsed response.
#
# @return [Object, nil] the 'totalCount' entry of the parsed response
def total_count
  endpoint = URI("#{BASE_URL}/total_count")
  endpoint.query = URI.encode_www_form(token: token)
  reply = Net::HTTP.get_response(endpoint)

  @url = @original_status = @pc_status = @stored_at = nil
  @status_code = reply.code.to_i
  # NOTE(review): this re-stores the current rid, so any earlier value survives
  # the call — confirm whether a reset to nil was intended.
  @rid = rid
  @body = JSON.parse(reply.body)

  body['totalCount']
end