Class: Crawlbase::StorageAPI

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlbase/storage_api.rb

Constant Summary collapse

INVALID_TOKEN =
'Token is required'
INVALID_RID =
'RID is required'
INVALID_RID_ARRAY =
'One or more RIDs are required'
INVALID_URL_OR_RID =
'Either URL or RID is required'
BASE_URL =
'https://api.crawlbase.com/storage'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ StorageAPI

Returns a new instance of StorageAPI.

Raises:

  • (RuntimeError) — with the message 'Token is required' when options[:token] is nil or empty



17
18
19
20
21
# File 'lib/crawlbase/storage_api.rb', line 17

# Builds a client bound to a single API token.
#
# @param options [Hash] configuration; only the +:token+ entry is read here
# @raise [RuntimeError] with INVALID_TOKEN when +options[:token]+ is nil or empty
def initialize(options = {})
  supplied = options[:token]
  token_missing = supplied.nil? || supplied.empty?
  raise INVALID_TOKEN if token_missing

  @token = supplied
end

Instance Attribute Details

#body ⇒ Object (readonly)

Returns the value of attribute body.



9
10
11
# File 'lib/crawlbase/storage_api.rb', line 9

# Payload of the most recent request made through this instance.
#
# +get+ stores the raw response body string; +bulk+, +delete+, +rids+ and
# +total_count+ store the result of JSON.parse on the response body.
#
# @return [Object, nil] nil until one of the request methods has run
def body
  @body
end

#original_status ⇒ Object (readonly)

Returns the value of attribute original_status.



9
10
11
# File 'lib/crawlbase/storage_api.rb', line 9

# The 'original_status' value reported for the last fetched document(s).
#
# +get+ stores a single Integer (converted with +to_i+); +bulk+ stores an
# Array of Integers, one per returned item; +delete+, +rids+ and
# +total_count+ reset it to nil.
#
# @return [Integer, Array<Integer>, nil]
def original_status
  @original_status
end

#pc_status ⇒ Object (readonly)

Returns the value of attribute pc_status.



9
10
11
# File 'lib/crawlbase/storage_api.rb', line 9

# The 'pc_status' value reported for the last fetched document(s).
#
# +get+ stores a single Integer (converted with +to_i+); +bulk+ stores an
# Array of Integers, one per returned item; +delete+, +rids+ and
# +total_count+ reset it to nil.
#
# @return [Integer, Array<Integer>, nil]
def pc_status
  @pc_status
end

#rid ⇒ Object (readonly)

Returns the value of attribute rid.



9
10
11
# File 'lib/crawlbase/storage_api.rb', line 9

# RID information recorded by the last request.
#
# +get+ stores the 'rid' entry of the response; +bulk+ stores an Array with
# the 'rid' of every returned item; +delete+ stores the RID it was asked to
# remove; +rids+ stores the whole parsed response body.
#
# @return [Object, nil] nil until one of those methods has run
def rid
  @rid
end

#status_code ⇒ Object (readonly)

Returns the value of attribute status_code.



9
10
11
# File 'lib/crawlbase/storage_api.rb', line 9

# HTTP status of the most recent request, stored by every request method as
# +response.code.to_i+.
#
# @return [Integer, nil] nil until a request has been made
def status_code
  @status_code
end

#stored_at ⇒ Object (readonly)

Returns the value of attribute stored_at.



9
10
11
# File 'lib/crawlbase/storage_api.rb', line 9

# The 'stored_at' value reported for the last fetched document(s).
#
# +get+ stores the single 'stored_at' entry of the response; +bulk+ stores an
# Array with one entry per returned item; +delete+, +rids+ and +total_count+
# reset it to nil.
#
# @return [Object, Array, nil]
def stored_at
  @stored_at
end

#token ⇒ Object (readonly)

Returns the value of attribute token.



9
10
11
# File 'lib/crawlbase/storage_api.rb', line 9

# The API token given to the constructor as +options[:token]+; every request
# method sends it as the +token+ query parameter.
#
# @return [Object] the value validated and stored by +initialize+
def token
  @token
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



9
10
11
# File 'lib/crawlbase/storage_api.rb', line 9

# The 'url' value reported for the last fetched document(s).
#
# +get+ stores the single 'url' entry of the response; +bulk+ stores an Array
# with one entry per returned item; +delete+, +rids+ and +total_count+ reset
# it to nil.
#
# @return [Object, Array, nil]
def url
  @url
end

Instance Method Details

#bulk(rids_array = []) ⇒ Object

Raises:

  • (RuntimeError) — with the message 'One or more RIDs are required' when rids_array is empty



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/crawlbase/storage_api.rb', line 61

# Fetches several stored documents with a single request.
#
# Sends a JSON POST to "#{BASE_URL}/bulk" with the token as a query parameter
# and the RIDs in the body as +{ rids: [...] }+. The parsed response is kept
# in +body+; +original_status+, +pc_status+, +url+, +rid+ and +stored_at+
# become Arrays holding the matching field of every returned item, and
# +status_code+ holds the HTTP status.
#
# @param rids_array [Array] the RIDs to fetch
# @return [self]
# @raise [RuntimeError] with INVALID_RID_ARRAY when +rids_array+ is nil or empty
def bulk(rids_array = [])
  raise INVALID_RID_ARRAY if rids_array.nil? || rids_array.empty?

  uri = URI("#{BASE_URL}/bulk")
  uri.query = URI.encode_www_form(token: token)
  # Net::HTTP.new(host) on its own targets port 80 without TLS, which would
  # send the token and RIDs in clear text even though BASE_URL is https.
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = uri.scheme == 'https'
  request = Net::HTTP::Post.new(uri.request_uri, 'Content-Type' => 'application/json')
  request.body = { rids: rids_array }.to_json
  response = http.request(request)

  # NOTE(review): the mapping below assumes the API answers with a JSON array
  # of objects - confirm against the Storage API documentation.
  @body = JSON.parse(response.body)
  @original_status = @body.map { |item| item['original_status'].to_i }
  @status_code = response.code.to_i
  @pc_status = @body.map { |item| item['pc_status'].to_i }
  @url = @body.map { |item| item['url'] }
  @rid = @body.map { |item| item['rid'] }
  @stored_at = @body.map { |item| item['stored_at'] }

  self
end

#delete(rid) ⇒ Object

Raises:

  • (RuntimeError) — with the message 'RID is required' when rid is nil or empty



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/crawlbase/storage_api.rb', line 44

# Deletes the stored document identified by +rid+.
#
# Issues an HTTP DELETE to BASE_URL with +token+ and +rid+ as query
# parameters. Afterwards +status_code+ holds the HTTP status, +rid+ the RID
# that was passed in, +body+ the parsed JSON response, and the per-document
# fields (+url+, +original_status+, +pc_status+, +stored_at+) are nil.
#
# @param rid [String] identifier of the stored document
# @return [Boolean] true when the parsed response contains a 'success' key
# @raise [RuntimeError] with INVALID_RID when +rid+ is nil or empty
def delete(rid)
  raise INVALID_RID if rid.nil? || rid.empty?

  uri = URI(BASE_URL)
  uri.query = URI.encode_www_form(token: token, rid: rid)
  # Net::HTTP.new(host) on its own targets port 80 without TLS, which would
  # send the token in clear text even though BASE_URL is https.
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = uri.scheme == 'https'
  response = http.request(Net::HTTP::Delete.new(uri.request_uri))

  @url = @original_status = @pc_status = @stored_at = nil
  @status_code = response.code.to_i
  @rid = rid
  @body = JSON.parse(response.body)

  @body.key?('success')
end

#get(url_or_rid, format = 'html') ⇒ Object

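Raises:

  • (RuntimeError) — with the message 'Either URL or RID is required' when url_or_rid is nil or empty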



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/crawlbase/storage_api.rb', line 23

# Retrieves one stored document, addressed by URL or by RID.
#
# The query sent to BASE_URL is built from +token+, +format+ and whatever
# +decide_url_or_rid+ returns for the argument. When +format+ is 'json' the
# metadata is read from the parsed body; for any other format it is looked up
# on the response object itself via +[]+. +body+ always keeps the raw
# response body string and +status_code+ the HTTP status.
#
# @param url_or_rid [String] URL or RID of the stored document
# @param format [String] response format requested from the API
# @return [self]
# @raise [RuntimeError] with INVALID_URL_OR_RID when +url_or_rid+ is nil or empty
def get(url_or_rid, format = 'html')
  raise INVALID_URL_OR_RID if url_or_rid.nil? || url_or_rid.empty?

  uri = URI(BASE_URL)
  params = { token: token, format: format }.merge(decide_url_or_rid(url_or_rid))
  uri.query = URI.encode_www_form(params)
  response = Net::HTTP.get_response(uri)

  meta = if format == 'json'
           JSON.parse(response.body)
         else
           response
         end

  # Missing entries come back as nil, which to_i turns into 0.
  @original_status, @pc_status = %w[original_status pc_status].map { |field| meta[field].to_i }
  @url, @rid, @stored_at = %w[url rid stored_at].map { |field| meta[field] }

  @status_code = response.code.to_i
  @body = response.body

  self
end

#rids(limit = -1) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/crawlbase/storage_api.rb', line 82

# Lists stored RIDs by querying "#{BASE_URL}/rids".
#
# The +limit+ query parameter is only sent when +limit+ is zero or greater,
# so the default of -1 leaves it out. The parsed JSON response is stored in
# both +body+ and +rid+; +status_code+ holds the HTTP status and the
# per-document fields (+url+, +original_status+, +pc_status+, +stored_at+)
# are reset to nil.
#
# @param limit [Integer] value for the +limit+ parameter; negative omits it
# @return [Object] the parsed JSON response
def rids(limit = -1)
  endpoint = URI("#{BASE_URL}/rids")
  params = { token: token }
  params[:limit] = limit if limit >= 0
  endpoint.query = URI.encode_www_form(params)

  response = Net::HTTP.get_response(endpoint)
  @url = @original_status = @pc_status = @stored_at = nil
  @status_code = response.code.to_i
  @rid = @body = JSON.parse(response.body)
end

#total_count ⇒ Object



97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/crawlbase/storage_api.rb', line 97

# Asks "#{BASE_URL}/total_count" for the number of stored documents.
#
# +status_code+ holds the HTTP status and +body+ the parsed JSON response.
# All per-document fields (+url+, +original_status+, +pc_status+,
# +stored_at+ and +rid+) are reset to nil, since this call is not about any
# single document.
#
# @return [Object, nil] the 'totalCount' entry of the parsed response
def total_count
  uri = URI("#{BASE_URL}/total_count")
  uri.query = URI.encode_www_form(token: token)

  response = Net::HTTP.get_response(uri)
  # Clear rid together with the other fields; assigning it to itself would
  # keep a stale RID from an earlier get/delete/bulk call.
  @url = @original_status = @pc_status = @stored_at = @rid = nil
  @status_code = response.code.to_i
  @body = JSON.parse(response.body)

  @body['totalCount']
end