Module: ArchiveAPI

Included in:
WaybackMachineDownloader
Defined in:
lib/wayback_machine_downloader/archive_api.rb

Instance Method Summary

Instance Method Details

#get_raw_list_from_api(url, page_index, http) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/wayback_machine_downloader/archive_api.rb', line 6

# Requests one page of snapshot rows for +url+ from the web.archive.org
# search endpoint and returns them as parsed JSON.
#
# The query string is built from "output=json", the given +url+, and
# whatever #parameters_for_api returns for +page_index+.
#
# @param url [String] value sent as the "url" query parameter
# @param page_index [Object, nil] passed through to #parameters_for_api
# @param http [#get] client whose +get(uri)+ returns an object responding
#   to +body+
# @return [Array] the parsed rows with a leading ["timestamp", "original"]
#   header row removed; an empty array when the body is blank, when the
#   payload is not a JSON array, or when the request/parsing raises
def get_raw_list_from_api(url, page_index, http)
  request_url = URI("https://web.archive.org/cdx/search/xd")
  params = [["output", "json"], ["url", url]] + parameters_for_api(page_index)
  request_url.query = URI.encode_www_form(params)

  begin
    body = http.get(request_url).body.to_s.strip
    return [] if body.empty?

    rows = JSON.parse(body)

    # Valid JSON that is not a list (for example an error object) cannot be
    # treated as snapshot rows, so report it instead of handing it back.
    unless rows.is_a?(Array)
      warn "Failed to fetch data from API: unexpected JSON payload (#{rows.class})"
      return []
    end

    # Drop the column-header row when the response includes it.
    rows.shift if rows.first == ["timestamp", "original"]
    rows
  rescue StandardError => e
    # JSON::ParserError is a StandardError, so one clause covers both
    # transport and parsing failures.
    warn "Failed to fetch data from API: #{e.message}"
    []
  end
end

#parameters_for_api(page_index) ⇒ Object



26
27
28
29
30
31
32
33
# File 'lib/wayback_machine_downloader/archive_api.rb', line 26

# Assembles the query parameter pairs that accompany every API request.
#
# Always includes the "fl", "collapse" and "gzip" pairs. Adds the
# "statuscode:200" filter unless @all is set, adds "from"/"to" when the
# matching timestamp ivar is set and not equal to 0, and adds "page" when
# +page_index+ is truthy.
#
# @param page_index [Object, nil] page value appended as-is when truthy
# @return [Array<Array>] list of [name, value] pairs
def parameters_for_api(page_index)
  query = [
    ["fl", "timestamp,original"],
    ["collapse", "digest"],
    ["gzip", "false"]
  ]
  query << ["filter", "statuscode:200"] unless @all

  # "from" is emitted before "to"; each bound is skipped when unset or 0.
  { "from" => @from_timestamp, "to" => @to_timestamp }.each do |name, stamp|
    next unless stamp && stamp != 0

    query << [name, stamp.to_s]
  end

  query << ["page", page_index] if page_index
  query
end