Module: ArchiveAPI
- Included in:
- WaybackMachineDownloader
- Defined in:
- lib/wayback_machine_downloader/archive_api.rb
Instance Method Summary collapse
Instance Method Details
#get_raw_list_from_api(url, page_index, http) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/wayback_machine_downloader/archive_api.rb', line 6
#
# Requests one page of snapshot rows from the Wayback Machine CDX endpoint
# and returns them as parsed JSON with the column-header row removed.
#
# @param url [String] value sent as the "url" query parameter
# @param page_index [Integer, nil] forwarded to #parameters_for_api
# @param http [#get] client whose #get(uri) returns an object responding
#   to #body
# @return [Object] the parsed JSON document (the code treats it as a list of
#   rows); an empty Array when the body is blank or when the request or the
#   parsing raises a StandardError
def get_raw_list_from_api(url, page_index, http)
  request_url = URI("https://web.archive.org/cdx/search/xd")
  params = [["output", "json"], ["url", url]] + parameters_for_api(page_index)
  request_url.query = URI.encode_www_form(params)

  begin
    response = http.get(request_url)
    body = response.body.to_s.strip
    return [] if body.empty?

    json = JSON.parse(body)
    # Drop the leading ["timestamp", "original"] header row when present so
    # callers only receive data rows.
    json.shift if json.first == ["timestamp", "original"]
    json
  rescue StandardError => e
    # JSON::ParserError is a StandardError, so this single clause covers both
    # malformed payloads and transport failures. Best effort: report the
    # problem and hand back an empty result instead of raising.
    warn "Failed to fetch data from API: #{e.message}"
    []
  end
end
#parameters_for_api(page_index) ⇒ Object
26 27 28 29 30 31 32 33 |
# File 'lib/wayback_machine_downloader/archive_api.rb', line 26
#
# Assembles the extra CDX query pairs that accompany every listing request.
#
# Reads @all, @from_timestamp and @to_timestamp from the including object.
#
# @param page_index [Integer, nil] appended as the "page" pair when truthy
# @return [Array<Array>] list of [name, value] pairs
def parameters_for_api(page_index)
  query = [
    ["fl", "timestamp,original"],
    ["collapse", "digest"],
    ["gzip", "false"]
  ]

  # Without @all, only captures with HTTP status 200 are requested.
  query << ["filter", "statuscode:200"] unless @all

  # A bound is sent only when it is set and different from 0.
  [["from", @from_timestamp], ["to", @to_timestamp]].each do |name, stamp|
    query << [name, stamp.to_s] if stamp && stamp != 0
  end

  query << ["page", page_index] if page_index
  query
end