Method: SelectPdf::PdfToTextClient#text_from_url_async

Defined in:
lib/selectpdf.rb

#text_from_url_async(url) ⇒ Object

Get the text from the PDF at the specified URL, using an asynchronous call.

Parameters:

  • url — Address of the PDF file (must start with http:// or https://).

Returns:

  • Extracted text.

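Raises:

  • ApiException — if the URL does not start with http:// or https://, if it starts with http://localhost, if the asynchronous job could not be launched, or if no result arrived within the allowed number of pings.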

2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
# File 'lib/selectpdf.rb', line 2323

# Get the text from the specified PDF with an asynchronous call.
#
# Starts an asynchronous job for +url+, then polls for its result: it sleeps
# +@async_calls_ping_interval+ seconds before each ping and pings at most
# +@async_calls_max_pings+ times. When a result arrives, +@number_of_pages+
# is updated from the job client before the result is returned.
#
# Side effects visible here: sets +@parameters['action']+ and
# +@parameters['url']+, and resets +@files+ to an empty hash.
#
# @param url [String] Address of the PDF file (must start with http:// or https://).
# @return [Object] Extracted text, as returned by the async job client.
# @raise [ApiException] if the URL does not start with http:// or https://,
#   if it starts with http://localhost, if the asynchronous job could not be
#   launched, or if no result arrived within the allowed number of pings.
def text_from_url_async(url)
  # Scheme checks are case-insensitive; lowercase once and reuse.
  normalized_url = url.downcase

  unless normalized_url.start_with?('http://', 'https://')
    message = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(message), message
  end

  if normalized_url.start_with?('http://localhost')
    # A single message is used for both the constructor and +raise+ so the
    # error always names the Ruby method (text_from_file_async).
    message = 'Cannot convert local urls via this method. Use text_from_file_async instead.'
    raise ApiException.new(message), message
  end

  @parameters['action'] = 'Convert'

  # This variant sends no file uploads, only the remote URL.
  @files = {}
  @parameters['url'] = url

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  no_pings = 0

  while no_pings < @async_calls_max_pings
    no_pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # Poll with the job id that was just returned and validated above, rather
    # than relying on instance state that may be unset or stale.
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # A nil result means the job has not finished yet; ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    return result
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end