Class: Pdfcrowd::PdfToTextClient

Inherits:
Object
  • Object
show all
Defined in:
lib/pdfcrowd.rb

Overview

Conversion from PDF to text.

Instance Method Summary collapse

Constructor Details

#initialize(user_name, api_key) ⇒ PdfToTextClient

Returns a new instance of PdfToTextClient.



4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
# File 'lib/pdfcrowd.rb', line 4690

# Builds a client preconfigured for the pdf -> txt conversion.
#
# @param user_name passed through to ConnectionHelper.new
# @param api_key passed through to ConnectionHelper.new
def initialize(user_name, api_key)
    # Per-client request state: upload bookkeeping starts out empty.
    @file_id = 1
    @files = {}
    @raw_data = {}
    # The conversion direction is fixed for this client type.
    @fields = { 'input_format' => 'pdf', 'output_format' => 'txt' }
    @helper = ConnectionHelper.new(user_name, api_key)
end

Instance Method Details

#convertFile(file) ⇒ Object



4739
4740
4741
4742
4743
4744
4745
4746
# File 'lib/pdfcrowd.rb', line 4739

# Registers a local file as the conversion input and posts the request.
#
# @param file path checked with File.file? and File.zero?
# @return the value returned by @helper.post
# @raise [Error] (built with code 470) when the path is not a regular file or the file is empty
def convertFile(file)
    missing_or_empty = !File.file?(file) || File.zero?(file)
    if missing_or_empty
        message = Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-text", "The file must exist and not be empty.", "convert_file")
        raise Error.new(message, 470)
    end

    @files.store('file', file)
    @helper.post(@fields, @files, @raw_data)
end

#convertFileToFile(file, file_path) ⇒ Object



4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
# File 'lib/pdfcrowd.rb', line 4759

# Converts a local file and writes the result to +file_path+.
#
# @param file forwarded unchanged to convertFileToStream
# @param file_path destination path; must be a non-empty string
# @return [nil]
# @raise [Error] (built with code 470) when +file_path+ is nil or empty;
#   any Error raised by the conversion is re-raised after the partial
#   output file has been removed
def convertFileToFile(file, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_file_to_file"), 470)
    end

    # File.open instead of Kernel#open: the latter treats a leading "|" in
    # the path as a command to spawn.
    output_file = File.open(file_path, "wb")
    begin
        convertFileToStream(file, output_file)
        output_file.close
    rescue Error
        # Close before deleting so the removal also works on platforms that
        # refuse to unlink open files.
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Any other exception must not leak the file handle.
        output_file.close unless output_file.closed?
    end
end

#convertFileToStream(file, out_stream) ⇒ Object



4749
4750
4751
4752
4753
4754
4755
4756
# File 'lib/pdfcrowd.rb', line 4749

# Registers a local file as the conversion input and posts the request,
# handing +out_stream+ to the connection helper.
#
# @param file path checked with File.file? and File.zero?
# @param out_stream passed as the fourth argument to @helper.post
# @return the value returned by @helper.post
# @raise [Error] (built with code 470) when the path is not a regular file or the file is empty
def convertFileToStream(file, out_stream)
    missing_or_empty = !File.file?(file) || File.zero?(file)
    if missing_or_empty
        message = Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-text", "The file must exist and not be empty.", "convert_file_to_stream")
        raise Error.new(message, 470)
    end

    @files.store('file', file)
    @helper.post(@fields, @files, @raw_data, out_stream)
end

#convertRawData(data) ⇒ Object



4776
4777
4778
4779
# File 'lib/pdfcrowd.rb', line 4776

# Uses in-memory data as the conversion input and posts the request.
#
# @param data stored under the 'file' key of @raw_data
# @return the value returned by @helper.post
def convertRawData(data)
    @raw_data.store('file', data)
    @helper.post(@fields, @files, @raw_data)
end

#convertRawDataToFile(data, file_path) ⇒ Object



4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
# File 'lib/pdfcrowd.rb', line 4788

# Converts in-memory data and writes the result to +file_path+.
#
# @param data forwarded unchanged to convertRawDataToStream
# @param file_path destination path; must be a non-empty string
# @return [nil]
# @raise [Error] (built with code 470) when +file_path+ is nil or empty;
#   any Error raised by the conversion is re-raised after the partial
#   output file has been removed
def convertRawDataToFile(data, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_raw_data_to_file"), 470)
    end

    # File.open instead of Kernel#open: the latter treats a leading "|" in
    # the path as a command to spawn.
    output_file = File.open(file_path, "wb")
    begin
        convertRawDataToStream(data, output_file)
        output_file.close
    rescue Error
        # Close before deleting so the removal also works on platforms that
        # refuse to unlink open files.
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Any other exception must not leak the file handle.
        output_file.close unless output_file.closed?
    end
end

#convertRawDataToStream(data, out_stream) ⇒ Object



4782
4783
4784
4785
# File 'lib/pdfcrowd.rb', line 4782

# Uses in-memory data as the conversion input and posts the request,
# handing +out_stream+ to the connection helper.
#
# @param data stored under the 'file' key of @raw_data
# @param out_stream passed as the fourth argument to @helper.post
# @return the value returned by @helper.post
def convertRawDataToStream(data, out_stream)
    @raw_data.store('file', data)
    @helper.post(@fields, @files, @raw_data, out_stream)
end

#convertStream(in_stream) ⇒ Object



4805
4806
4807
4808
# File 'lib/pdfcrowd.rb', line 4805

# Reads +in_stream+ fully and posts its content as the conversion input.
#
# @param in_stream object responding to #read
# @return the value returned by @helper.post
def convertStream(in_stream)
    payload = in_stream.read
    @raw_data.store('stream', payload)
    @helper.post(@fields, @files, @raw_data)
end

#convertStreamToFile(in_stream, file_path) ⇒ Object



4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
# File 'lib/pdfcrowd.rb', line 4817

# Converts the content of +in_stream+ and writes the result to +file_path+.
#
# @param in_stream forwarded unchanged to convertStreamToStream
# @param file_path destination path; must be a non-empty string
# @return [nil]
# @raise [Error] (built with code 470) when +file_path+ is nil or empty;
#   any Error raised by the conversion is re-raised after the partial
#   output file has been removed
def convertStreamToFile(in_stream, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_stream_to_file"), 470)
    end

    # File.open instead of Kernel#open: the latter treats a leading "|" in
    # the path as a command to spawn.
    output_file = File.open(file_path, "wb")
    begin
        convertStreamToStream(in_stream, output_file)
        output_file.close
    rescue Error
        # Close before deleting so the removal also works on platforms that
        # refuse to unlink open files.
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Any other exception must not leak the file handle.
        output_file.close unless output_file.closed?
    end
end

#convertStreamToStream(in_stream, out_stream) ⇒ Object



4811
4812
4813
4814
# File 'lib/pdfcrowd.rb', line 4811

# Reads +in_stream+ fully and posts its content as the conversion input,
# handing +out_stream+ to the connection helper.
#
# @param in_stream object responding to #read
# @param out_stream passed as the fourth argument to @helper.post
# @return the value returned by @helper.post
def convertStreamToStream(in_stream, out_stream)
    payload = in_stream.read
    @raw_data.store('stream', payload)
    @helper.post(@fields, @files, @raw_data, out_stream)
end

#convertUrl(url) ⇒ Object



4702
4703
4704
4705
4706
4707
4708
4709
# File 'lib/pdfcrowd.rb', line 4702

# Uses a remote document as the conversion input and posts the request.
#
# @param url address of the source document; must start with http:// or
#   https:// (case-insensitive) and be a single line
# @return the value returned by @helper.post
# @raise [Error] (built with code 470) when +url+ fails the protocol check
def convertUrl(url)
    # \A and \z anchor the whole string. The previous ^/$ anchors are
    # per-line, so any multi-line value containing one URL-looking line
    # slipped through the check.
    unless /\Ahttps?:\/\/.*\z/i.match(url)
        raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-text", "Supported protocols are http:// and https://.", "convert_url"), 470)
    end

    @fields['url'] = url
    @helper.post(@fields, @files, @raw_data)
end

#convertUrlToFile(url, file_path) ⇒ Object



4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
# File 'lib/pdfcrowd.rb', line 4722

# Converts a remote document and writes the result to +file_path+.
#
# @param url forwarded unchanged to convertUrlToStream
# @param file_path destination path; must be a non-empty string
# @return [nil]
# @raise [Error] (built with code 470) when +file_path+ is nil or empty;
#   any Error raised by the conversion is re-raised after the partial
#   output file has been removed
def convertUrlToFile(url, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_url_to_file"), 470)
    end

    # File.open instead of Kernel#open: the latter treats a leading "|" in
    # the path as a command to spawn.
    output_file = File.open(file_path, "wb")
    begin
        convertUrlToStream(url, output_file)
        output_file.close
    rescue Error
        # Close before deleting so the removal also works on platforms that
        # refuse to unlink open files.
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Any other exception must not leak the file handle.
        output_file.close unless output_file.closed?
    end
end

#convertUrlToStream(url, out_stream) ⇒ Object



4712
4713
4714
4715
4716
4717
4718
4719
# File 'lib/pdfcrowd.rb', line 4712

# Uses a remote document as the conversion input and posts the request,
# handing +out_stream+ to the connection helper.
#
# @param url address of the source document; must start with http:// or
#   https:// (case-insensitive) and be a single line
# @param out_stream passed as the fourth argument to @helper.post
# @return the value returned by @helper.post
# @raise [Error] (built with code 470) when +url+ fails the protocol check
def convertUrlToStream(url, out_stream)
    # \A and \z anchor the whole string. The previous ^/$ anchors are
    # per-line, so any multi-line value containing one URL-looking line
    # slipped through the check.
    unless /\Ahttps?:\/\/.*\z/i.match(url)
        raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-text", "Supported protocols are http:// and https://.", "convert_url_to_stream"), 470)
    end

    @fields['url'] = url
    @helper.post(@fields, @files, @raw_data, out_stream)
end

#getConsumedCreditCount ⇒ Object



4979
4980
4981
# File 'lib/pdfcrowd.rb', line 4979

# Delegates to the connection helper's getConsumedCreditCount.
#
# @return the value reported by @helper
def getConsumedCreditCount
    @helper.getConsumedCreditCount
end

#getDebugLogUrl ⇒ Object



4969
4970
4971
# File 'lib/pdfcrowd.rb', line 4969

# Delegates to the connection helper's getDebugLogUrl.
#
# @return the value reported by @helper
def getDebugLogUrl
    @helper.getDebugLogUrl
end

#getJobId ⇒ Object



4984
4985
4986
# File 'lib/pdfcrowd.rb', line 4984

# Delegates to the connection helper's getJobId.
#
# @return the value reported by @helper
def getJobId
    @helper.getJobId
end

#getOutputSize ⇒ Object



4994
4995
4996
# File 'lib/pdfcrowd.rb', line 4994

# Delegates to the connection helper's getOutputSize.
#
# @return the value reported by @helper
def getOutputSize
    @helper.getOutputSize
end

#getPageCount ⇒ Object



4989
4990
4991
# File 'lib/pdfcrowd.rb', line 4989

# Delegates to the connection helper's getPageCount.
#
# @return the value reported by @helper
def getPageCount
    @helper.getPageCount
end

#getRemainingCreditCount ⇒ Object



4974
4975
4976
# File 'lib/pdfcrowd.rb', line 4974

# Delegates to the connection helper's getRemainingCreditCount.
#
# @return the value reported by @helper
def getRemainingCreditCount
    @helper.getRemainingCreditCount
end

#getVersion ⇒ Object



4999
5000
5001
# File 'lib/pdfcrowd.rb', line 4999

# Builds a version banner from CLIENT_VERSION and the converter version
# reported by the connection helper.
#
# @return [String] "client <CLIENT_VERSION>, API v2, converter <converter version>"
def getVersion
    banner = "client " + CLIENT_VERSION + ", API v2, converter "
    banner + @helper.getConverterVersion
end

#setClientUserAgent(agent) ⇒ Object



5036
5037
5038
5039
# File 'lib/pdfcrowd.rb', line 5036

# Forwards +agent+ to the connection helper's setUserAgent.
#
# @param agent value handed to @helper.setUserAgent
# @return [self] to allow call chaining
def setClientUserAgent(agent)
    tap { @helper.setUserAgent(agent) }
end

#setCropArea(x, y, width, height) ⇒ Object



4954
4955
4956
4957
4958
4959
4960
# File 'lib/pdfcrowd.rb', line 4954

# Sets all four crop-area values by calling the individual setters in the
# order X, Y, width, height.
#
# @param x forwarded to setCropAreaX
# @param y forwarded to setCropAreaY
# @param width forwarded to setCropAreaWidth
# @param height forwarded to setCropAreaHeight
# @return [self] to allow call chaining
def setCropArea(x, y, width, height)
    [
        [:setCropAreaX, x],
        [:setCropAreaY, y],
        [:setCropAreaWidth, width],
        [:setCropAreaHeight, height]
    ].each { |setter, value| send(setter, value) }
    self
end

#setCropAreaHeight(height) ⇒ Object



4944
4945
4946
4947
4948
4949
4950
4951
# File 'lib/pdfcrowd.rb', line 4944

# Stores the crop-area height request field.
#
# @param height value accepted by Kernel#Integer and not negative; it is
#   stored as given, not as the converted Integer
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +height+ is negative or cannot
#   be interpreted as an integer
def setCropAreaHeight(height)
    # Integer() raises on non-numeric input; turn that into the library's
    # own validation error instead of leaking ArgumentError/TypeError.
    parsed = begin
        Integer(height)
    rescue ArgumentError, TypeError, FloatDomainError
        nil
    end
    if parsed.nil? || parsed < 0
        raise Error.new(Pdfcrowd.create_invalid_value_message(height, "setCropAreaHeight", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_height"), 470)
    end

    @fields['crop_area_height'] = height
    self
end

#setCropAreaWidth(width) ⇒ Object



4934
4935
4936
4937
4938
4939
4940
4941
# File 'lib/pdfcrowd.rb', line 4934

# Stores the crop-area width request field.
#
# @param width value accepted by Kernel#Integer and not negative; it is
#   stored as given, not as the converted Integer
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +width+ is negative or cannot
#   be interpreted as an integer
def setCropAreaWidth(width)
    # Integer() raises on non-numeric input; turn that into the library's
    # own validation error instead of leaking ArgumentError/TypeError.
    parsed = begin
        Integer(width)
    rescue ArgumentError, TypeError, FloatDomainError
        nil
    end
    if parsed.nil? || parsed < 0
        raise Error.new(Pdfcrowd.create_invalid_value_message(width, "setCropAreaWidth", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_width"), 470)
    end

    @fields['crop_area_width'] = width
    self
end

#setCropAreaX(x) ⇒ Object



4914
4915
4916
4917
4918
4919
4920
4921
# File 'lib/pdfcrowd.rb', line 4914

# Stores the crop-area X request field.
#
# @param x value accepted by Kernel#Integer and not negative; it is stored
#   as given, not as the converted Integer
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +x+ is negative or cannot be
#   interpreted as an integer
def setCropAreaX(x)
    # Integer() raises on non-numeric input; turn that into the library's
    # own validation error instead of leaking ArgumentError/TypeError.
    parsed = begin
        Integer(x)
    rescue ArgumentError, TypeError, FloatDomainError
        nil
    end
    if parsed.nil? || parsed < 0
        raise Error.new(Pdfcrowd.create_invalid_value_message(x, "setCropAreaX", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_x"), 470)
    end

    @fields['crop_area_x'] = x
    self
end

#setCropAreaY(y) ⇒ Object



4924
4925
4926
4927
4928
4929
4930
4931
# File 'lib/pdfcrowd.rb', line 4924

# Stores the crop-area Y request field.
#
# @param y value accepted by Kernel#Integer and not negative; it is stored
#   as given, not as the converted Integer
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +y+ is negative or cannot be
#   interpreted as an integer
def setCropAreaY(y)
    # Integer() raises on non-numeric input; turn that into the library's
    # own validation error instead of leaking ArgumentError/TypeError.
    parsed = begin
        Integer(y)
    rescue ArgumentError, TypeError, FloatDomainError
        nil
    end
    if parsed.nil? || parsed < 0
        raise Error.new(Pdfcrowd.create_invalid_value_message(y, "setCropAreaY", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_y"), 470)
    end

    @fields['crop_area_y'] = y
    self
end

#setCustomPageBreak(page_break) ⇒ Object



4876
4877
4878
4879
# File 'lib/pdfcrowd.rb', line 4876

# Stores +page_break+ under the 'custom_page_break' request field.
#
# @param page_break value stored without validation
# @return [self] to allow call chaining
def setCustomPageBreak(page_break)
    @fields.store('custom_page_break', page_break)
    self
end

#setDebugLog(value) ⇒ Object



4963
4964
4965
4966
# File 'lib/pdfcrowd.rb', line 4963

# Stores +value+ under the 'debug_log' request field.
#
# @param value value stored without validation
# @return [self] to allow call chaining
def setDebugLog(value)
    @fields.store('debug_log', value)
    self
end

#setEol(eol) ⇒ Object



4856
4857
4858
4859
4860
4861
4862
4863
# File 'lib/pdfcrowd.rb', line 4856

# Stores the 'eol' request field after validating it.
#
# @param eol one of unix, dos, mac (case-insensitive)
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +eol+ is not an allowed value
def setEol(eol)
    # \A/\z match the whole string; with ^/$ a multi-line value passed as
    # soon as any single line was an allowed keyword.
    unless /\A(unix|dos|mac)\z/i.match(eol)
        raise Error.new(Pdfcrowd.create_invalid_value_message(eol, "setEol", "pdf-to-text", "Allowed values are unix, dos, mac.", "set_eol"), 470)
    end

    @fields['eol'] = eol
    self
end

#setHttpProxy(proxy) ⇒ Object



5010
5011
5012
5013
5014
5015
5016
5017
# File 'lib/pdfcrowd.rb', line 5010

# Stores the 'http_proxy' request field after validating its format.
#
# @param proxy string of the form DOMAIN_OR_IP_ADDRESS:PORT
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +proxy+ does not match the format
def setHttpProxy(proxy)
    # \A/\z match the whole string; with ^/$ a multi-line value passed as
    # soon as any single line looked like host:port.
    unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
        raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470)
    end

    @fields['http_proxy'] = proxy
    self
end

#setHttpsProxy(proxy) ⇒ Object



5020
5021
5022
5023
5024
5025
5026
5027
# File 'lib/pdfcrowd.rb', line 5020

# Stores the 'https_proxy' request field after validating its format.
#
# @param proxy string of the form DOMAIN_OR_IP_ADDRESS:PORT
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +proxy+ does not match the format
def setHttpsProxy(proxy)
    # \A/\z match the whole string; with ^/$ a multi-line value passed as
    # soon as any single line looked like host:port.
    unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
        raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470)
    end

    @fields['https_proxy'] = proxy
    self
end

#setLineSpacingThreshold(threshold) ⇒ Object



4892
4893
4894
4895
4896
4897
4898
4899
# File 'lib/pdfcrowd.rb', line 4892

# Stores the 'line_spacing_threshold' request field after validating it.
#
# @param threshold either "0" or a non-negative integer followed by "%"
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +threshold+ has another form
def setLineSpacingThreshold(threshold)
    # \A/\z match the whole string; with ^/$ a multi-line value passed as
    # soon as any single line was a valid threshold.
    unless /\A(?:0|[0-9]+%)\z/.match(threshold)
        raise Error.new(Pdfcrowd.create_invalid_value_message(threshold, "setLineSpacingThreshold", "pdf-to-text", "The value must be a positive integer percentage.", "set_line_spacing_threshold"), 470)
    end

    @fields['line_spacing_threshold'] = threshold
    self
end

#setNoLayout(value) ⇒ Object



4850
4851
4852
4853
# File 'lib/pdfcrowd.rb', line 4850

# Stores +value+ under the 'no_layout' request field.
#
# @param value value stored without validation
# @return [self] to allow call chaining
def setNoLayout(value)
    @fields.store('no_layout', value)
    self
end

#setPageBreakMode(mode) ⇒ Object



4866
4867
4868
4869
4870
4871
4872
4873
# File 'lib/pdfcrowd.rb', line 4866

# Stores the 'page_break_mode' request field after validating it.
#
# @param mode one of none, default, custom (case-insensitive)
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +mode+ is not an allowed value
def setPageBreakMode(mode)
    # \A/\z match the whole string; with ^/$ a multi-line value passed as
    # soon as any single line was an allowed keyword.
    unless /\A(none|default|custom)\z/i.match(mode)
        raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageBreakMode", "pdf-to-text", "Allowed values are none, default, custom.", "set_page_break_mode"), 470)
    end

    @fields['page_break_mode'] = mode
    self
end

#setParagraphMode(mode) ⇒ Object



4882
4883
4884
4885
4886
4887
4888
4889
# File 'lib/pdfcrowd.rb', line 4882

# Stores the 'paragraph_mode' request field after validating it.
#
# @param mode one of none, bounding-box, characters (case-insensitive)
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +mode+ is not an allowed value
def setParagraphMode(mode)
    # \A/\z match the whole string; with ^/$ a multi-line value passed as
    # soon as any single line was an allowed keyword.
    unless /\A(none|bounding-box|characters)\z/i.match(mode)
        raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setParagraphMode", "pdf-to-text", "Allowed values are none, bounding-box, characters.", "set_paragraph_mode"), 470)
    end

    @fields['paragraph_mode'] = mode
    self
end

#setPdfPassword(password) ⇒ Object



4834
4835
4836
4837
# File 'lib/pdfcrowd.rb', line 4834

# Stores +password+ under the 'pdf_password' request field.
#
# @param password value stored without validation
# @return [self] to allow call chaining
def setPdfPassword(password)
    @fields.store('pdf_password', password)
    self
end

#setPrintPageRange(pages) ⇒ Object



4840
4841
4842
4843
4844
4845
4846
4847
# File 'lib/pdfcrowd.rb', line 4840

# Stores the 'print_page_range' request field after validating it.
#
# @param pages comma separated list of page numbers or ranges, where a
#   range may be open on one side (e.g. "2", "3-5", "-4", "7-")
# @return [self] to allow call chaining
# @raise [Error] (built with code 470) when +pages+ is not such a list
def setPrintPageRange(pages)
    # \A/\z match the whole string; with ^/$ a multi-line value passed as
    # soon as any single line was a valid page list.
    unless /\A(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*\z/.match(pages)
        raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-text", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470)
    end

    @fields['print_page_range'] = pages
    self
end

#setProxy(host, port, user_name, password) ⇒ Object



5048
5049
5050
5051
# File 'lib/pdfcrowd.rb', line 5048

# Forwards the proxy settings to the connection helper's setProxy.
#
# @param host handed to @helper.setProxy
# @param port handed to @helper.setProxy
# @param user_name handed to @helper.setProxy
# @param password handed to @helper.setProxy
# @return [self] to allow call chaining
def setProxy(host, port, user_name, password)
    tap { @helper.setProxy(host, port, user_name, password) }
end

#setRemoveEmptyLines(value) ⇒ Object



4908
4909
4910
4911
# File 'lib/pdfcrowd.rb', line 4908

# Stores +value+ under the 'remove_empty_lines' request field.
#
# @param value value stored without validation
# @return [self] to allow call chaining
def setRemoveEmptyLines(value)
    @fields.store('remove_empty_lines', value)
    self
end

#setRemoveHyphenation(value) ⇒ Object



4902
4903
4904
4905
# File 'lib/pdfcrowd.rb', line 4902

# Stores +value+ under the 'remove_hyphenation' request field.
#
# @param value value stored without validation
# @return [self] to allow call chaining
def setRemoveHyphenation(value)
    @fields.store('remove_hyphenation', value)
    self
end

#setRetryCount(count) ⇒ Object



5054
5055
5056
5057
# File 'lib/pdfcrowd.rb', line 5054

# Forwards +count+ to the connection helper's setRetryCount.
#
# @param count value handed to @helper.setRetryCount
# @return [self] to allow call chaining
def setRetryCount(count)
    tap { @helper.setRetryCount(count) }
end

#setTag(tag) ⇒ Object



5004
5005
5006
5007
# File 'lib/pdfcrowd.rb', line 5004

# Stores +tag+ under the 'tag' request field.
#
# @param tag value stored without validation
# @return [self] to allow call chaining
def setTag(tag)
    @fields.store('tag', tag)
    self
end

#setUseHttp(value) ⇒ Object



5030
5031
5032
5033
# File 'lib/pdfcrowd.rb', line 5030

# Forwards +value+ to the connection helper's setUseHttp.
#
# @param value value handed to @helper.setUseHttp
# @return [self] to allow call chaining
def setUseHttp(value)
    tap { @helper.setUseHttp(value) }
end

#setUserAgent(agent) ⇒ Object



5042
5043
5044
5045
# File 'lib/pdfcrowd.rb', line 5042

# Forwards +agent+ to the connection helper's setUserAgent.
#
# @param agent value handed to @helper.setUserAgent
# @return [self] to allow call chaining
def setUserAgent(agent)
    tap { @helper.setUserAgent(agent) }
end