Class: Pdfcrowd::PdfToTextClient
- Inherits: Object
  - Object
  - Pdfcrowd::PdfToTextClient
- Defined in:
- lib/pdfcrowd.rb
Overview
Conversion from PDF to text.
Instance Method Summary
- #convertFile(file) ⇒ Object
- #convertFileToFile(file, file_path) ⇒ Object
- #convertFileToStream(file, out_stream) ⇒ Object
- #convertRawData(data) ⇒ Object
- #convertRawDataToFile(data, file_path) ⇒ Object
- #convertRawDataToStream(data, out_stream) ⇒ Object
- #convertStream(in_stream) ⇒ Object
- #convertStreamToFile(in_stream, file_path) ⇒ Object
- #convertStreamToStream(in_stream, out_stream) ⇒ Object
- #convertUrl(url) ⇒ Object
- #convertUrlToFile(url, file_path) ⇒ Object
- #convertUrlToStream(url, out_stream) ⇒ Object
- #getConsumedCreditCount ⇒ Object
- #getDebugLogUrl ⇒ Object
- #getJobId ⇒ Object
- #getOutputSize ⇒ Object
- #getPageCount ⇒ Object
- #getRemainingCreditCount ⇒ Object
- #getVersion ⇒ Object
- #initialize(user_name, api_key) ⇒ PdfToTextClient (constructor)
  A new instance of PdfToTextClient.
- #setClientUserAgent(agent) ⇒ Object
- #setCropArea(x, y, width, height) ⇒ Object
- #setCropAreaHeight(height) ⇒ Object
- #setCropAreaWidth(width) ⇒ Object
- #setCropAreaX(x) ⇒ Object
- #setCropAreaY(y) ⇒ Object
- #setCustomPageBreak(page_break) ⇒ Object
- #setDebugLog(value) ⇒ Object
- #setEol(eol) ⇒ Object
- #setHttpProxy(proxy) ⇒ Object
- #setHttpsProxy(proxy) ⇒ Object
- #setLineSpacingThreshold(threshold) ⇒ Object
- #setNoLayout(value) ⇒ Object
- #setPageBreakMode(mode) ⇒ Object
- #setParagraphMode(mode) ⇒ Object
- #setPdfPassword(password) ⇒ Object
- #setPrintPageRange(pages) ⇒ Object
- #setProxy(host, port, user_name, password) ⇒ Object
- #setRemoveEmptyLines(value) ⇒ Object
- #setRemoveHyphenation(value) ⇒ Object
- #setRetryCount(count) ⇒ Object
- #setTag(tag) ⇒ Object
- #setUseHttp(value) ⇒ Object
- #setUserAgent(agent) ⇒ Object
Constructor Details
#initialize(user_name, api_key) ⇒ PdfToTextClient
Returns a new instance of PdfToTextClient.
4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 |
# File 'lib/pdfcrowd.rb', line 4690

# Creates a PDF-to-text client.
#
# Builds the ConnectionHelper that receives the credentials and seeds the
# request state: the fixed input/output formats plus empty containers for
# file uploads and raw payloads.
#
# @param user_name passed through to ConnectionHelper.new
# @param api_key passed through to ConnectionHelper.new
def initialize(user_name, api_key)
  @helper = ConnectionHelper.new(user_name, api_key)
  @fields = {
    'input_format' => 'pdf',
    'output_format' => 'txt'
  }
  @file_id = 1
  @files = {}
  @raw_data = {}
end
Instance Method Details
#convertFile(file) ⇒ Object
4739 4740 4741 4742 4743 4744 4745 4746 |
# File 'lib/pdfcrowd.rb', line 4739

# Converts a local file.
#
# Registers the path under @files['file'] and posts the request through the
# connection helper.
#
# @param file path of the input file; it must exist and be non-empty
# @return the value of @helper.post (presumably the converted text — confirm
#   against ConnectionHelper#post)
# @raise [Error] (code 470) if the path is not a regular file or the file is empty
def convertFile(file)
  if !File.file?(file) || File.zero?(file)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(file, "convertFile", "pdf-to-text", "The file must exist and not be empty.", "convert_file"), 470)
  end

  @files['file'] = file
  @helper.post(@fields, @files, @raw_data)
end
#convertFileToFile(file, file_path) ⇒ Object
4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 |
# File 'lib/pdfcrowd.rb', line 4759

# Converts a local file and writes the result to an output file.
#
# The output file is opened in binary mode and handed to convertFileToStream.
# If the conversion raises Error, the partially written output file is removed
# before the error is re-raised.
#
# @param file path of the input file (validated by convertFileToStream)
# @param file_path path of the output file; must be a non-empty string
# @return [nil]
# @raise [Error] (code 470) if file_path is nil or empty, or whatever
#   convertFileToStream raises
def convertFileToFile(file, file_path)
  if file_path.nil? || file_path.empty?
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(file_path, "convertFileToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_file_to_file"), 470)
  end

  # File.open, not Kernel#open: the latter treats a path starting with "|"
  # as a command to execute.
  output_file = File.open(file_path, "wb")
  begin
    convertFileToStream(file, output_file)
  rescue Error
    # Close before deleting so the removal also works on platforms that
    # refuse to delete open files.
    output_file.close
    FileUtils.rm(file_path)
    raise
  ensure
    # Also release the handle when something other than Error is raised.
    output_file.close unless output_file.closed?
  end
  nil
end
#convertFileToStream(file, out_stream) ⇒ Object
4749 4750 4751 4752 4753 4754 4755 4756 |
# File 'lib/pdfcrowd.rb', line 4749

# Converts a local file and passes an output stream to the connection helper.
#
# @param file path of the input file; it must exist and be non-empty
# @param out_stream forwarded to @helper.post as its fourth argument
#   (presumably the stream the result is written to — confirm against
#   ConnectionHelper#post)
# @return the value of @helper.post
# @raise [Error] (code 470) if the path is not a regular file or the file is empty
def convertFileToStream(file, out_stream)
  if !File.file?(file) || File.zero?(file)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(file, "convertFileToStream::file", "pdf-to-text", "The file must exist and not be empty.", "convert_file_to_stream"), 470)
  end

  @files['file'] = file
  @helper.post(@fields, @files, @raw_data, out_stream)
end
#convertRawData(data) ⇒ Object
4776 4777 4778 4779 |
# File 'lib/pdfcrowd.rb', line 4776

# Converts in-memory data.
#
# Stores the payload under @raw_data['file'] and posts the request through
# the connection helper.
#
# @param data the input payload, stored as-is
# @return the value of @helper.post
def convertRawData(data)
  @raw_data['file'] = data
  @helper.post(@fields, @files, @raw_data)
end
#convertRawDataToFile(data, file_path) ⇒ Object
4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 |
# File 'lib/pdfcrowd.rb', line 4788

# Converts in-memory data and writes the result to an output file.
#
# The output file is opened in binary mode and handed to
# convertRawDataToStream. If the conversion raises Error, the partially
# written output file is removed before the error is re-raised.
#
# @param data the input payload
# @param file_path path of the output file; must be a non-empty string
# @return [nil]
# @raise [Error] (code 470) if file_path is nil or empty, or whatever
#   convertRawDataToStream raises
def convertRawDataToFile(data, file_path)
  if file_path.nil? || file_path.empty?
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(file_path, "convertRawDataToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_raw_data_to_file"), 470)
  end

  # File.open, not Kernel#open: the latter treats a path starting with "|"
  # as a command to execute.
  output_file = File.open(file_path, "wb")
  begin
    convertRawDataToStream(data, output_file)
  rescue Error
    # Close before deleting so the removal also works on platforms that
    # refuse to delete open files.
    output_file.close
    FileUtils.rm(file_path)
    raise
  ensure
    # Also release the handle when something other than Error is raised.
    output_file.close unless output_file.closed?
  end
  nil
end
#convertRawDataToStream(data, out_stream) ⇒ Object
4782 4783 4784 4785 |
# File 'lib/pdfcrowd.rb', line 4782

# Converts in-memory data and passes an output stream to the connection helper.
#
# @param data the input payload, stored as-is under @raw_data['file']
# @param out_stream forwarded to @helper.post as its fourth argument
#   (presumably the stream the result is written to — confirm against
#   ConnectionHelper#post)
# @return the value of @helper.post
def convertRawDataToStream(data, out_stream)
  @raw_data['file'] = data
  @helper.post(@fields, @files, @raw_data, out_stream)
end
#convertStream(in_stream) ⇒ Object
4805 4806 4807 4808 |
# File 'lib/pdfcrowd.rb', line 4805

# Converts the contents of an input stream.
#
# Reads the stream fully, stores the bytes under @raw_data['stream'] and
# posts the request through the connection helper.
#
# @param in_stream [#read] source of the input data
# @return the value of @helper.post
def convertStream(in_stream)
  @raw_data['stream'] = in_stream.read
  @helper.post(@fields, @files, @raw_data)
end
#convertStreamToFile(in_stream, file_path) ⇒ Object
4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 |
# File 'lib/pdfcrowd.rb', line 4817

# Converts the contents of an input stream and writes the result to a file.
#
# The output file is opened in binary mode and handed to
# convertStreamToStream. If the conversion raises Error, the partially
# written output file is removed before the error is re-raised.
#
# @param in_stream [#read] source of the input data
# @param file_path path of the output file; must be a non-empty string
# @return [nil]
# @raise [Error] (code 470) if file_path is nil or empty, or whatever
#   convertStreamToStream raises
def convertStreamToFile(in_stream, file_path)
  if file_path.nil? || file_path.empty?
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(file_path, "convertStreamToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_stream_to_file"), 470)
  end

  # File.open, not Kernel#open: the latter treats a path starting with "|"
  # as a command to execute.
  output_file = File.open(file_path, "wb")
  begin
    convertStreamToStream(in_stream, output_file)
  rescue Error
    # Close before deleting so the removal also works on platforms that
    # refuse to delete open files.
    output_file.close
    FileUtils.rm(file_path)
    raise
  ensure
    # Also release the handle when something other than Error is raised.
    output_file.close unless output_file.closed?
  end
  nil
end
#convertStreamToStream(in_stream, out_stream) ⇒ Object
4811 4812 4813 4814 |
# File 'lib/pdfcrowd.rb', line 4811

# Converts the contents of an input stream and passes an output stream to the
# connection helper.
#
# @param in_stream [#read] source of the input data; read fully and stored
#   under @raw_data['stream']
# @param out_stream forwarded to @helper.post as its fourth argument
#   (presumably the stream the result is written to — confirm against
#   ConnectionHelper#post)
# @return the value of @helper.post
def convertStreamToStream(in_stream, out_stream)
  @raw_data['stream'] = in_stream.read
  @helper.post(@fields, @files, @raw_data, out_stream)
end
#convertUrl(url) ⇒ Object
4702 4703 4704 4705 4706 4707 4708 4709 |
# File 'lib/pdfcrowd.rb', line 4702

# Converts a document located at a URL.
#
# Stores the URL under @fields['url'] and posts the request through the
# connection helper.
#
# @param url the address of the input document; must be a single-line string
#   starting with http:// or https:// (case-insensitive)
# @return the value of @helper.post
# @raise [Error] (code 470) if the URL does not use a supported protocol
def convertUrl(url)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line looked like a URL.
  unless %r{\Ahttps?://.*\z}i.match(url)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(url, "convertUrl", "pdf-to-text", "Supported protocols are http:// and https://.", "convert_url"), 470)
  end

  @fields['url'] = url
  @helper.post(@fields, @files, @raw_data)
end
#convertUrlToFile(url, file_path) ⇒ Object
4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 |
# File 'lib/pdfcrowd.rb', line 4722

# Converts a document located at a URL and writes the result to a file.
#
# The output file is opened in binary mode and handed to convertUrlToStream.
# If the conversion raises Error, the partially written output file is removed
# before the error is re-raised.
#
# @param url the address of the input document (validated by convertUrlToStream)
# @param file_path path of the output file; must be a non-empty string
# @return [nil]
# @raise [Error] (code 470) if file_path is nil or empty, or whatever
#   convertUrlToStream raises
def convertUrlToFile(url, file_path)
  if file_path.nil? || file_path.empty?
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(file_path, "convertUrlToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_url_to_file"), 470)
  end

  # File.open, not Kernel#open: the latter treats a path starting with "|"
  # as a command to execute.
  output_file = File.open(file_path, "wb")
  begin
    convertUrlToStream(url, output_file)
  rescue Error
    # Close before deleting so the removal also works on platforms that
    # refuse to delete open files.
    output_file.close
    FileUtils.rm(file_path)
    raise
  ensure
    # Also release the handle when something other than Error is raised.
    output_file.close unless output_file.closed?
  end
  nil
end
#convertUrlToStream(url, out_stream) ⇒ Object
4712 4713 4714 4715 4716 4717 4718 4719 |
# File 'lib/pdfcrowd.rb', line 4712

# Converts a document located at a URL and passes an output stream to the
# connection helper.
#
# @param url the address of the input document; must be a single-line string
#   starting with http:// or https:// (case-insensitive)
# @param out_stream forwarded to @helper.post as its fourth argument
#   (presumably the stream the result is written to — confirm against
#   ConnectionHelper#post)
# @return the value of @helper.post
# @raise [Error] (code 470) if the URL does not use a supported protocol
def convertUrlToStream(url, out_stream)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line looked like a URL.
  unless %r{\Ahttps?://.*\z}i.match(url)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(url, "convertUrlToStream::url", "pdf-to-text", "Supported protocols are http:// and https://.", "convert_url_to_stream"), 470)
  end

  @fields['url'] = url
  @helper.post(@fields, @files, @raw_data, out_stream)
end
#getConsumedCreditCount ⇒ Object
4979 4980 4981 |
# File 'lib/pdfcrowd.rb', line 4979

# Reports the consumed credit count as tracked by the connection helper.
#
# @return the value of @helper.getConsumedCreditCount
def getConsumedCreditCount
  @helper.getConsumedCreditCount
end
#getDebugLogUrl ⇒ Object
4969 4970 4971 |
# File 'lib/pdfcrowd.rb', line 4969

# Reports the debug log URL as tracked by the connection helper.
#
# @return the value of @helper.getDebugLogUrl
def getDebugLogUrl
  @helper.getDebugLogUrl
end
#getJobId ⇒ Object
4984 4985 4986 |
# File 'lib/pdfcrowd.rb', line 4984

# Reports the job id as tracked by the connection helper.
#
# @return the value of @helper.getJobId
def getJobId
  @helper.getJobId
end
#getOutputSize ⇒ Object
4994 4995 4996 |
# File 'lib/pdfcrowd.rb', line 4994

# Reports the output size as tracked by the connection helper.
#
# @return the value of @helper.getOutputSize
def getOutputSize
  @helper.getOutputSize
end
#getPageCount ⇒ Object
4989 4990 4991 |
# File 'lib/pdfcrowd.rb', line 4989

# Reports the page count as tracked by the connection helper.
#
# @return the value of @helper.getPageCount
def getPageCount
  @helper.getPageCount
end
#getRemainingCreditCount ⇒ Object
4974 4975 4976 |
# File 'lib/pdfcrowd.rb', line 4974

# Reports the remaining credit count as tracked by the connection helper.
#
# @return the value of @helper.getRemainingCreditCount
def getRemainingCreditCount
  @helper.getRemainingCreditCount
end
#getVersion ⇒ Object
4999 5000 5001 |
# File 'lib/pdfcrowd.rb', line 4999

# Builds a human-readable version string.
#
# Combines the CLIENT_VERSION constant, the fixed API version label and the
# converter version reported by the connection helper.
#
# @return [String] "client <CLIENT_VERSION>, API v2, converter <converter version>"
def getVersion
  "client #{CLIENT_VERSION}, API v2, converter #{@helper.getConverterVersion}"
end
#setClientUserAgent(agent) ⇒ Object
5036 5037 5038 5039 |
# File 'lib/pdfcrowd.rb', line 5036

# Hands a user agent value to the connection helper.
#
# @param agent forwarded to @helper.setUserAgent
# @return [self] so calls can be chained
def setClientUserAgent(agent)
  @helper.setUserAgent(agent)
  self
end
#setCropArea(x, y, width, height) ⇒ Object
4954 4955 4956 4957 4958 4959 4960 |
# File 'lib/pdfcrowd.rb', line 4954

# Sets all four crop-area values in one call.
#
# Delegates to setCropAreaX, setCropAreaY, setCropAreaWidth and
# setCropAreaHeight in that order; if one of them raises, the values set by
# the earlier calls stay in place.
#
# @param x forwarded to setCropAreaX
# @param y forwarded to setCropAreaY
# @param width forwarded to setCropAreaWidth
# @param height forwarded to setCropAreaHeight
# @return [self] so calls can be chained
def setCropArea(x, y, width, height)
  setCropAreaX(x)
  setCropAreaY(y)
  setCropAreaWidth(width)
  setCropAreaHeight(height)
  self
end
#setCropAreaHeight(height) ⇒ Object
4944 4945 4946 4947 4948 4949 4950 4951 |
# File 'lib/pdfcrowd.rb', line 4944

# Sets the crop-area height.
#
# The value is validated through Integer() but stored unchanged under
# @fields['crop_area_height'].
#
# @param height a non-negative integer, or a value Integer() converts to one
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the converted value is negative
# @raise [ArgumentError, TypeError] if Integer() cannot convert the value
def setCropAreaHeight(height)
  if Integer(height) < 0
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(height, "setCropAreaHeight", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_height"), 470)
  end

  @fields['crop_area_height'] = height
  self
end
#setCropAreaWidth(width) ⇒ Object
4934 4935 4936 4937 4938 4939 4940 4941 |
# File 'lib/pdfcrowd.rb', line 4934

# Sets the crop-area width.
#
# The value is validated through Integer() but stored unchanged under
# @fields['crop_area_width'].
#
# @param width a non-negative integer, or a value Integer() converts to one
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the converted value is negative
# @raise [ArgumentError, TypeError] if Integer() cannot convert the value
def setCropAreaWidth(width)
  if Integer(width) < 0
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(width, "setCropAreaWidth", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_width"), 470)
  end

  @fields['crop_area_width'] = width
  self
end
#setCropAreaX(x) ⇒ Object
4914 4915 4916 4917 4918 4919 4920 4921 |
# File 'lib/pdfcrowd.rb', line 4914

# Sets the crop-area X value.
#
# The value is validated through Integer() but stored unchanged under
# @fields['crop_area_x'].
#
# @param x a non-negative integer, or a value Integer() converts to one
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the converted value is negative
# @raise [ArgumentError, TypeError] if Integer() cannot convert the value
def setCropAreaX(x)
  if Integer(x) < 0
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(x, "setCropAreaX", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_x"), 470)
  end

  @fields['crop_area_x'] = x
  self
end
#setCropAreaY(y) ⇒ Object
4924 4925 4926 4927 4928 4929 4930 4931 |
# File 'lib/pdfcrowd.rb', line 4924

# Sets the crop-area Y value.
#
# The value is validated through Integer() but stored unchanged under
# @fields['crop_area_y'].
#
# @param y a non-negative integer, or a value Integer() converts to one
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the converted value is negative
# @raise [ArgumentError, TypeError] if Integer() cannot convert the value
def setCropAreaY(y)
  if Integer(y) < 0
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(y, "setCropAreaY", "pdf-to-text", "Must be a positive integer or 0.", "set_crop_area_y"), 470)
  end

  @fields['crop_area_y'] = y
  self
end
#setCustomPageBreak(page_break) ⇒ Object
4876 4877 4878 4879 |
# File 'lib/pdfcrowd.rb', line 4876

# Sets the custom page break value.
#
# @param page_break stored without validation under @fields['custom_page_break']
# @return [self] so calls can be chained
def setCustomPageBreak(page_break)
  @fields['custom_page_break'] = page_break
  self
end
#setDebugLog(value) ⇒ Object
4963 4964 4965 4966 |
# File 'lib/pdfcrowd.rb', line 4963

# Sets the debug log option.
#
# @param value stored without validation under @fields['debug_log']
#   (presumably a boolean — confirm against the API reference)
# @return [self] so calls can be chained
def setDebugLog(value)
  @fields['debug_log'] = value
  self
end
#setEol(eol) ⇒ Object
4856 4857 4858 4859 4860 4861 4862 4863 |
# File 'lib/pdfcrowd.rb', line 4856

# Sets the end-of-line option.
#
# @param eol one of unix, dos, mac (case-insensitive); stored unchanged under
#   @fields['eol']
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the value is not one of the allowed names
def setEol(eol)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line was an allowed name.
  unless /\A(unix|dos|mac)\z/i.match(eol)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(eol, "setEol", "pdf-to-text", "Allowed values are unix, dos, mac.", "set_eol"), 470)
  end

  @fields['eol'] = eol
  self
end
#setHttpProxy(proxy) ⇒ Object
5010 5011 5012 5013 5014 5015 5016 5017 |
# File 'lib/pdfcrowd.rb', line 5010

# Sets the HTTP proxy request field.
#
# @param proxy a string of the form DOMAIN_OR_IP_ADDRESS:PORT; the host part
#   must contain at least one dot; stored unchanged under @fields['http_proxy']
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the value does not have the expected format
def setHttpProxy(proxy)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line was well-formed.
  unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(proxy, "setHttpProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470)
  end

  @fields['http_proxy'] = proxy
  self
end
#setHttpsProxy(proxy) ⇒ Object
5020 5021 5022 5023 5024 5025 5026 5027 |
# File 'lib/pdfcrowd.rb', line 5020

# Sets the HTTPS proxy request field.
#
# @param proxy a string of the form DOMAIN_OR_IP_ADDRESS:PORT; the host part
#   must contain at least one dot; stored unchanged under @fields['https_proxy']
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the value does not have the expected format
def setHttpsProxy(proxy)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line was well-formed.
  unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(proxy, "setHttpsProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470)
  end

  @fields['https_proxy'] = proxy
  self
end
#setLineSpacingThreshold(threshold) ⇒ Object
4892 4893 4894 4895 4896 4897 4898 4899 |
# File 'lib/pdfcrowd.rb', line 4892

# Sets the line spacing threshold.
#
# @param threshold either the string "0" or digits followed by "%" (e.g.
#   "10%"); stored unchanged under @fields['line_spacing_threshold']
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the value is not in one of those two forms
def setLineSpacingThreshold(threshold)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line was well-formed.
  unless /\A(?:0|[0-9]+%)\z/.match(threshold)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(threshold, "setLineSpacingThreshold", "pdf-to-text", "The value must be a positive integer percentage.", "set_line_spacing_threshold"), 470)
  end

  @fields['line_spacing_threshold'] = threshold
  self
end
#setNoLayout(value) ⇒ Object
4850 4851 4852 4853 |
# File 'lib/pdfcrowd.rb', line 4850

# Sets the no-layout option.
#
# @param value stored without validation under @fields['no_layout']
#   (presumably a boolean — confirm against the API reference)
# @return [self] so calls can be chained
def setNoLayout(value)
  @fields['no_layout'] = value
  self
end
#setPageBreakMode(mode) ⇒ Object
4866 4867 4868 4869 4870 4871 4872 4873 |
# File 'lib/pdfcrowd.rb', line 4866

# Sets the page break mode.
#
# @param mode one of none, default, custom (case-insensitive); stored
#   unchanged under @fields['page_break_mode']
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the value is not one of the allowed names
def setPageBreakMode(mode)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line was an allowed name.
  unless /\A(none|default|custom)\z/i.match(mode)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(mode, "setPageBreakMode", "pdf-to-text", "Allowed values are none, default, custom.", "set_page_break_mode"), 470)
  end

  @fields['page_break_mode'] = mode
  self
end
#setParagraphMode(mode) ⇒ Object
4882 4883 4884 4885 4886 4887 4888 4889 |
# File 'lib/pdfcrowd.rb', line 4882

# Sets the paragraph mode.
#
# @param mode one of none, bounding-box, characters (case-insensitive);
#   stored unchanged under @fields['paragraph_mode']
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the value is not one of the allowed names
def setParagraphMode(mode)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line was an allowed name.
  unless /\A(none|bounding-box|characters)\z/i.match(mode)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(mode, "setParagraphMode", "pdf-to-text", "Allowed values are none, bounding-box, characters.", "set_paragraph_mode"), 470)
  end

  @fields['paragraph_mode'] = mode
  self
end
#setPdfPassword(password) ⇒ Object
4834 4835 4836 4837 |
# File 'lib/pdfcrowd.rb', line 4834

# Sets the PDF password request field.
#
# @param password stored without validation under @fields['pdf_password']
# @return [self] so calls can be chained
def setPdfPassword(password)
  @fields['pdf_password'] = password
  self
end
#setPrintPageRange(pages) ⇒ Object
4840 4841 4842 4843 4844 4845 4846 4847 |
# File 'lib/pdfcrowd.rb', line 4840

# Sets the page range request field.
#
# @param pages a comma separated list of page numbers or ranges; a range may
#   omit its start or its end (e.g. "1-3, 5", "2-", "-4"); stored unchanged
#   under @fields['print_page_range']
# @return [self] so calls can be chained
# @raise [Error] (code 470) if the value is not such a list
def setPrintPageRange(pages)
  # \A and \z anchor the whole string. ^ and $ only anchor lines, which let a
  # multi-line value pass as long as any one line was a valid list.
  unless /\A(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*\z/.match(pages)
    # NOTE(review): the helper name after "Pdfcrowd." looks dropped by extraction;
    # as written this invokes Pdfcrowd.call — confirm against lib/pdfcrowd.rb.
    raise Error.new(Pdfcrowd.(pages, "setPrintPageRange", "pdf-to-text", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470)
  end

  @fields['print_page_range'] = pages
  self
end
#setProxy(host, port, user_name, password) ⇒ Object
5048 5049 5050 5051 |
# File 'lib/pdfcrowd.rb', line 5048

# Hands proxy settings to the connection helper.
#
# @param host forwarded to @helper.setProxy
# @param port forwarded to @helper.setProxy
# @param user_name forwarded to @helper.setProxy
# @param password forwarded to @helper.setProxy
# @return [self] so calls can be chained
def setProxy(host, port, user_name, password)
  @helper.setProxy(host, port, user_name, password)
  self
end
#setRemoveEmptyLines(value) ⇒ Object
4908 4909 4910 4911 |
# File 'lib/pdfcrowd.rb', line 4908

# Sets the remove-empty-lines option.
#
# @param value stored without validation under @fields['remove_empty_lines']
#   (presumably a boolean — confirm against the API reference)
# @return [self] so calls can be chained
def setRemoveEmptyLines(value)
  @fields['remove_empty_lines'] = value
  self
end
#setRemoveHyphenation(value) ⇒ Object
4902 4903 4904 4905 |
# File 'lib/pdfcrowd.rb', line 4902

# Sets the remove-hyphenation option.
#
# @param value stored without validation under @fields['remove_hyphenation']
#   (presumably a boolean — confirm against the API reference)
# @return [self] so calls can be chained
def setRemoveHyphenation(value)
  @fields['remove_hyphenation'] = value
  self
end
#setRetryCount(count) ⇒ Object
5054 5055 5056 5057 |
# File 'lib/pdfcrowd.rb', line 5054

# Hands a retry count to the connection helper.
#
# @param count forwarded to @helper.setRetryCount
# @return [self] so calls can be chained
def setRetryCount(count)
  @helper.setRetryCount(count)
  self
end
#setTag(tag) ⇒ Object
5004 5005 5006 5007 |
# File 'lib/pdfcrowd.rb', line 5004

# Sets the tag request field.
#
# @param tag stored without validation under @fields['tag']
# @return [self] so calls can be chained
def setTag(tag)
  @fields['tag'] = tag
  self
end
#setUseHttp(value) ⇒ Object
5030 5031 5032 5033 |
# File 'lib/pdfcrowd.rb', line 5030

# Hands the use-HTTP switch to the connection helper.
#
# @param value forwarded to @helper.setUseHttp
#   (presumably a boolean — confirm against ConnectionHelper#setUseHttp)
# @return [self] so calls can be chained
def setUseHttp(value)
  @helper.setUseHttp(value)
  self
end
#setUserAgent(agent) ⇒ Object
5042 5043 5044 5045 |
# File 'lib/pdfcrowd.rb', line 5042

# Hands a user agent value to the connection helper.
#
# @param agent forwarded to @helper.setUserAgent
# @return [self] so calls can be chained
def setUserAgent(agent)
  @helper.setUserAgent(agent)
  self
end