Class: WebScrapingAI::TextApi
- Inherits:
-
Object
- Object
- WebScrapingAI::TextApi
- Defined in:
- lib/webscraping_ai/api/text_api.rb
Instance Attribute Summary collapse
-
#api_client ⇒ Object
Returns the value of attribute api_client.
Instance Method Summary collapse
-
#get_text(url, opts = {}) ⇒ String
Page text by URL. Returns the visible text content of a webpage specified by the URL.
-
#get_text_with_http_info(url, opts = {}) ⇒ Array<(String, Integer, Hash)>
Page text by URL. Returns the visible text content of a webpage specified by the URL.
-
#initialize(api_client = ApiClient.default) ⇒ TextApi
constructor
A new instance of TextApi.
Constructor Details
Instance Attribute Details
#api_client ⇒ Object
Returns the value of attribute api_client.
17 18 19 |
# File 'lib/webscraping_ai/api/text_api.rb', line 17
#
# Returns the value of attribute api_client.
#
# @return [Object] the client object stored in @api_client
def api_client
  @api_client
end
Instance Method Details
#get_text(url, opts = {}) ⇒ String
Page text by URL. Returns the visible text content of a webpage specified by the URL. Can be used to feed data to GPT or other LLMs. The response can be in plain text, JSON, or XML format, depending on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
39 40 41 42 |
# File 'lib/webscraping_ai/api/text_api.rb', line 39
#
# Page text by URL.
#
# Convenience wrapper around #get_text_with_http_info: forwards both
# arguments unchanged and returns only the first element of its result
# (the response data), discarding the status code and headers.
#
# @param url the URL of the webpage whose text is requested
# @param opts [Hash] optional parameters, passed through as-is to
#   #get_text_with_http_info
# @return [String] the response data
def get_text(url, opts = {})
  data, _status_code, _headers = get_text_with_http_info(url, opts)
  data
end
#get_text_with_http_info(url, opts = {}) ⇒ Array<(String, Integer, Hash)>
Page text by URL. Returns the visible text content of a webpage specified by the URL. Can be used to feed data to GPT or other LLMs. The response can be in plain text, JSON, or XML format, depending on the text_format parameter. Proxies and Chromium JavaScript rendering are used for page retrieval and processing. Returns JSON on error.
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/webscraping_ai/api/text_api.rb', line 61
#
# Page text by URL.
#
# Validates the arguments (only when the client's `client_side_validation`
# setting is truthy), builds the query parameters for a GET request to
# '/text', and delegates the request to `@api_client.call_api`.
#
# Recognised keys in +opts+ that are forwarded as query parameters when
# they are not nil:
#   :text_format        - must be one of "plain", "xml", "json"
#   :return_links
#   :headers
#   :timeout            - must be between 1 and 30000 (inclusive)
#   :js
#   :js_timeout         - must be between 1 and 20000 (inclusive)
#   :proxy              - must be one of "datacenter", "residential"
#   :country            - must be one of "us", "gb", "de", "it", "fr",
#                         "ca", "es", "ru", "jp", "kr"
#   :device             - must be one of "desktop", "mobile", "tablet"
#   :error_on_404
#   :error_on_redirect
#   :js_script
#
# Additional keys read from +opts+: :query_params, :header_params,
# :form_params (starting hashes for the respective parameter sets),
# :debug_body, :debug_return_type and :debug_auth_names (overrides for the
# request body, return type and auth names).
#
# @param url the URL of the webpage; sent as the 'url' query parameter
# @param opts [Hash] optional parameters (see above)
# @return [Array<(String, Integer, Hash)>] data, response status code and
#   response headers, as returned by `@api_client.call_api`
# @raise [ArgumentError] when client-side validation is enabled and +url+
#   is nil or one of the constrained options has a disallowed value
def get_text_with_http_info(url, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: TextApi.get_text ...'
  end
  # verify the required parameter 'url' is set
  if @api_client.config.client_side_validation && url.nil?
    fail ArgumentError, "Missing the required parameter 'url' when calling TextApi.get_text"
  end
  allowable_values = ["plain", "xml", "json"]
  if @api_client.config.client_side_validation && opts[:'text_format'] && !allowable_values.include?(opts[:'text_format'])
    fail ArgumentError, "invalid value for \"text_format\", must be one of #{allowable_values}"
  end
  if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
    fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling TextApi.get_text, must be smaller than or equal to 30000.'
  end

  if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] < 1
    fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling TextApi.get_text, must be greater than or equal to 1.'
  end

  if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] > 20000
    fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling TextApi.get_text, must be smaller than or equal to 20000.'
  end

  if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] < 1
    fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling TextApi.get_text, must be greater than or equal to 1.'
  end

  allowable_values = ["datacenter", "residential"]
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
    fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
  end
  allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
  if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
    fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
  end
  allowable_values = ["desktop", "mobile", "tablet"]
  if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
    fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
  end
  # resource path
  local_var_path = '/text'

  # query parameters
  query_params = opts[:query_params] || {}
  query_params[:'url'] = url
  query_params[:'text_format'] = opts[:'text_format'] if !opts[:'text_format'].nil?
  query_params[:'return_links'] = opts[:'return_links'] if !opts[:'return_links'].nil?
  query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
  query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
  query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
  query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
  query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
  query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
  query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?

  # header parameters
  header_params = opts[:header_params] || {}
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html', 'text/xml'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:debug_body]

  # return_type
  return_type = opts[:debug_return_type] || 'String'

  # auth_names
  auth_names = opts[:debug_auth_names] || ['api_key']

  # The request settings are layered on top of the caller's opts, so the
  # values computed above win over same-named keys supplied by the caller.
  new_options = opts.merge(
    :operation => :"TextApi.get_text",
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type
  )

  data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: TextApi#get_text\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end