Class: WebScrapingAI::HTMLApi
- Inherits:
-
Object
- Object
- WebScrapingAI::HTMLApi
- Defined in:
- lib/webscraping_ai/api/html_api.rb
Instance Attribute Summary collapse
-
#api_client ⇒ Object
Returns the value of attribute api_client.
Instance Method Summary collapse
-
#get_html(url, opts = {}) ⇒ String
Page HTML by URL. Returns the full HTML content of a webpage specified by the URL.
-
#get_html_with_http_info(url, opts = {}) ⇒ Array<(String, Integer, Hash)>
Page HTML by URL. Returns the full HTML content of a webpage specified by the URL.
-
#initialize(api_client = ApiClient.default) ⇒ HTMLApi
constructor
A new instance of HTMLApi.
Constructor Details
Instance Attribute Details
#api_client ⇒ Object
Returns the value of attribute api_client.
17 18 19 |
# File 'lib/webscraping_ai/api/html_api.rb', line 17
#
# Returns the value of attribute api_client.
#
# @return [Object] whatever is currently stored in @api_client
def api_client
  @api_client
end
Instance Method Details
#get_html(url, opts = {}) ⇒ String
Page HTML by URL. Returns the full HTML content of a webpage specified by the URL. The response is in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
38 39 40 41 |
# File 'lib/webscraping_ai/api/html_api.rb', line 38
#
# Page HTML by URL.
# Convenience wrapper around #get_html_with_http_info that discards the
# status code and headers and returns only the response data.
#
# @param url [String] URL of the target page
# @param opts [Hash] optional parameters, passed through unchanged to
#   #get_html_with_http_info
# @return [String] the response data
def get_html(url, opts = {})
  data, _status_code, _headers = get_html_with_http_info(url, opts)
  data
end
#get_html_with_http_info(url, opts = {}) ⇒ Array<(String, Integer, Hash)>
Page HTML by URL. Returns the full HTML content of a webpage specified by the URL. The response is in plain text. Proxies and Chromium JavaScript rendering are used for page retrieval and processing.
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/webscraping_ai/api/html_api.rb', line 59
#
# Page HTML by URL.
# Issues a GET to '/html' through the API client and returns the response
# data together with the HTTP status code and headers.
#
# Validation below only runs when the client's configuration has
# client_side_validation enabled.
#
# @param url [String] URL of the target page (required)
# @param opts [Hash] optional parameters
# @option opts [Object] :headers sent as the 'headers' query parameter
# @option opts [Integer] :timeout validated to lie within 1..30000
# @option opts [Object] :js sent as the 'js' query parameter
# @option opts [Integer] :js_timeout validated to lie within 1..20000
# @option opts [String] :proxy one of "datacenter", "residential"
# @option opts [String] :country one of "us", "gb", "de", "it", "fr", "ca",
#   "es", "ru", "jp", "kr"
# @option opts [String] :device one of "desktop", "mobile", "tablet"
# @option opts [Object] :error_on_404 sent as the 'error_on_404' query parameter
# @option opts [Object] :error_on_redirect sent as the 'error_on_redirect' query parameter
# @option opts [Object] :js_script sent as the 'js_script' query parameter
# @option opts [Object] :return_script_result sent as the 'return_script_result' query parameter
# @option opts [Hash] :query_params base query parameters (this hash is extended in place)
# @option opts [Hash] :header_params base header parameters (extended in place)
# @option opts [Hash] :form_params form parameters
# @option opts [Object] :debug_body overrides the request body
# @option opts [String] :debug_return_type overrides the return type ('String' by default)
# @option opts [Array<String>] :debug_auth_names overrides the auth names (['api_key'] by default)
# @return [Array<(String, Integer, Hash)>] response data, status code and headers
# @raise [ArgumentError] if url is nil or an option fails validation
def get_html_with_http_info(url, opts = {})
  if @api_client.config.debugging
    @api_client.config.logger.debug 'Calling API: HTMLApi.get_html ...'
  end
  # verify the required parameter 'url' is set
  if @api_client.config.client_side_validation && url.nil?
    fail ArgumentError, "Missing the required parameter 'url' when calling HTMLApi.get_html"
  end
  if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] > 30000
    fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling HTMLApi.get_html, must be smaller than or equal to 30000.'
  end

  if @api_client.config.client_side_validation && !opts[:'timeout'].nil? && opts[:'timeout'] < 1
    fail ArgumentError, 'invalid value for "opts[:"timeout"]" when calling HTMLApi.get_html, must be greater than or equal to 1.'
  end

  if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] > 20000
    fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling HTMLApi.get_html, must be smaller than or equal to 20000.'
  end

  if @api_client.config.client_side_validation && !opts[:'js_timeout'].nil? && opts[:'js_timeout'] < 1
    fail ArgumentError, 'invalid value for "opts[:"js_timeout"]" when calling HTMLApi.get_html, must be greater than or equal to 1.'
  end

  allowable_values = ["datacenter", "residential"]
  if @api_client.config.client_side_validation && opts[:'proxy'] && !allowable_values.include?(opts[:'proxy'])
    fail ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
  end
  allowable_values = ["us", "gb", "de", "it", "fr", "ca", "es", "ru", "jp", "kr"]
  if @api_client.config.client_side_validation && opts[:'country'] && !allowable_values.include?(opts[:'country'])
    fail ArgumentError, "invalid value for \"country\", must be one of #{allowable_values}"
  end
  allowable_values = ["desktop", "mobile", "tablet"]
  if @api_client.config.client_side_validation && opts[:'device'] && !allowable_values.include?(opts[:'device'])
    fail ArgumentError, "invalid value for \"device\", must be one of #{allowable_values}"
  end

  # resource path
  local_var_path = '/html'

  # query parameters
  query_params = opts[:query_params] || {}
  query_params[:'url'] = url
  query_params[:'headers'] = opts[:'headers'] if !opts[:'headers'].nil?
  query_params[:'timeout'] = opts[:'timeout'] if !opts[:'timeout'].nil?
  query_params[:'js'] = opts[:'js'] if !opts[:'js'].nil?
  query_params[:'js_timeout'] = opts[:'js_timeout'] if !opts[:'js_timeout'].nil?
  query_params[:'proxy'] = opts[:'proxy'] if !opts[:'proxy'].nil?
  query_params[:'country'] = opts[:'country'] if !opts[:'country'].nil?
  query_params[:'device'] = opts[:'device'] if !opts[:'device'].nil?
  query_params[:'error_on_404'] = opts[:'error_on_404'] if !opts[:'error_on_404'].nil?
  query_params[:'error_on_redirect'] = opts[:'error_on_redirect'] if !opts[:'error_on_redirect'].nil?
  query_params[:'js_script'] = opts[:'js_script'] if !opts[:'js_script'].nil?
  query_params[:'return_script_result'] = opts[:'return_script_result'] if !opts[:'return_script_result'].nil?

  # header parameters
  header_params = opts[:header_params] || {}
  # HTTP header 'Accept' (if needed)
  header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])

  # form parameters
  form_params = opts[:form_params] || {}

  # http body (model)
  post_body = opts[:debug_body]

  # return_type
  return_type = opts[:debug_return_type] || 'String'

  # auth_names
  auth_names = opts[:debug_auth_names] || ['api_key']

  # Merge the computed request pieces over the caller's opts; this is the
  # hash handed to the API client.
  new_options = opts.merge(
    :operation => :"HTMLApi.get_html",
    :header_params => header_params,
    :query_params => query_params,
    :form_params => form_params,
    :body => post_body,
    :auth_names => auth_names,
    :return_type => return_type
  )

  data, status_code, headers = @api_client.call_api(:GET, local_var_path, new_options)
  if @api_client.config.debugging
    @api_client.config.logger.debug "API called: HTMLApi#get_html\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  return data, status_code, headers
end