Class: ScraperRb::Scraper
- Inherits:
-
Object
- Object
- ScraperRb::Scraper
- Defined in:
- lib/scraper_rb.rb
Constant Summary collapse
- VALID_PARAMS =
['auth_password', 'auth_username', 'cookie', 'country', 'referer', 'selector']
Instance Attribute Summary collapse
-
#options ⇒ Object
Returns the value of attribute options.
-
#response ⇒ Object
Returns the value of attribute response.
Instance Method Summary collapse
- #get ⇒ Object
-
#initialize(url, params, extra_headers, timeout) ⇒ Scraper
constructor
A new instance of Scraper.
- #parse(body) ⇒ Object
- #save(filename) ⇒ Object
Constructor Details
#initialize(url, params, extra_headers, timeout) ⇒ Scraper
Returns a new instance of Scraper.
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/scraper_rb.rb', line 33

# Builds the request options for scraping +url+ and resets the stored response.
#
# @param url [String] page to scrape; sent as the +url+ query parameter
# @param params [Hash, nil] extra query parameters. Only keys named in
#   VALID_PARAMS are kept (symbol or string keys are both accepted and are
#   stored as symbols); every other key is dropped. nil means "no extras".
# @param extra_headers [Hash, nil] headers merged over the defaults, so a
#   matching key overrides 'Accept' or 'apikey'
# @param timeout [Object] value stored as the request timeout option
def initialize(url, params, extra_headers, timeout)
  headers = {
    'Accept' => 'application/json',
    'apikey' => ENV['PROMPTAPI_TOKEN'],
  }
  headers.merge!(extra_headers) if extra_headers

  @options = {
    # The endpoint can be overridden through the environment.
    url: ENV['PROMPTAPI_TEST_ENDPOINT'] || 'https://api.promptapi.com/scraper',
    params: {url: url},
    request: {timeout: timeout},
    headers: headers,
  }

  (params || {}).each do |key, value|
    # VALID_PARAMS holds strings, so compare by name and normalise the key.
    name = key.to_s
    @options[:params][name.to_sym] = value if VALID_PARAMS.include?(name)
  end

  @response = {}
end
Instance Attribute Details
#options ⇒ Object
Returns the value of attribute options.
31 32 33 |
# File 'lib/scraper_rb.rb', line 31

# Reader for the options attribute.
#
# @return [Object] the current value of @options
def options
  @options
end
#response ⇒ Object
Returns the value of attribute response.
31 32 33 |
# File 'lib/scraper_rb.rb', line 31

# Reader for the response attribute.
#
# @return [Object] the current value of @response
def response
  @response
end
Instance Method Details
#get ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/scraper_rb.rb', line 62

# Performs the GET request described by @options and stores the outcome in
# @response. Failures are not raised; they are recorded as {error: ...}.
#
# The method's own return value is incidental (nil when the API key is
# missing); read the result from @response instead.
def get
  # Without an 'apikey' header no request is attempted at all.
  unless @options[:headers]['apikey']
    @response = {error: "You need to set PROMPTAPI_TOKEN environment variable"}
    return
  end

  conn = Faraday.new(@options) do |c|
    c.use Faraday::Response::RaiseError
    # CustomURLMiddleware is installed only when RUBY_DEVELOPMENT is set.
    c.use CustomURLMiddleware if ENV['RUBY_DEVELOPMENT']
  end

  begin
    response = conn.get
    @response = parse(response.body)
    # Mirror a "data-selector" payload under :data, the key #save reads.
    @response[:data] = @response[:"data-selector"] if @response.key?(:"data-selector")
  rescue Faraday::ConnectionFailed
    @response = {error: "Connection error"}
  rescue Faraday::TimeoutError => e
    @response = {error: e.message.capitalize}
  rescue Faraday::ClientError => e
    # The error body is run through #parse before being stored.
    @response = {error: parse(e.response[:body])}
  rescue Faraday::ServerError => e
    @response = {error: e.message.capitalize}
  end
end
#parse(body) ⇒ Object
54 55 56 57 58 59 60 |
# File 'lib/scraper_rb.rb', line 54

# Decodes JSON text into Ruby data with symbolized hash keys.
#
# @param body [String, nil] raw JSON text
# @return [Object] the decoded value, or {error: "JSON decoding error"} when
#   +body+ is not valid JSON or is not a String (for example nil)
def parse(body)
  JSON.parse(body, symbolize_names: true)
rescue JSON::ParserError, TypeError
  # TypeError is what JSON.parse raises for a nil / non-String body.
  {error: "JSON decoding error"}
end
#save(filename) ⇒ Object
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/scraper_rb.rb', line 88

# Writes @response[:data] to disk next to +filename+.
#
# Array data is serialised with JSON.generate and saved as ".json"; any other
# data is written as-is and saved as ".html". The extension of +filename+ is
# replaced (or added) accordingly.
#
# @param filename [String] target path; its directory part is kept
# @return [Hash] {file: path, size: bytes} on success,
#   {error: 'Data is not available'} when @response has no :data, or
#   {error: message} when the path does not exist (Errno::ENOENT)
def save(filename)
  data = @response[:data]
  return {error: 'Data is not available'} unless data

  if data.is_a?(Array)
    extension = '.json'
    payload = JSON.generate(data)
  else
    extension = '.html'
    payload = data
  end

  # Strip the extension if the caller already supplied it, then re-append it.
  basename = File.basename(filename, extension)
  target = File.join(File.dirname(filename), "#{basename}#{extension}")

  begin
    File.write(target, payload)
    {file: target, size: File.size(target)}
  rescue Errno::ENOENT => e
    {error: e.to_s}
  end
end