Module: YfAsDataframe::CurlImpersonateIntegration
Defined in: lib/yf_as_dataframe/curl_impersonate_integration.rb
Class Attribute Summary
-
.curl_impersonate_connect_timeout ⇒ Object
Returns the value of attribute curl_impersonate_connect_timeout.
-
.curl_impersonate_enabled ⇒ Object
Returns the value of attribute curl_impersonate_enabled.
-
.curl_impersonate_fallback ⇒ Object
Returns the value of attribute curl_impersonate_fallback.
-
.curl_impersonate_process_timeout ⇒ Object
Returns the value of attribute curl_impersonate_process_timeout.
-
.curl_impersonate_retries ⇒ Object
Returns the value of attribute curl_impersonate_retries.
-
.curl_impersonate_retry_delay ⇒ Object
Returns the value of attribute curl_impersonate_retry_delay.
-
.curl_impersonate_timeout ⇒ Object
Returns the value of attribute curl_impersonate_timeout.
Class Method Summary
-
.available_executables ⇒ Object
Find available curl-impersonate executables.
-
.executable_directory ⇒ Object
Get the curl-impersonate executable directory from environment variable or default.
-
.get_random_executable ⇒ Object
Get a random executable.
-
.make_request(url, headers: {}, params: {}, timeout: nil, retries: nil) ⇒ Object
Make a curl-impersonate request with improved timeout handling.
Class Attribute Details
.curl_impersonate_connect_timeout ⇒ Object
Returns the value of attribute curl_impersonate_connect_timeout.
18 19 20 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 18

# Reader for the connect-timeout setting.
#
# make_request converts this value with #to_s and hands it to curl as the
# argument of --connect-timeout.
#
# @return [Object] the stored value, or nil when it has never been assigned
def curl_impersonate_connect_timeout
  @curl_impersonate_connect_timeout
end
.curl_impersonate_enabled ⇒ Object
Returns the value of attribute curl_impersonate_enabled.
18 19 20 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 18

# Reader for the "enabled" setting.
#
# NOTE(review): presumably toggles whether curl-impersonate is used at all;
# no method in this listing reads it, so confirm against the callers.
#
# @return [Object] the stored value, or nil when it has never been assigned
def curl_impersonate_enabled
  @curl_impersonate_enabled
end
.curl_impersonate_fallback ⇒ Object
Returns the value of attribute curl_impersonate_fallback.
18 19 20 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 18

# Reader for the "fallback" setting.
#
# NOTE(review): presumably controls falling back to another HTTP client when
# curl-impersonate fails; no method in this listing reads it, so confirm
# against the callers.
#
# @return [Object] the stored value, or nil when it has never been assigned
def curl_impersonate_fallback
  @curl_impersonate_fallback
end
.curl_impersonate_process_timeout ⇒ Object
Returns the value of attribute curl_impersonate_process_timeout.
18 19 20 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 18

# Reader for the process-timeout setting.
#
# NOTE(review): make_request as listed here does not read this value (its
# watchdog waits `timeout + 10`); confirm where, if anywhere, it is consumed.
#
# @return [Object] the stored value, or nil when it has never been assigned
def curl_impersonate_process_timeout
  @curl_impersonate_process_timeout
end
.curl_impersonate_retries ⇒ Object
Returns the value of attribute curl_impersonate_retries.
18 19 20 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 18

# Reader for the default retry count.
#
# make_request falls back to this value when its `retries:` argument is nil
# and passes it to curl as the argument of --retry.
#
# @return [Object] the stored value, or nil when it has never been assigned
def curl_impersonate_retries
  @curl_impersonate_retries
end
.curl_impersonate_retry_delay ⇒ Object
Returns the value of attribute curl_impersonate_retry_delay.
18 19 20 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 18

# Reader for the retry-delay setting.
#
# make_request converts this value with #to_s and hands it to curl as the
# argument of --retry-delay.
#
# @return [Object] the stored value, or nil when it has never been assigned
def curl_impersonate_retry_delay
  @curl_impersonate_retry_delay
end
.curl_impersonate_timeout ⇒ Object
Returns the value of attribute curl_impersonate_timeout.
18 19 20 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 18

# Reader for the default request timeout.
#
# make_request falls back to this value when its `timeout:` argument is nil;
# it becomes curl's --max-time, and twice the value becomes --retry-max-time.
#
# @return [Object] the stored value, or nil when it has never been assigned
def curl_impersonate_timeout
  @curl_impersonate_timeout
end
Class Method Details
.available_executables ⇒ Object
Find available curl-impersonate executables
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 30

# Find available curl-impersonate executables.
#
# Globs executable_directory for entries named "curl_*" and keeps only
# regular files that the current process may execute, so directories and
# stray non-executable files (notes, backups) can never be picked and then
# fail at spawn time. The result is memoized in @available_executables, so
# the directory is scanned once per process.
#
# @return [Array<Hash>] one hash per executable with the keys
#   :path (full path), :executable (basename) and :browser
#   (:chrome, :firefox, :edge, :safari or :unknown, derived from the basename)
def self.available_executables
  @available_executables ||=
    Dir.glob(File.join(executable_directory, 'curl_*'))
       .select { |path| File.file?(path) && File.executable?(path) }
       .map do |path|
         executable = File.basename(path)
         browser_type = case executable
                        when /\Acurl_chrome/ then :chrome
                        when /\Acurl_ff/ then :firefox
                        when /\Acurl_edge/ then :edge
                        when /\Acurl_safari/ then :safari
                        else :unknown
                        end
         { path: path, executable: executable, browser: browser_type }
       end
end
.executable_directory ⇒ Object
Get the curl-impersonate executable directory from environment variable or default
25 26 27 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 25

# Get the curl-impersonate executable directory from the environment
# variable CURL_IMPERSONATE_DIR, or the default.
#
# An empty or whitespace-only value is treated as "not set": joining an
# empty directory with "curl_*" would otherwise produce "/curl_*" and scan
# the filesystem root.
#
# @return [String] the configured directory, or '/usr/local/bin'
def self.executable_directory
  configured = ENV['CURL_IMPERSONATE_DIR']
  return '/usr/local/bin' if configured.nil? || configured.strip.empty?

  configured
end
.get_random_executable ⇒ Object
Get a random executable
51 52 53 54 55 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 51

# Get a random executable.
#
# Picks one entry from available_executables. Array#sample yields nil for an
# empty array, so no separate emptiness check is needed.
#
# @return [Object, nil] one element of available_executables, or nil when
#   that list is empty
def self.get_random_executable
  available_executables.sample
end
.make_request(url, headers: {}, params: {}, timeout: nil, retries: nil) ⇒ Object
Make a curl-impersonate request with improved timeout handling
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
# File 'lib/yf_as_dataframe/curl_impersonate_integration.rb', line 58

require 'uri' # stdlib; provides URI.encode_www_form for the query string below

# Make a curl-impersonate request with improved timeout handling.
#
# Runs a randomly chosen curl-impersonate executable as a child process and
# returns its standard output wrapped in a response object.
#
# @param url [String] target URL; may already contain a query string
# @param headers [Hash] request headers, each passed to curl as `-H "key: value"`
# @param params [Hash] query parameters; form-encoded and appended to url
# @param timeout [Numeric, nil] value for curl's --max-time; defaults to
#   @curl_impersonate_timeout when nil
# @param retries [Integer, nil] value for curl's --retry; defaults to
#   @curl_impersonate_retries when nil
# @return [OpenStruct, nil] on a zero exit status, an object responding to
#   #body (raw stdout), #code (always 200, because --fail makes curl exit
#   non-zero on HTTP error responses), #success? (true) and #parsed_response
#   (result of parse_json_if_possible); nil when no executable is available,
#   curl exits non-zero, or spawning/reading raises a StandardError
def self.make_request(url, headers: {}, params: {}, timeout: nil, retries: nil)
  executable_info = get_random_executable
  return nil unless executable_info

  timeout ||= @curl_impersonate_timeout
  retries ||= @curl_impersonate_retries

  cmd = [
    executable_info[:path],
    '--max-time', timeout.to_s,
    '--connect-timeout', @curl_impersonate_connect_timeout.to_s,
    '--retry', retries.to_s,
    '--retry-delay', @curl_impersonate_retry_delay.to_s,
    '--retry-max-time', (timeout * 2).to_s,
    '--fail',
    '--silent',
    '--show-error'
  ]
  headers.each { |key, value| cmd.push('-H', "#{key}: #{value}") }

  unless params.empty?
    # Form-encode keys and values so characters such as spaces, '&' and '='
    # cannot corrupt the query string.
    separator = url.include?('?') ? '&' : '?'
    url = "#{url}#{separator}#{URI.encode_www_form(params)}"
  end
  cmd << url

  begin
    stdout_str = ''
    status = nil

    Open3.popen3(*cmd) do |stdin, stdout, stderr, wait_thr|
      stdin.close

      # Watchdog: curl enforces --max-time itself; this only fires if the
      # child is still running 10 seconds after that deadline.
      watchdog = Thread.new do
        sleep(timeout + 10)
        begin
          Process.kill('TERM', wait_thr.pid) if wait_thr.alive?
          sleep(1)
          Process.kill('KILL', wait_thr.pid) if wait_thr.alive?
        rescue SystemCallError
          # Best effort: the child has already exited or cannot be signalled.
        end
      end

      begin
        # Drain stderr concurrently; reading stdout to EOF first could block
        # forever if the child filled the stderr pipe in the meantime.
        stderr_drain = Thread.new { stderr.read }
        stdout_str = stdout.read
        stderr_drain.join
        status = wait_thr.value
      ensure
        # Always stop the watchdog so it can never signal a stale pid later.
        watchdog.kill
      end
    end

    return nil unless status.success?

    response = OpenStruct.new
    response.body = stdout_str
    response.code = 200
    response.define_singleton_method(:success?) { true }
    response.parsed_response = parse_json_if_possible(stdout_str)
    response
  rescue StandardError
    # Deliberate best effort: any spawn or I/O failure is reported as "no response".
    nil
  end
end