Module: YfAsDataframe::YfConnection
- Extended by:
- ActiveSupport::Concern
- Included in:
- Ticker
- Defined in:
- lib/yf_as_dataframe/yf_connection.rb,
lib/yf_as_dataframe/yf_connection_minimal_patch.rb
Constant Summary collapse
- @@user_agent_headers_selection =
Have one place to retrieve data from the Yahoo API in order to ease caching and speed up operations.
[ # Chrome - Desktop "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", # Windows "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", # Mac "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", # Linux "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", # Windows "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", # Mac "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", # Linux # Chrome - Mobile "Mozilla/5.0 (Linux; Android 15; SM-S931B Build/AP3A.240905.015.A2; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/127.0.6533.103 Mobile Safari/537.36", # Samsung S25 "Mozilla/5.0 (Linux; Android 15; Pixel 8 Pro Build/AP4A.250105.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/132.0.6834.163 Mobile Safari/537.36", # Pixel 8 Pro "Mozilla/5.0 (Linux; Android 14; Pixel 9 Pro Build/AD1A.240418.003; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/124.0.6367.54 Mobile Safari/537.36", # Pixel 9 Pro "Mozilla/5.0 (Linux; Android 14; SM-S928B/DS) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.230 Mobile Safari/537.36", # Samsung S24 Ultra # Firefox - Desktop "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0", # Windows "Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:135.0) Gecko/20100101 Firefox/135.0", # Mac "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0", # Linux "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0", # Windows "Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:136.0) Gecko/20100101 Firefox/136.0", # Mac "Mozilla/5.0 (X11; Linux x86_64; rv:136.0) Gecko/20100101 
Firefox/136.0", # Linux # Firefox - Mobile "Mozilla/5.0 (Android 15; Mobile; SM-G556B/DS; rv:130.0) Gecko/130.0 Firefox/130.0", # Samsung Xcover7 "Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36", # Pixel 7 Pro "Mozilla/5.0 (Linux; Android 13; Pixel 6 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36", # Pixel 6 Pro "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36", # Generic Android # Safari - Desktop "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15", # Mac "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.10 Safari/605.1.15", # Mac # Safari - Mobile "Mozilla/5.0 (iPhone; CPU iPhone OS 17_7_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", # iPhone "Mozilla/5.0 (iPad; CPU OS 17_7_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", # iPad # Edge - Desktop "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0", # Windows "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/131.0.2903.86", # Windows "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0", # Windows # Edge - Mobile "Mozilla/5.0 (Linux; Android 10; OnePlus HD1913) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 EdgA/134.0.0.0", # Android # Opera - Desktop "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 OPR/117.0.0.0", # Windows # Opera - Mobile "Mozilla/5.0 (Linux; Android 10; Huawei VOG-L29) AppleWebKit/537.36 (KHTML, like 
Gecko) Chrome/76.2.4027.0 Mobile Safari/537.36 OPR/76.2.4027.0" # Android ]
# Default request headers, modelled on what a browser would often send.
# Original author's note: it seems they've recently implemented
# fingerprinting. We're not fingerprinting yet, but this could be closer.
#
# The User-Agent is drawn with `sample` from @@user_agent_headers_selection
# when this assignment is evaluated, so the same value is reused until the
# variable is assigned again.
@@user_agent_headers = {
  "User-Agent" => @@user_agent_headers_selection.sample,
  "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
  "Accept-Language" => "en-US,en;q=0.9",
  "Accept-Encoding" => "gzip, deflate, br",
  # Was "https:://finance.yahoo.com/" (double colon), which is not a valid URL.
  "Referer" => "https://finance.yahoo.com/",
  "Cache-Control" => "max-age=0",
  "Connection" => "keep-alive"
}
- @@proxy =
nil
Class Method Summary collapse
- .enable_curl_impersonate(enabled: true) ⇒ Object
- .enable_curl_impersonate_fallback(enabled: true) ⇒ Object
- .get_available_curl_impersonate_executables ⇒ Object
- .get_curl_impersonate_config ⇒ Object
- .set_curl_impersonate_connect_timeout(timeout) ⇒ Object
- .set_curl_impersonate_process_timeout(timeout) ⇒ Object
- .set_curl_impersonate_retries(retries) ⇒ Object
- .set_curl_impersonate_timeout(timeout) ⇒ Object
Instance Method Summary collapse
- #cache_get ⇒ Object
-
#get(url, headers = nil, params = nil) ⇒ Object
Override get method to use curl-impersonate by default.
-
#get_original ⇒ Object
Store original methods.
- #get_raw_json(url, user_agent_headers = nil, params = nil) ⇒ Object (also: #get_raw_json_original)
- #yfconn_initialize ⇒ Object
Class Method Details
.enable_curl_impersonate(enabled: true) ⇒ Object
87 88 89 |
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 87
#
# Stores the given flag in CurlImpersonateIntegration.curl_impersonate_enabled.
#
# @param enabled [Object] value to store; defaults to true
# @return [Object] the value that was assigned
def enable_curl_impersonate(enabled: true)
  CurlImpersonateIntegration.curl_impersonate_enabled = enabled
end
.enable_curl_impersonate_fallback(enabled: true) ⇒ Object
91 92 93 |
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 91
#
# Stores the given flag in CurlImpersonateIntegration.curl_impersonate_fallback.
#
# @param enabled [Object] value to store; defaults to true
# @return [Object] the value that was assigned
def enable_curl_impersonate_fallback(enabled: true)
  CurlImpersonateIntegration.curl_impersonate_fallback = enabled
end
.get_available_curl_impersonate_executables ⇒ Object
111 112 113 |
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 111
#
# Pass-through reader for CurlImpersonateIntegration.available_executables.
#
# @return [Object] whatever CurlImpersonateIntegration.available_executables returns
def get_available_curl_impersonate_executables
  CurlImpersonateIntegration.available_executables
end
.get_curl_impersonate_config ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 115
#
# Collects the current CurlImpersonateIntegration settings into one Hash.
#
# @return [Hash{Symbol => Object}] keys :enabled, :fallback, :timeout,
#   :connect_timeout, :process_timeout, :retries and :retry_delay, each read
#   from the matching curl_impersonate_* accessor
def get_curl_impersonate_config
  integration = CurlImpersonateIntegration

  {
    enabled: integration.curl_impersonate_enabled,
    fallback: integration.curl_impersonate_fallback,
    timeout: integration.curl_impersonate_timeout,
    connect_timeout: integration.curl_impersonate_connect_timeout,
    process_timeout: integration.curl_impersonate_process_timeout,
    retries: integration.curl_impersonate_retries,
    retry_delay: integration.curl_impersonate_retry_delay
  }
end
.set_curl_impersonate_connect_timeout(timeout) ⇒ Object
99 100 101 |
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 99
#
# Stores the given value in
# CurlImpersonateIntegration.curl_impersonate_connect_timeout.
#
# @param timeout [Object] new connect timeout (unit not visible here; presumably seconds — confirm)
# @return [Object] the value that was assigned
def set_curl_impersonate_connect_timeout(timeout)
  CurlImpersonateIntegration.curl_impersonate_connect_timeout = timeout
end
.set_curl_impersonate_process_timeout(timeout) ⇒ Object
103 104 105 |
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 103
#
# Stores the given value in
# CurlImpersonateIntegration.curl_impersonate_process_timeout.
#
# @param timeout [Object] new process timeout (unit not visible here; presumably seconds — confirm)
# @return [Object] the value that was assigned
def set_curl_impersonate_process_timeout(timeout)
  CurlImpersonateIntegration.curl_impersonate_process_timeout = timeout
end
.set_curl_impersonate_retries(retries) ⇒ Object
107 108 109 |
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 107
#
# Stores the given value in CurlImpersonateIntegration.curl_impersonate_retries.
#
# @param retries [Object] new retry setting
# @return [Object] the value that was assigned
def set_curl_impersonate_retries(retries)
  CurlImpersonateIntegration.curl_impersonate_retries = retries
end
.set_curl_impersonate_timeout(timeout) ⇒ Object
95 96 97 |
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 95
#
# Stores the given value in CurlImpersonateIntegration.curl_impersonate_timeout.
#
# @param timeout [Object] new timeout (unit not visible here; presumably seconds — confirm)
# @return [Object] the value that was assigned
def set_curl_impersonate_timeout(timeout)
  CurlImpersonateIntegration.curl_impersonate_timeout = timeout
end
Instance Method Details
#cache_get ⇒ Object
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
# Summary of the listing below (it is headed #cache_get but shows `def get`;
# presumably an alias — confirm against the source file).
#
# Flow: raise RuntimeError if circuit_breaker_allow_request? is false; call
# throttle_request, track_session_usage and refresh_session_if_needed; when
# the url contains '/v7/finance/download', fetch a crumb with
# get_crumb_scrape_quote_page and merge it into params; append params to the
# url as a query string; configure ::HTTParty.http_proxy when _get_proxy is
# non-nil; issue ::HTTParty.get and return the response.
#
# Raises RuntimeError when response_failure?(response) is true; any other
# StandardError is re-raised after circuit_breaker_record_failure.
#
# NOTE(review): the failure-response `raise` sits inside the begin block, so
# `rescue => e` catches it and circuit_breaker_record_failure runs twice for
# that path.
# NOTE(review): `params.merge!` mutates the Hash passed in by the caller.
# NOTE(review): several identifiers appear to be missing from this rendering
# (e.g. `, _, strategy = (crumb_needed)`, `= ::HTTParty::CookieHash.new`,
# `::HTTParty.get(u, )`); treat the source file as authoritative.
# File 'lib/yf_as_dataframe/yf_connection.rb', line 177 def get(url, headers=nil, params=nil) # Check circuit breaker first unless circuit_breaker_allow_request? raise RuntimeError.new("Circuit breaker is open - too many recent failures. Please try again later.") end # Add request throttling to be respectful of rate limits throttle_request # Track session usage track_session_usage # Refresh session if needed refresh_session_if_needed # Only fetch crumb for /v7/finance/download endpoint crumb_needed = url.include?('/v7/finance/download') headers ||= {} params ||= {} # params.merge!(crumb: @@crumb) unless @@crumb.nil? # Commented out: crumb not needed for most endpoints if crumb_needed crumb = get_crumb_scrape_quote_page(params[:symbol] || params['symbol']) params.merge!(crumb: crumb) unless crumb.nil? end , _, strategy = (crumb_needed) crumbs = {} # crumb logic handled above if needed request_args = { url: url, params: params.merge(crumbs), headers: headers || {} } proxy = _get_proxy ::HTTParty.http_proxy(addr = proxy.split(':').first, port = proxy.split(':').second.split('/').first) unless proxy.nil? = ::HTTParty::CookieHash.new .(@@cookie) = { headers: headers.dup.merge(@@user_agent_headers).merge({ 'cookie' => . })} #, debug_output: STDOUT } u = (request_args[:url]).dup.to_s joiner = (request_args[:url].include?('?') ? '&' : '?') u += (joiner + URI.encode_www_form(request_args[:params])) unless request_args[:params].empty? begin response = ::HTTParty.get(u, ) if response_failure?(response) circuit_breaker_record_failure raise RuntimeError.new("Yahoo Finance request failed: #{response.code} - #{response.body}") end circuit_breaker_record_success return response rescue => e circuit_breaker_record_failure raise e end end |
#get(url, headers = nil, params = nil) ⇒ Object
Override get method to use curl-impersonate by default
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# Summary of the `get` listing below.
#
# Flow: raise RuntimeError if circuit_breaker_allow_request? is false; call
# throttle_request, track_session_usage and refresh_session_if_needed; when
# the url contains '/v7/finance/download', fetch a crumb with
# get_crumb_scrape_quote_page and merge it into params; append params to the
# url as a query string; configure ::HTTParty.http_proxy when _get_proxy is
# non-nil; issue ::HTTParty.get and return the response.
#
# Raises RuntimeError when response_failure?(response) is true; any other
# StandardError is re-raised after circuit_breaker_record_failure.
#
# NOTE(review): the heading describes this as the curl-impersonate override,
# but the body shown calls ::HTTParty.get directly — confirm against the
# source file.
# NOTE(review): the failure-response `raise` sits inside the begin block, so
# `rescue => e` catches it and circuit_breaker_record_failure runs twice for
# that path.
# NOTE(review): `params.merge!` mutates the Hash passed in by the caller.
# NOTE(review): several identifiers appear to be missing from this rendering
# (e.g. `, _, strategy = (crumb_needed)`, `= ::HTTParty::CookieHash.new`,
# `::HTTParty.get(u, )`); treat the source file as authoritative.
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 13 def get(url, headers=nil, params=nil) # Check circuit breaker first unless circuit_breaker_allow_request? raise RuntimeError.new("Circuit breaker is open - too many recent failures. Please try again later.") end # Add request throttling to be respectful of rate limits throttle_request # Track session usage track_session_usage # Refresh session if needed refresh_session_if_needed # Only fetch crumb for /v7/finance/download endpoint crumb_needed = url.include?('/v7/finance/download') headers ||= {} params ||= {} # params.merge!(crumb: @@crumb) unless @@crumb.nil? # Commented out: crumb not needed for most endpoints if crumb_needed crumb = get_crumb_scrape_quote_page(params[:symbol] || params['symbol']) params.merge!(crumb: crumb) unless crumb.nil? end , _, strategy = (crumb_needed) crumbs = {} # crumb logic handled above if needed request_args = { url: url, params: params.merge(crumbs), headers: headers || {} } proxy = _get_proxy ::HTTParty.http_proxy(addr = proxy.split(':').first, port = proxy.split(':').second.split('/').first) unless proxy.nil? = ::HTTParty::CookieHash.new .(@@cookie) = { headers: headers.dup.merge(@@user_agent_headers).merge({ 'cookie' => . })} #, debug_output: STDOUT } u = (request_args[:url]).dup.to_s joiner = (request_args[:url].include?('?') ? '&' : '?') u += (joiner + URI.encode_www_form(request_args[:params])) unless request_args[:params].empty? begin response = ::HTTParty.get(u, ) if response_failure?(response) circuit_breaker_record_failure raise RuntimeError.new("Yahoo Finance request failed: #{response.code} - #{response.body}") end circuit_breaker_record_success return response rescue => e circuit_breaker_record_failure raise e end end |
#get_original ⇒ Object
Store original methods
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# Summary of the listing below (it is headed #get_original but shows
# `def get`; presumably an alias of the original method — confirm against the
# source file).
#
# Flow: raise RuntimeError if circuit_breaker_allow_request? is false; call
# throttle_request, track_session_usage and refresh_session_if_needed; when
# the url contains '/v7/finance/download', fetch a crumb with
# get_crumb_scrape_quote_page and merge it into params; append params to the
# url as a query string; configure ::HTTParty.http_proxy when _get_proxy is
# non-nil; issue ::HTTParty.get and return the response.
#
# Raises RuntimeError when response_failure?(response) is true; any other
# StandardError is re-raised after circuit_breaker_record_failure.
#
# NOTE(review): the failure-response `raise` sits inside the begin block, so
# `rescue => e` catches it and circuit_breaker_record_failure runs twice for
# that path.
# NOTE(review): `params.merge!` mutates the Hash passed in by the caller.
# NOTE(review): several identifiers appear to be missing from this rendering
# (e.g. `, _, strategy = (crumb_needed)`, `= ::HTTParty::CookieHash.new`,
# `::HTTParty.get(u, )`); treat the source file as authoritative.
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 9 def get(url, headers=nil, params=nil) # Check circuit breaker first unless circuit_breaker_allow_request? raise RuntimeError.new("Circuit breaker is open - too many recent failures. Please try again later.") end # Add request throttling to be respectful of rate limits throttle_request # Track session usage track_session_usage # Refresh session if needed refresh_session_if_needed # Only fetch crumb for /v7/finance/download endpoint crumb_needed = url.include?('/v7/finance/download') headers ||= {} params ||= {} # params.merge!(crumb: @@crumb) unless @@crumb.nil? # Commented out: crumb not needed for most endpoints if crumb_needed crumb = get_crumb_scrape_quote_page(params[:symbol] || params['symbol']) params.merge!(crumb: crumb) unless crumb.nil? end , _, strategy = (crumb_needed) crumbs = {} # crumb logic handled above if needed request_args = { url: url, params: params.merge(crumbs), headers: headers || {} } proxy = _get_proxy ::HTTParty.http_proxy(addr = proxy.split(':').first, port = proxy.split(':').second.split('/').first) unless proxy.nil? = ::HTTParty::CookieHash.new .(@@cookie) = { headers: headers.dup.merge(@@user_agent_headers).merge({ 'cookie' => . })} #, debug_output: STDOUT } u = (request_args[:url]).dup.to_s joiner = (request_args[:url].include?('?') ? '&' : '?') u += (joiner + URI.encode_www_form(request_args[:params])) unless request_args[:params].empty? begin response = ::HTTParty.get(u, ) if response_failure?(response) circuit_breaker_record_failure raise RuntimeError.new("Yahoo Finance request failed: #{response.code} - #{response.body}") end circuit_breaker_record_success return response rescue => e circuit_breaker_record_failure raise e end end |
#get_raw_json(url, user_agent_headers = nil, params = nil) ⇒ Object Also known as: get_raw_json_original
180 181 182 183 184 185 186 |
# File 'lib/yf_as_dataframe/yf_connection.rb', line 180
#
# Forwards its arguments to #get and hands the response back unchanged.
# No JSON decoding or status check happens here (the original listing had
# `.json()` and `raise_for_status()` commented out).
#
# @param url [String] address passed to #get
# @param user_agent_headers [Hash, nil] passed to #get as its headers argument
# @param params [Hash, nil] passed to #get as its params argument
# @return [Object] whatever #get returns
def get_raw_json(url, user_agent_headers=nil, params=nil)
  get(url, user_agent_headers, params)
end
#yfconn_initialize ⇒ Object
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/yf_as_dataframe/yf_connection.rb', line 87
#
# (Re)initialises the class-level connection state: the optional Zache cache,
# crumb/cookie fields, session counters and the circuit-breaker fields.
#
# @return [Integer] the value of the last assignment (60)
def yfconn_initialize
  # Rails.logger.info { "#{__FILE__}:#{__LINE__} here"}
  begin
    @@zache = ::Zache.new
    @@session_is_caching = true
  rescue NameError
    # Not caching.
    # NameError is the parent of NoMethodError, so this still covers the
    # previously rescued case and additionally covers an unloaded ::Zache
    # constant, which raises NameError and used to escape this rescue.
    @@session_is_caching = false
  end

  @@crumb = nil
  @@cookie = nil
  @@cookie_strategy = 'basic'
  @@cookie_lock = ::Mutex.new

  # Add session tracking
  @@session_created_at = Time.now
  @@session_refresh_interval = 3600 # 1 hour
  @@request_count = 0
  @@last_request_time = nil

  # Circuit breaker state
  @@circuit_breaker_state = :closed # :closed, :open, :half_open
  @@failure_count = 0
  @@last_failure_time = nil
  @@circuit_breaker_threshold = 3
  @@circuit_breaker_timeout = 60 # seconds
  @@circuit_breaker_base_timeout = 60 # seconds
end