Module: YfAsDataframe::YfConnection

Extended by:
ActiveSupport::Concern
Included in:
Ticker
Defined in:
lib/yf_as_dataframe/yf_connection.rb,
lib/yf_as_dataframe/yf_connection_minimal_patch.rb

Constant Summary collapse

@@user_agent_headers_selection =

Provides a single place to retrieve data from the Yahoo API, in order to ease caching and speed up operations.

[
  # Pool of browser User-Agent strings, grouped by browser family and form factor.
  # One entry is drawn with #sample when @@user_agent_headers is built.
  # Chrome - Desktop
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",       # Windows
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",  # Mac
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",                # Linux
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",       # Windows
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", # Mac
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",                # Linux
  # Chrome - Mobile
  "Mozilla/5.0 (Linux; Android 15; SM-S931B Build/AP3A.240905.015.A2; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/127.0.6533.103 Mobile Safari/537.36",  # Samsung S25
  "Mozilla/5.0 (Linux; Android 15; Pixel 8 Pro Build/AP4A.250105.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/132.0.6834.163 Mobile Safari/537.36",   # Pixel 8 Pro
  "Mozilla/5.0 (Linux; Android 14; Pixel 9 Pro Build/AD1A.240418.003; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/124.0.6367.54 Mobile Safari/537.36",    # Pixel 9 Pro
  "Mozilla/5.0 (Linux; Android 14; SM-S928B/DS) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.230 Mobile Safari/537.36",                                         # Samsung S24 Ultra
  # Firefox - Desktop
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",       # Windows
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:135.0) Gecko/20100101 Firefox/135.0",    # Mac
  "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0",                # Linux
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0",       # Windows
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:136.0) Gecko/20100101 Firefox/136.0",    # Mac
  "Mozilla/5.0 (X11; Linux x86_64; rv:136.0) Gecko/20100101 Firefox/136.0",                # Linux
  # Firefox - Mobile
  "Mozilla/5.0 (Android 15; Mobile; SM-G556B/DS; rv:130.0) Gecko/130.0 Firefox/130.0",     # Samsung Xcover7
  # Chrome - Mobile (Chrome 112/114 on Android; these strings are Chrome, not Firefox)
  "Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36", # Pixel 7 Pro
  "Mozilla/5.0 (Linux; Android 13; Pixel 6 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36", # Pixel 6 Pro
  "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36",           # Generic Android
  # Safari - Desktop
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15",      # Mac
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.10 Safari/605.1.15",     # Mac
  # Safari - Mobile
  "Mozilla/5.0 (iPhone; CPU iPhone OS 17_7_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", # iPhone
  "Mozilla/5.0 (iPad; CPU OS 17_7_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1",         # iPad
  # Edge - Desktop
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0",           # Windows
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/131.0.2903.86",        # Windows
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",            # Windows
  # Edge - Mobile
  "Mozilla/5.0 (Linux; Android 10; OnePlus HD1913) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 EdgA/134.0.0.0", # Android
  # Opera - Desktop
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 OPR/117.0.0.0",            # Windows
  # Opera - Mobile
  "Mozilla/5.0 (Linux; Android 10; Huawei VOG-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.2.4027.0 Mobile Safari/537.36 OPR/76.2.4027.0" # Android
]
@@user_agent_headers =

Adds more of the headers that a browser would typically send. Yahoo appears to have recently implemented fingerprinting. We are not imitating a browser fingerprint yet, but these headers bring the requests closer to what a real browser sends.

{
  # Browser-like default headers merged into every request.
  # The User-Agent is sampled once, when this hash is evaluated, so the same
  # value is reused for as long as the hash lives.
  "User-Agent" => @@user_agent_headers_selection.sample,
  "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
  "Accept-Language" => "en-US,en;q=0.9",
  "Accept-Encoding" => "gzip, deflate, br",
  # Fixed: the scheme separator was written "https:://", which is not a valid URL.
  "Referer" => "https://finance.yahoo.com/",
  "Cache-Control" => "max-age=0",
  "Connection" => "keep-alive"
}
@@proxy =
nil

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.enable_curl_impersonate(enabled: true) ⇒ Object



87
88
89
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 87

# Assigns the given flag to CurlImpersonateIntegration.curl_impersonate_enabled.
#
# @param enabled [Object] value to store; defaults to true (presumably a boolean)
# @return [Object] the value that was assigned
def enable_curl_impersonate(enabled: true)
  CurlImpersonateIntegration.curl_impersonate_enabled = enabled
end

.enable_curl_impersonate_fallback(enabled: true) ⇒ Object



91
92
93
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 91

# Assigns the given flag to CurlImpersonateIntegration.curl_impersonate_fallback.
#
# @param enabled [Object] value to store; defaults to true (presumably a boolean)
# @return [Object] the value that was assigned
def enable_curl_impersonate_fallback(enabled: true)
  CurlImpersonateIntegration.curl_impersonate_fallback = enabled
end

.get_available_curl_impersonate_executables ⇒ Object



111
112
113
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 111

# Delegates to CurlImpersonateIntegration.available_executables.
#
# @return [Object] whatever CurlImpersonateIntegration.available_executables
#   returns (presumably a list of executables — confirm against that module)
def get_available_curl_impersonate_executables
  CurlImpersonateIntegration.available_executables
end

.get_curl_impersonate_config ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 115

# Collects the current curl-impersonate settings into one Hash.
#
# Each key maps to the CurlImpersonateIntegration reader named
# "curl_impersonate_<key>"; keys appear in the order listed below.
#
# @return [Hash{Symbol => Object}] snapshot with the keys :enabled, :fallback,
#   :timeout, :connect_timeout, :process_timeout, :retries and :retry_delay
def get_curl_impersonate_config
  setting_names = %i[enabled fallback timeout connect_timeout process_timeout retries retry_delay]

  setting_names.each_with_object({}) do |setting, snapshot|
    snapshot[setting] = CurlImpersonateIntegration.public_send(:"curl_impersonate_#{setting}")
  end
end

.set_curl_impersonate_connect_timeout(timeout) ⇒ Object



99
100
101
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 99

# Assigns the given value to CurlImpersonateIntegration.curl_impersonate_connect_timeout.
#
# @param timeout [Object] value to store (unit is not visible here — presumably seconds; confirm)
# @return [Object] the value that was assigned
def set_curl_impersonate_connect_timeout(timeout)
  CurlImpersonateIntegration.curl_impersonate_connect_timeout = timeout
end

.set_curl_impersonate_process_timeout(timeout) ⇒ Object



103
104
105
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 103

# Assigns the given value to CurlImpersonateIntegration.curl_impersonate_process_timeout.
#
# @param timeout [Object] value to store (unit is not visible here — presumably seconds; confirm)
# @return [Object] the value that was assigned
def set_curl_impersonate_process_timeout(timeout)
  CurlImpersonateIntegration.curl_impersonate_process_timeout = timeout
end

.set_curl_impersonate_retries(retries) ⇒ Object



107
108
109
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 107

# Assigns the given value to CurlImpersonateIntegration.curl_impersonate_retries.
#
# @param retries [Object] value to store (presumably an Integer retry count; confirm)
# @return [Object] the value that was assigned
def set_curl_impersonate_retries(retries)
  CurlImpersonateIntegration.curl_impersonate_retries = retries
end

.set_curl_impersonate_timeout(timeout) ⇒ Object



95
96
97
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 95

# Assigns the given value to CurlImpersonateIntegration.curl_impersonate_timeout.
#
# @param timeout [Object] value to store (unit is not visible here — presumably seconds; confirm)
# @return [Object] the value that was assigned
def set_curl_impersonate_timeout(timeout)
  CurlImpersonateIntegration.curl_impersonate_timeout = timeout
end

Instance Method Details

#cache_get ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/yf_as_dataframe/yf_connection.rb', line 177

# Performs a GET request against +url+ through HTTParty, guarded by the
# circuit breaker and preceded by the throttling and session helpers.
#
# @param url [String] endpoint to fetch; may already contain a query string
# @param headers [Hash, nil] extra request headers; on key clashes the entries
#   of @@user_agent_headers and the 'cookie' header win
# @param params [Hash, nil] query parameters; the caller's hash is not mutated
# @return [Object] the HTTParty response, when response_failure? does not flag it
# @raise [RuntimeError] when the circuit breaker refuses the request or when
#   response_failure? flags the response; any error raised by the HTTP call is
#   re-raised after being recorded as a failure
def get(url, headers=nil, params=nil)
  # Check circuit breaker first
  unless circuit_breaker_allow_request?
    raise RuntimeError, "Circuit breaker is open - too many recent failures. Please try again later."
  end

  # Add request throttling to be respectful of rate limits
  throttle_request

  # Track session usage
  track_session_usage

  # Refresh session if needed
  refresh_session_if_needed

  # Only fetch crumb for /v7/finance/download endpoint
  crumb_needed = url.include?('/v7/finance/download')

  headers ||= {}
  params ||= {}
  if crumb_needed
    crumb = get_crumb_scrape_quote_page(params[:symbol] || params['symbol'])
    # merge (not merge!) so the hash passed in by the caller stays untouched
    params = params.merge(crumb: crumb) unless crumb.nil?
  end

  # The return value is not needed here: the cookie sent below is read from @@cookie.
  # NOTE(review): presumably this call is what populates @@cookie — confirm.
  _get_cookie_and_crumb(crumb_needed)

  proxy = _get_proxy
  unless proxy.nil?
    # Expects "host:port" optionally followed by "/..."; only host and port are used.
    proxy_host, proxy_rest = proxy.split(':')
    ::HTTParty.http_proxy(proxy_host, proxy_rest.split('/').first)
  end

  cookie_hash = ::HTTParty::CookieHash.new
  cookie_hash.add_cookies(@@cookie)
  options = { headers: headers.merge(@@user_agent_headers).merge('cookie' => cookie_hash.to_cookie_string) } #,  debug_output: STDOUT }

  request_url = url.dup.to_s
  unless params.empty?
    joiner = url.include?('?') ? '&' : '?'
    request_url += joiner + URI.encode_www_form(params)
  end

  begin
    response = ::HTTParty.get(request_url, options)
    if response_failure?(response)
      # The rescue below records the failure; recording it here as well would
      # count a single bad response twice toward the breaker threshold.
      raise RuntimeError, "Yahoo Finance request failed: #{response.code} - #{response.body}"
    end
    circuit_breaker_record_success
    response
  rescue
    circuit_breaker_record_failure
    raise
  end
end

#get(url, headers = nil, params = nil) ⇒ Object

Override get method to use curl-impersonate by default



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 13

# Performs a GET request against +url+ through HTTParty, guarded by the
# circuit breaker and preceded by the throttling and session helpers.
#
# @param url [String] endpoint to fetch; may already contain a query string
# @param headers [Hash, nil] extra request headers; on key clashes the entries
#   of @@user_agent_headers and the 'cookie' header win
# @param params [Hash, nil] query parameters; the caller's hash is not mutated
# @return [Object] the HTTParty response, when response_failure? does not flag it
# @raise [RuntimeError] when the circuit breaker refuses the request or when
#   response_failure? flags the response; any error raised by the HTTP call is
#   re-raised after being recorded as a failure
def get(url, headers=nil, params=nil)
  # Check circuit breaker first
  unless circuit_breaker_allow_request?
    raise RuntimeError, "Circuit breaker is open - too many recent failures. Please try again later."
  end

  # Add request throttling to be respectful of rate limits
  throttle_request

  # Track session usage
  track_session_usage

  # Refresh session if needed
  refresh_session_if_needed

  # Only fetch crumb for /v7/finance/download endpoint
  crumb_needed = url.include?('/v7/finance/download')

  headers ||= {}
  params ||= {}
  if crumb_needed
    crumb = get_crumb_scrape_quote_page(params[:symbol] || params['symbol'])
    # merge (not merge!) so the hash passed in by the caller stays untouched
    params = params.merge(crumb: crumb) unless crumb.nil?
  end

  # The return value is not needed here: the cookie sent below is read from @@cookie.
  # NOTE(review): presumably this call is what populates @@cookie — confirm.
  _get_cookie_and_crumb(crumb_needed)

  proxy = _get_proxy
  unless proxy.nil?
    # Expects "host:port" optionally followed by "/..."; only host and port are used.
    proxy_host, proxy_rest = proxy.split(':')
    ::HTTParty.http_proxy(proxy_host, proxy_rest.split('/').first)
  end

  cookie_hash = ::HTTParty::CookieHash.new
  cookie_hash.add_cookies(@@cookie)
  options = { headers: headers.merge(@@user_agent_headers).merge('cookie' => cookie_hash.to_cookie_string) } #,  debug_output: STDOUT }

  request_url = url.dup.to_s
  unless params.empty?
    joiner = url.include?('?') ? '&' : '?'
    request_url += joiner + URI.encode_www_form(params)
  end

  begin
    response = ::HTTParty.get(request_url, options)
    if response_failure?(response)
      # The rescue below records the failure; recording it here as well would
      # count a single bad response twice toward the breaker threshold.
      raise RuntimeError, "Yahoo Finance request failed: #{response.code} - #{response.body}"
    end
    circuit_breaker_record_success
    response
  rescue
    circuit_breaker_record_failure
    raise
  end
end

#get_original ⇒ Object

Store original methods



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/yf_as_dataframe/yf_connection_minimal_patch.rb', line 9

# Performs a GET request against +url+ through HTTParty, guarded by the
# circuit breaker and preceded by the throttling and session helpers.
#
# @param url [String] endpoint to fetch; may already contain a query string
# @param headers [Hash, nil] extra request headers; on key clashes the entries
#   of @@user_agent_headers and the 'cookie' header win
# @param params [Hash, nil] query parameters; the caller's hash is not mutated
# @return [Object] the HTTParty response, when response_failure? does not flag it
# @raise [RuntimeError] when the circuit breaker refuses the request or when
#   response_failure? flags the response; any error raised by the HTTP call is
#   re-raised after being recorded as a failure
def get(url, headers=nil, params=nil)
  # Check circuit breaker first
  unless circuit_breaker_allow_request?
    raise RuntimeError, "Circuit breaker is open - too many recent failures. Please try again later."
  end

  # Add request throttling to be respectful of rate limits
  throttle_request

  # Track session usage
  track_session_usage

  # Refresh session if needed
  refresh_session_if_needed

  # Only fetch crumb for /v7/finance/download endpoint
  crumb_needed = url.include?('/v7/finance/download')

  headers ||= {}
  params ||= {}
  if crumb_needed
    crumb = get_crumb_scrape_quote_page(params[:symbol] || params['symbol'])
    # merge (not merge!) so the hash passed in by the caller stays untouched
    params = params.merge(crumb: crumb) unless crumb.nil?
  end

  # The return value is not needed here: the cookie sent below is read from @@cookie.
  # NOTE(review): presumably this call is what populates @@cookie — confirm.
  _get_cookie_and_crumb(crumb_needed)

  proxy = _get_proxy
  unless proxy.nil?
    # Expects "host:port" optionally followed by "/..."; only host and port are used.
    proxy_host, proxy_rest = proxy.split(':')
    ::HTTParty.http_proxy(proxy_host, proxy_rest.split('/').first)
  end

  cookie_hash = ::HTTParty::CookieHash.new
  cookie_hash.add_cookies(@@cookie)
  options = { headers: headers.merge(@@user_agent_headers).merge('cookie' => cookie_hash.to_cookie_string) } #,  debug_output: STDOUT }

  request_url = url.dup.to_s
  unless params.empty?
    joiner = url.include?('?') ? '&' : '?'
    request_url += joiner + URI.encode_www_form(params)
  end

  begin
    response = ::HTTParty.get(request_url, options)
    if response_failure?(response)
      # The rescue below records the failure; recording it here as well would
      # count a single bad response twice toward the breaker threshold.
      raise RuntimeError, "Yahoo Finance request failed: #{response.code} - #{response.body}"
    end
    circuit_breaker_record_success
    response
  rescue
    circuit_breaker_record_failure
    raise
  end
end

#get_raw_json(url, user_agent_headers = nil, params = nil) ⇒ Object Also known as: get_raw_json_original



180
181
182
183
184
185
186
# File 'lib/yf_as_dataframe/yf_connection.rb', line 180

# Thin pass-through to #get: forwards its three arguments unchanged and hands
# back whatever #get returns. Despite the name, no JSON parsing happens here.
#
# @param url [Object] passed to #get as the URL
# @param user_agent_headers [Object, nil] passed to #get as its headers argument
# @param params [Object, nil] passed to #get as its params argument
# @return [Object] the unmodified return value of #get
def get_raw_json(url, user_agent_headers=nil, params=nil)
  get(url, user_agent_headers, params)
end

#yfconn_initialize ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/yf_as_dataframe/yf_connection.rb', line 87

# Resets the connection state held in class variables: the cache handle,
# crumb/cookie data, session tracking counters and circuit-breaker state.
#
# @return [Integer] the value of the last assignment (60); not meant to be used
def yfconn_initialize
  # Rails.logger.info { "#{__FILE__}:#{__LINE__} here"}
  begin
    @@zache = ::Zache.new
    @@session_is_caching = true
  rescue NameError
    # Not caching.
    # NameError is the superclass of NoMethodError, so this still covers the
    # case handled before. A Zache constant that is not loaded raises
    # NameError, which a NoMethodError-only rescue could never catch.
    @@session_is_caching = false
  end

  @@crumb = nil
  @@cookie = nil
  @@cookie_strategy = 'basic'
  @@cookie_lock = ::Mutex.new

  # Add session tracking
  @@session_created_at = Time.now
  @@session_refresh_interval = 3600 # 1 hour
  @@request_count = 0
  @@last_request_time = nil

  # Circuit breaker state
  @@circuit_breaker_state = :closed # :closed, :open, :half_open
  @@failure_count = 0
  @@last_failure_time = nil
  @@circuit_breaker_threshold = 3
  @@circuit_breaker_timeout = 60 # seconds
  @@circuit_breaker_base_timeout = 60 # seconds
end