Class: LlmClassifier::ContentFetchers::Web

Inherits:
Base
  • Object
show all
Defined in:
lib/llm_classifier/content_fetchers/web.rb

Overview

Web content fetcher with SSRF protection

Constant Summary collapse

# Address blocks that are not publicly routable. The class overview describes
# this fetcher as SSRF-protected; presumably resolved addresses are checked
# against this list before a request is made (the check itself is not shown
# here - confirm against the caller).
PRIVATE_IP_RANGES = [
  IPAddr.new("0.0.0.0/8"),      # "this network" (RFC 1122); never a valid remote destination
  IPAddr.new("10.0.0.0/8"),     # RFC 1918 private
  IPAddr.new("100.64.0.0/10"),  # RFC 6598 shared address space (carrier-grade NAT)
  IPAddr.new("172.16.0.0/12"),  # RFC 1918 private
  IPAddr.new("192.168.0.0/16"), # RFC 1918 private
  IPAddr.new("127.0.0.0/8"),    # IPv4 loopback
  IPAddr.new("169.254.0.0/16"), # IPv4 link-local
  IPAddr.new("::/128"),         # IPv6 unspecified address
  IPAddr.new("::1/128"),        # IPv6 loopback
  IPAddr.new("fc00::/7"),       # IPv6 unique local
  IPAddr.new("fe80::/10")       # IPv6 link-local
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(timeout: nil, user_agent: nil) ⇒ Web

Returns a new instance of Web.



25
26
27
28
29
30
# File 'lib/llm_classifier/content_fetchers/web.rb', line 25

# Builds a fetcher. Any option that is omitted (or passed as nil/false)
# falls back to the corresponding value read from +config+.
#
# @param timeout [Object, nil] used instead of config.web_fetch_timeout when truthy
# @param user_agent [Object, nil] used instead of config.web_fetch_user_agent when truthy
def initialize(timeout: nil, user_agent: nil)
  super()
  # Start with an empty debug record; #fetch stores the request URL in it.
  @debug_info = {}
  @timeout = timeout || config.web_fetch_timeout
  @user_agent = user_agent || config.web_fetch_user_agent
end

Instance Attribute Details

#debug_info ⇒ Object (readonly)

Returns the value of attribute debug_info.



23
24
25
# File 'lib/llm_classifier/content_fetchers/web.rb', line 23

# Reader for the @debug_info instance variable.
#
# The constructor sets it to an empty Hash, and #fetch records the
# normalized URL under the :url key.
#
# @return [Object] the current value of @debug_info
def debug_info
  @debug_info
end

Instance Method Details

#fetch(url) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/llm_classifier/content_fetchers/web.rb', line 32

# Fetches the content behind +url+.
#
# A nil or empty +url+ short-circuits to nil without touching @debug_info.
# Otherwise the URL is normalized, recorded in @debug_info[:url], and
# fetched. Any StandardError raised along the way is routed to
# +handle_error+ instead of propagating.
#
# @param url [String, nil] the address to fetch
# @return [Object, nil] nil for a blank +url+; otherwise the result of
#   +handle_empty_response+ (nil/empty response),
#   +process_successful_response+ (non-empty response), or
#   +handle_error+ (an exception was raised)
def fetch(url)
  return if url.nil? || url.empty?

  normalized = normalize_url(url)
  @debug_info[:url] = normalized

  body = fetch_url(normalized)
  if body.nil? || body.empty?
    handle_empty_response
  else
    process_successful_response(body)
  end
rescue StandardError => error
  handle_error(error)
end