Class: Rack::AI::Utils::Sanitizer

Inherits:
Object
  • Object
show all
Defined in:
lib/rack/ai/utils/sanitizer.rb

Constant Summary collapse

# Regular expressions describing substrings that are treated as sensitive.
# Each entry is an independent pattern; the list is frozen so it cannot be
# modified at runtime.
SENSITIVE_PATTERNS =
[
  # API keys and tokens
  /\b[A-Za-z0-9]{20,}\b/, # any run of 20+ alphanumerics (generic secret)
  /sk-[A-Za-z0-9]{48}/,   # OpenAI API keys
  /xoxb-[A-Za-z0-9-]+/,   # Slack tokens
  /ghp_[A-Za-z0-9]{36}/,  # GitHub tokens

  # Credit card numbers (four groups of four digits, optional - or space)
  /\b(?:\d{4}[-\s]?){3}\d{4}\b/,

  # Social Security Numbers
  /\b\d{3}-\d{2}-\d{4}\b/,

  # Email addresses.
  # The TLD class is [A-Za-z]; a "|" inside a character class is a literal
  # pipe, which would let "a@x.com|b@y.org" be consumed as a single match.
  /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/,

  # Phone numbers
  /\b\d{3}-\d{3}-\d{4}\b/,
  /\(\d{3}\)\s*\d{3}-\d{4}/,

  # IP addresses (private ranges: 10/8, 172.16/12, 192.168/16).
  # Every alternative consumes the leading octets so that the trailing
  # "\d{1,3}\.\d{1,3}" always lands on the last two octets of a full
  # four-octet address.
  /\b(?:10\.\d{1,3}\.|172\.(?:1[6-9]|2[0-9]|3[01])\.|192\.168\.)\d{1,3}\.\d{1,3}\b/
].freeze
# Lower-case header names whose values are considered sensitive.
# Grouped by purpose: credentials, cookies, session/CSRF identifiers.
SENSITIVE_HEADERS = %w[
  authorization x-api-key x-auth-token
  cookie set-cookie
  x-session-id x-csrf-token
].freeze

Class Method Summary collapse

Class Method Details

.extract_safe_content(content, max_length = 1000) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rack/ai/utils/sanitizer.rb', line 94

# Returns a sanitized, length-limited string version of +content+.
#
# The content is converted with +to_s+, passed through
# +sanitize_string_for_ai+, and truncated so the result is never longer
# than +max_length+ characters.
#
# @param content [#to_s, nil] value to sanitize; nil or empty yields ""
# @param max_length [Integer] maximum length of the returned string
# @return [String] sanitized text, ending in "..." when it was truncated
#   and there is room for the ellipsis
def extract_safe_content(content, max_length = 1000)
  return "" if content.nil?

  # Convert before checking emptiness so non-String inputs (which may not
  # respond to #empty?) are accepted.
  text = content.to_s
  return "" if text.empty?

  safe_content = sanitize_string_for_ai(text)
  return safe_content if safe_content.length <= max_length

  ellipsis = "..."
  # With no room for the ellipsis, hard-truncate instead of slicing with a
  # negative end index (which would return more than max_length characters).
  return safe_content[0, [max_length, 0].max] if max_length <= ellipsis.length

  safe_content[0, max_length - ellipsis.length] + ellipsis
end

.redact_pii(text) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/rack/ai/utils/sanitizer.rb', line 108

# Replaces personally identifiable information in +text+ with placeholders.
#
# Substitutions are applied in this order: email addresses ("[EMAIL]"),
# phone numbers ("[PHONE]"), Social Security Numbers ("[SSN]") and credit
# card numbers ("[CREDIT_CARD]"). The input string is never mutated.
#
# @param text [String, Object] text to redact
# @return [String, Object] a new redacted String, or +text+ itself when it
#   is not a String
def redact_pii(text)
  return text unless text.is_a?(String)

  text
    # The TLD class is [A-Za-z]; "|" inside a character class is a literal
    # pipe and would merge "a@x.com|b@y.org" into a single match.
    .gsub(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/, "[EMAIL]")
    .gsub(/\b\d{3}-\d{3}-\d{4}\b/, "[PHONE]")
    .gsub(/\(\d{3}\)\s*\d{3}-\d{4}/, "[PHONE]")
    .gsub(/\b\d{3}-\d{2}-\d{4}\b/, "[SSN]")
    .gsub(/\b(?:\d{4}[-\s]?){3}\d{4}\b/, "[CREDIT_CARD]")
end

.sanitize_for_ai_processing(data) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/rack/ai/utils/sanitizer.rb', line 77

# Recursively sanitizes +data+ for AI processing.
#
# Strings are passed through +sanitize_string_for_ai+; Arrays and Hashes are
# rebuilt with each element / value sanitized (Hash keys are kept as-is);
# any other object is returned untouched.
#
# @param data [Hash, Array, String, Object] value to sanitize
# @return [Hash, Array, String, Object] sanitized copy for containers and
#   strings, the original object otherwise
def sanitize_for_ai_processing(data)
  case data
  when String then sanitize_string_for_ai(data)
  when Array  then data.map { |element| sanitize_for_ai_processing(element) }
  when Hash
    data.each_with_object({}) do |(name, nested), cleaned|
      cleaned[name] = sanitize_for_ai_processing(nested)
    end
  else
    data
  end
end

.sanitize_request_data(env) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/rack/ai/utils/sanitizer.rb', line 42

# Builds a sanitized copy of a request environment.
#
# For each entry: keys flagged by +sensitive_header?+ get the value
# "[REDACTED]"; a readable "rack.input" is replaced by "[REQUEST_BODY]" so
# the request body is not included; everything else goes through
# +sanitize_value+.
#
# @param env [Hash] request environment to sanitize
# @return [Hash] new hash with the same keys and sanitized values
def sanitize_request_data(env)
  env.each_with_object({}) do |(name, raw), cleaned|
    cleaned[name] =
      if sensitive_header?(name)
        "[REDACTED]"
      elsif name == "rack.input" && raw.respond_to?(:read)
        "[REQUEST_BODY]"
      else
        sanitize_value(raw)
      end
  end
end

.sanitize_response_data(headers, body) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/rack/ai/utils/sanitizer.rb', line 59

# Builds a sanitized view of a response.
#
# Header names are downcased before being checked with +sensitive_header?+;
# flagged headers get the value "[REDACTED]", the rest go through
# +sanitize_value+. Original header names are kept as keys. The body is
# passed through +sanitize_body+.
#
# @param headers [Hash] response headers
# @param body [Object] response body, handed to +sanitize_body+
# @return [Hash] hash with +:headers+ (sanitized headers) and +:body+
#   (sanitized body)
def sanitize_response_data(headers, body)
  cleaned_headers = headers.each_with_object({}) do |(name, raw), acc|
    acc[name] = sensitive_header?(name.downcase) ? "[REDACTED]" : sanitize_value(raw)
  end

  { headers: cleaned_headers, body: sanitize_body(body) }
end