Class: Rack::AI::Utils::Sanitizer
- Inherits:
-
Object
- Object
- Rack::AI::Utils::Sanitizer
- Defined in:
- lib/rack/ai/utils/sanitizer.rb
Constant Summary collapse
# Regular expressions describing substrings that are considered sensitive.
# NOTE(review): presumably consumed by the string-sanitizing helpers of this
# class, which are not part of this listing -- confirm against their source.
SENSITIVE_PATTERNS = [
  # API keys and tokens
  /\b[A-Za-z0-9]{20,}\b/,
  /sk-[A-Za-z0-9]{48}/,  # OpenAI API keys
  /xoxb-[A-Za-z0-9-]+/,  # Slack tokens
  /ghp_[A-Za-z0-9]{36}/, # GitHub tokens

  # Credit card numbers
  /\b(?:\d{4}[-\s]?){3}\d{4}\b/,

  # Social Security Numbers
  /\b\d{3}-\d{2}-\d{4}\b/,

  # Email addresses (partial).
  # The TLD class is [A-Za-z]; a "|" inside a character class is a literal
  # pipe, not alternation, and would let matches run past the address.
  /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/,

  # Phone numbers
  /\b\d{3}-\d{3}-\d{4}\b/,
  /\(\d{3}\)\s*\d{3}-\d{4}/,

  # IP addresses (private ranges).
  # Every alternative consumes exactly two octets before the shared
  # two-octet tail, so 10.x.y.z is matched in full like the other ranges.
  /\b(?:10\.\d{1,3}\.|172\.(?:1[6-9]|2[0-9]|3[01])\.|192\.168\.)\d{1,3}\.\d{1,3}\b/
].freeze
# Lower-case names of headers that are considered sensitive.
# NOTE(review): presumably the list consulted by sensitive_header?, which is
# not part of this listing -- confirm against its source.
SENSITIVE_HEADERS = [
  "authorization",
  "x-api-key",
  "x-auth-token",
  "cookie",
  "set-cookie",
  "x-session-id",
  "x-csrf-token"
].freeze
Class Method Summary collapse
- .extract_safe_content(content, max_length = 1000) ⇒ Object
- .redact_pii(text) ⇒ Object
- .sanitize_for_ai_processing(data) ⇒ Object
- .sanitize_request_data(env) ⇒ Object
- .sanitize_response_data(headers, body) ⇒ Object
Class Method Details
.extract_safe_content(content, max_length = 1000) ⇒ Object
94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/rack/ai/utils/sanitizer.rb', line 94

# Returns a sanitized, length-limited string version of +content+.
#
# The value is converted with +to_s+, passed through +sanitize_string_for_ai+
# (defined outside this listing) and, when the result is longer than
# +max_length+, cut so that the returned string never exceeds +max_length+
# characters. A "..." suffix marks the cut whenever there is room for it.
#
# @param content [#to_s, nil] value to sanitize; +nil+ and anything that
#   reports itself as empty yield ""
# @param max_length [Integer] maximum length of the returned string;
#   negative values are treated as 0
# @return [String] the sanitized, possibly truncated text
def extract_safe_content(content, max_length = 1000)
  return "" if content.nil?
  # Guard the emptiness probe: objects such as Integer do not respond to
  # +empty?+ but are still valid input because they are stringified below.
  return "" if content.respond_to?(:empty?) && content.empty?

  safe_content = sanitize_string_for_ai(content.to_s)
  return safe_content if safe_content.length <= max_length

  limit = [max_length, 0].max
  # With three characters or fewer there is no room for text plus the
  # ellipsis, so hard-cut instead of letting a negative range end return
  # (almost) the whole string.
  return safe_content[0, limit] if limit <= 3

  "#{safe_content[0, limit - 3]}..."
end
.redact_pii(text) ⇒ Object
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/rack/ai/utils/sanitizer.rb', line 108

# Replaces personally identifiable information in +text+ with placeholders.
#
# Email addresses become "[EMAIL]", phone numbers "[PHONE]", Social Security
# Numbers "[SSN]" and 16-digit card numbers "[CREDIT_CARD]". The receiver is
# never modified; a new string is returned.
#
# @param text [String, Object] text to redact
# @return [String, Object] the redacted copy, or +text+ itself when it is
#   not a String
def redact_pii(text)
  return text unless text.is_a?(String)

  text
    # The TLD class is [A-Za-z]; a "|" inside a character class is a literal
    # pipe and would swallow text following the address into the match.
    .gsub(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/, "[EMAIL]")
    # Phone numbers: 555-123-4567 and (555) 123-4567
    .gsub(/\b\d{3}-\d{3}-\d{4}\b/, "[PHONE]")
    .gsub(/\(\d{3}\)\s*\d{3}-\d{4}/, "[PHONE]")
    # Social Security Numbers
    .gsub(/\b\d{3}-\d{2}-\d{4}\b/, "[SSN]")
    # Credit card numbers: four groups of four digits, optionally separated
    .gsub(/\b(?:\d{4}[-\s]?){3}\d{4}\b/, "[CREDIT_CARD]")
end
.sanitize_for_ai_processing(data) ⇒ Object
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/rack/ai/utils/sanitizer.rb', line 77

# Recursively walks +data+ and sanitizes every String found in it.
#
# Hashes are rebuilt with the same keys and sanitized values, arrays are
# mapped element by element, strings go through +sanitize_string_for_ai+
# (defined outside this listing) and every other object is returned as is.
#
# @param data [Hash, Array, String, Object] structure to sanitize
# @return [Hash, Array, String, Object] sanitized counterpart of +data+
def sanitize_for_ai_processing(data)
  case data
  when Hash
    data.each_with_object({}) do |(name, entry), cleaned|
      cleaned[name] = sanitize_for_ai_processing(entry)
    end
  when Array then data.map { |element| sanitize_for_ai_processing(element) }
  when String then sanitize_string_for_ai(data)
  else data
  end
end
.sanitize_request_data(env) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/rack/ai/utils/sanitizer.rb', line 42

# Builds a sanitized copy of the request environment +env+.
#
# Entries whose key is flagged by +sensitive_header?+ are replaced with
# "[REDACTED]"; a readable "rack.input" entry is replaced with
# "[REQUEST_BODY]" so the request body is not included; every other value is
# passed through +sanitize_value+. Both helpers are defined outside this
# listing.
#
# @param env [Hash] request environment to copy
# @return [Hash] new hash with the same keys and sanitized values
def sanitize_request_data(env)
  env.each_with_object({}) do |(name, entry), scrubbed|
    scrubbed[name] =
      if sensitive_header?(name)
        "[REDACTED]"
      elsif name == "rack.input" && entry.respond_to?(:read)
        # Don't include request body in sanitized data
        "[REQUEST_BODY]"
      else
        sanitize_value(entry)
      end
  end
end
.sanitize_response_data(headers, body) ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/rack/ai/utils/sanitizer.rb', line 59

# Builds sanitized copies of a response's headers and body.
#
# A header whose downcased name is flagged by +sensitive_header?+ gets the
# value "[REDACTED]"; all other header values go through +sanitize_value+.
# The body is handed to +sanitize_body+. All three helpers are defined
# outside this listing.
#
# @param headers [Hash] response headers
# @param body [Object] response body, passed unchanged to +sanitize_body+
# @return [Hash] hash with the keys +:headers+ and +:body+
def sanitize_response_data(headers, body)
  scrubbed_headers = headers.each_with_object({}) do |(name, entry), acc|
    acc[name] = sensitive_header?(name.downcase) ? "[REDACTED]" : sanitize_value(entry)
  end

  { headers: scrubbed_headers, body: sanitize_body(body) }
end