Class: PatternRuby::PatternCompiler
- Inherits:
-
Object
- Object
- PatternRuby::PatternCompiler
- Defined in:
- lib/pattern_ruby/pattern_compiler.rb
Defined Under Namespace
Classes: AlternationToken, EntityToken, LiteralToken, OptionalToken
Constant Summary collapse
- MAX_PATTERN_LENGTH = 10_000
Maximum pattern string length, enforced to prevent ReDoS / excessive compilation cost.
Class Method Summary collapse
Instance Method Summary collapse
- #compile(pattern_string) ⇒ Object
-
#initialize(entity_registry: nil) ⇒ PatternCompiler
constructor
A new instance of PatternCompiler.
- #validate!(pattern_string) ⇒ Object
Constructor Details
#initialize(entity_registry: nil) ⇒ PatternCompiler
Returns a new instance of PatternCompiler.
23 24 25 |
# File 'lib/pattern_ruby/pattern_compiler.rb', line 23

# Creates a compiler and remembers the entity registry it was handed.
#
# @param entity_registry [Object, nil] stored untouched in @entity_registry;
#   defaults to nil when the caller supplies nothing
# @return [PatternCompiler] the new instance
def initialize(entity_registry: nil)
  @entity_registry = entity_registry
end
Class Method Details
.validate!(pattern_string) ⇒ Object
27 28 29 |
# File 'lib/pattern_ruby/pattern_compiler.rb', line 27

# Class-level convenience: validates a pattern without the caller having to
# build a compiler first. A throwaway instance (constructed with default
# arguments) performs the actual check.
#
# @param pattern_string [Object] value handed straight to the instance-level validate!
# @return [Object] whatever the instance-level validate! returns
def self.validate!(pattern_string)
  compiler = new
  compiler.validate!(pattern_string)
end
Instance Method Details
#compile(pattern_string) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/pattern_ruby/pattern_compiler.rb', line 54

# Compiles a pattern string into a CompiledPattern.
#
# The pattern is validated, tokenized, and each token is turned into a regex
# fragment:
# * EntityToken      - fragment from build_entity_regex; its name (as a Symbol)
#                      is appended to the entity list
# * OptionalToken    - fragment and entity names from compile_inner(token.content);
#                      the fragment is made optional when the parts are joined
# * AlternationToken - escaped alternatives in a non-capturing group; counts as a literal
# * WildcardToken    - a greedy capture "(.+)"
# * LiteralToken     - escaped literal text; counts as a literal
# Tokens of any other class contribute no fragment (they are still counted in
# token_count, which is simply tokens.size).
#
# Fragments are joined with \s+ and the whole expression is anchored with
# \A\s* ... \s*\z and compiled case-insensitively.
#
# @param pattern_string [String] pattern source; passed to validate! first
# @return [CompiledPattern] built with source, regex, entity_names,
#   literal_count, token_count and entity_count
def compile(pattern_string)
  validate!(pattern_string)
  tokens = tokenize(pattern_string)

  entity_names = []
  literal_count = 0
  regex_parts = []
  optional_flags = []

  tokens.each do |token|
    case token
    when EntityToken
      entity_names << token.name.to_sym
      regex_parts << build_entity_regex(token)
      optional_flags << false
    when OptionalToken
      inner = compile_inner(token.content)
      entity_names.concat(inner[:entity_names])
      regex_parts << inner[:regex]
      optional_flags << true
    when AlternationToken
      alts = token.alternatives.map { |a| Regexp.escape(a) }
      regex_parts << "(?:#{alts.join('|')})"
      literal_count += 1
      optional_flags << false
    when WildcardToken
      regex_parts << "(.+)"
      optional_flags << false
    when LiteralToken
      regex_parts << Regexp.escape(token.text)
      literal_count += 1
      optional_flags << false
    end
  end

  token_count = tokens.size
  entity_count = entity_names.size

  # Join parts with \s+ separators, wrapping optional parts in (?:...)?
  #
  # need_sep is true once at least one required part has been emitted. An
  # optional part that appears before any required part carries its separator
  # on the trailing side, "(?:part\s+)?", and leaves need_sep false, so the
  # next part can still start the match directly. Previously only the part at
  # index 0 was treated this way; a second leading optional was emitted as
  # "(?:\s+part)?" and forced a separator, so e.g. "[please] [kindly] help"
  # could not match "help" or "please help".
  regex_str = +""
  need_sep = false

  regex_parts.zip(optional_flags).each do |part, optional|
    if optional
      regex_str << (need_sep ? "(?:\\s+#{part})?" : "(?:#{part}\\s+)?")
    else
      regex_str << "\\s+" if need_sep
      regex_str << part
      need_sep = true
    end
  end

  regex = Regexp.new("\\A\\s*#{regex_str}\\s*\\z", Regexp::IGNORECASE)

  CompiledPattern.new(
    source: pattern_string,
    regex: regex,
    entity_names: entity_names,
    literal_count: literal_count,
    token_count: token_count,
    entity_count: entity_count
  )
end
#validate!(pattern_string) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/pattern_ruby/pattern_compiler.rb', line 31

# Checks that a pattern string is acceptable before it is compiled.
#
# Checks run in this order: type, blankness, length against
# MAX_PATTERN_LENGTH, bracket balance (via check_balanced for [], () and {}),
# then two entity-name checks ("{}" with only whitespace inside, and a name
# beginning with ":").
#
# @param pattern_string [String] the pattern source to check
# @return [true] when every check passes
# @raise [ArgumentError] when pattern_string is not a String, is blank, is
#   longer than MAX_PATTERN_LENGTH, or contains an empty / colon-prefixed
#   entity name
def validate!(pattern_string)
  # nil is rejected here as well (reported as NilClass), so no separate nil
  # test is needed afterwards; the former `nil?` check could never fire.
  unless pattern_string.is_a?(String)
    raise ArgumentError, "pattern must be a String, got #{pattern_string.class}"
  end

  raise ArgumentError, "pattern cannot be nil or empty" if pattern_string.strip.empty?

  if pattern_string.length > MAX_PATTERN_LENGTH
    raise ArgumentError, "pattern exceeds maximum length of #{MAX_PATTERN_LENGTH} characters"
  end

  # Check for unbalanced brackets
  check_balanced(pattern_string, "[", "]", "square brackets")
  check_balanced(pattern_string, "(", ")", "parentheses")
  check_balanced(pattern_string, "{", "}", "curly braces")

  # Check for empty entity names
  if pattern_string.match?(/\{\s*\}/)
    raise ArgumentError, "empty entity name {} in pattern"
  end

  if pattern_string.match?(/\{\s*:/)
    raise ArgumentError, "entity name cannot start with ':' in pattern"
  end

  true
end