Class: JsonCompleter
- Inherits:
-
Object
- Object
- JsonCompleter
- Defined in:
- lib/json_completer.rb
Overview
JsonCompleter attempts to turn partial JSON strings into valid JSON. It handles incomplete primitives, missing values, and unclosed structures.
Defined Under Namespace
Classes: ParsingState
Constant Summary collapse
- STRUCTURE_CHARS =
['[', '{', ',', ':'].to_set.freeze
- KEYWORD_MAP =
{ 't' => 'true', 'f' => 'false', 'n' => 'null' }.freeze
- VALID_PRIMITIVES =
%w[true false null].to_set.freeze
Class Method Summary collapse
- .complete(partial_json) ⇒ Object
-
.new_state ⇒ Object
Creates a new parsing state for incremental processing.
Instance Method Summary collapse
-
#complete(partial_json) ⇒ String
Incrementally completes JSON using previous parsing state to avoid reprocessing.
-
#initialize(state = self.class.new_state) ⇒ JsonCompleter
constructor
A new instance of JsonCompleter.
Constructor Details
#initialize(state = self.class.new_state) ⇒ JsonCompleter
Returns a new instance of JsonCompleter.
36 37 38 |
# File 'lib/json_completer.rb', line 36
# Builds a completer around a parsing state.
#
# @param state [ParsingState] state to resume from; when omitted, a fresh one
#   is requested from +self.class.new_state+
def initialize(state = self.class.new_state)
  @state = state
end
Class Method Details
.complete(partial_json) ⇒ Object
27 28 29 |
# File 'lib/json_completer.rb', line 27
# One-shot convenience entry point: creates a completer with the default
# state and delegates to the instance-level #complete.
#
# @param partial_json [String] possibly incomplete JSON text
# @return [Object] whatever the instance-level #complete returns
def self.complete(partial_json)
  completer = new
  completer.complete(partial_json)
end
.new_state ⇒ Object
Creates a new parsing state for incremental processing.
32 33 34 |
# File 'lib/json_completer.rb', line 32
# Creates a new parsing state for incremental processing.
#
# @return [ParsingState] a freshly constructed state
def self.new_state
  ParsingState.new
end
Instance Method Details
#complete(partial_json) ⇒ String
Incrementally completes JSON using previous parsing state to avoid reprocessing.
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/json_completer.rb', line 44
# Incrementally completes JSON using previous parsing state to avoid reprocessing.
#
# Scanning resumes at the index saved in @state, so repeated calls with a
# growing input only tokenize the newly appended characters. The saved state
# is discarded and scanning restarts from index 0 when the new input is
# shorter than the one seen on the previous call.
#
# @param partial_json [String] possibly truncated JSON text
# @return [String] +partial_json+ itself when it is empty or already accepted
#   by +valid_json_primitive_or_document?+; otherwise the value returned by
#   +finalize_completion+
def complete(partial_json)
  input = partial_json

  # No state yet, or the input was truncated since the last call: start over.
  @state = ParsingState.new if @state.nil? || @state.input_length > input.length

  return input if input.empty?
  return input if valid_json_primitive_or_document?(input)

  # Input has not grown since last time: re-finalize the saved state.
  # The saved incomplete-string buffer is forwarded exactly as the regular
  # return path at the bottom does; omitting it made a repeated call with the
  # same input finalize without the partially read string.
  # NOTE(review): assumes finalize_completion does not destructively modify
  # the buffer (the regular path already shares it with the saved state).
  if @state.input_length == input.length && !@state.output_tokens.empty?
    return finalize_completion(
      @state.output_tokens.dup,
      @state.context_stack.dup,
      @state.incomplete_string_buffer
    )
  end

  # Work on copies so the saved state is only replaced once this call is done.
  output_tokens = @state.output_tokens.dup
  context_stack = @state.context_stack.dup
  index = @state.last_index
  length = input.length
  incomplete_string_start = nil
  incomplete_string_buffer = nil
  incomplete_string_escape_state = nil

  # If we had an incomplete string, continue from where we left off.
  if @state.incomplete_string_start
    incomplete_string_start = @state.incomplete_string_start
    incomplete_string_buffer = @state.incomplete_string_buffer || StringIO.new('"')
    incomplete_string_escape_state = @state.incomplete_string_escape_state
    # Remove the auto-completed string from output_tokens since we'll add the real one.
    output_tokens.pop if output_tokens.last&.start_with?('"') && output_tokens.last.end_with?('"')
  end

  # Process from the current index.
  while index < length
    # Special case: continuing an incomplete string. Only applies on the first
    # iteration, while index still equals the saved resume position.
    if incomplete_string_buffer && index == @state.last_index
      str_value, new_index, terminated, new_buffer, new_escape_state = continue_parsing_string(
        input, incomplete_string_buffer, incomplete_string_escape_state
      )

      if terminated
        output_tokens << str_value
        incomplete_string_start = nil
        incomplete_string_buffer = nil
        incomplete_string_escape_state = nil
        # Continue processing from where the string ended.
        index = new_index
      else
        # String still incomplete: keep its state; everything was consumed.
        incomplete_string_buffer = new_buffer
        incomplete_string_escape_state = new_escape_state
        index = length
      end
      next
    end

    char = input[index]
    last_significant_char_in_output = get_last_significant_char(output_tokens)

    case char
    when '{', '['
      # Opening an object or array; the opener itself is what gets pushed on
      # the context stack.
      ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
      ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
      output_tokens << char
      context_stack << char
      index += 1
    when '}'
      # Do not repair missing object values - preserve invalid JSON.
      remove_trailing_comma(output_tokens)
      output_tokens << char
      # Array#last is nil on an empty stack, so no separate emptiness check is needed.
      context_stack.pop if context_stack.last == '{'
      index += 1
    when ']'
      # Do not repair trailing commas in arrays - preserve invalid JSON.
      output_tokens << char
      context_stack.pop if context_stack.last == '['
      index += 1
    when '"'
      # Start of a new string, key or value (incomplete strings carried over
      # from a previous call are handled at the top of the loop).
      ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
      ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)

      str_value, consumed, terminated, new_buffer, new_escape_state = parse_string_with_state(input, index)

      if terminated
        output_tokens << str_value
        incomplete_string_start = nil
        incomplete_string_buffer = nil
        incomplete_string_escape_state = nil
      else
        # String incomplete: save state for the next call. It is not added to
        # output_tokens yet - that happens during finalization.
        incomplete_string_start = index
        incomplete_string_buffer = new_buffer
        incomplete_string_escape_state = new_escape_state
      end
      index += consumed
    when ':'
      # A colon should typically follow a string key. Input such as
      # {"a":1, :"b":2} is invalid JSON, but we aim to fix it: if the last
      # significant char in the output was a comma, remove it.
      remove_trailing_comma(output_tokens) if last_significant_char_in_output == ','
      output_tokens << char
      index += 1
    when ','
      # Handle cases like `[,` or `{,` or `,,` but do NOT repair `{"key":,` (missing object values)
      # if last_significant_char_in_output && STRUCTURE_CHARS.include?(last_significant_char_in_output) && last_significant_char_in_output != ':'
      #   output_tokens << 'null'
      # end
      remove_trailing_comma(output_tokens) # Avoid double commas
      output_tokens << char
      index += 1
    when 't', 'f', 'n'
      # true, false, null (possibly truncated). char is already one of the
      # lowercase KEYWORD_MAP keys here.
      ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
      ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)

      keyword_val, consumed = consume_and_complete_keyword(input, index, KEYWORD_MAP[char])
      output_tokens << keyword_val
      index += consumed
    when '-', '0'..'9'
      # Number
      ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
      ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)

      num_str, consumed = parse_number(input, index)
      output_tokens << num_str
      index += consumed
    when /\s/
      # Whitespace is preserved as-is.
      output_tokens << char
      index += 1
    else
      # Unknown characters are skipped for now, as they are not part of JSON
      # structure. More advanced handling could try to wrap them in strings
      # if contextually appropriate.
      index += 1
    end
  end

  # Snapshot the state for the next incremental call.
  updated_state = ParsingState.new(
    output_tokens: output_tokens,
    context_stack: context_stack,
    last_index: index,
    input_length: length,
    incomplete_string_start: incomplete_string_start,
    incomplete_string_buffer: incomplete_string_buffer,
    incomplete_string_escape_state: incomplete_string_escape_state
  )

  # Finalize on copies so the tokens and stack stored in the state are not
  # affected, then publish the new state and return the completed JSON.
  completed_json = finalize_completion(output_tokens.dup, context_stack.dup, incomplete_string_buffer)
  @state = updated_state
  completed_json
end