Class: JsonCompleter

Inherits:
Object
  • Object
show all
Defined in:
lib/json_completer.rb

Overview

JsonCompleter attempts to turn partial JSON strings into valid JSON. It handles incomplete primitives, missing values, and unclosed structures.

Defined Under Namespace

Classes: ParsingState

Constant Summary collapse

STRUCTURE_CHARS =
['[', '{', ',', ':'].to_set.freeze
KEYWORD_MAP =
{ 't' => 'true', 'f' => 'false', 'n' => 'null' }.freeze
VALID_PRIMITIVES =
%w[true false null].to_set.freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(state = self.class.new_state) ⇒ JsonCompleter

Returns a new instance of JsonCompleter.



36
37
38
# File 'lib/json_completer.rb', line 36

# Builds a completer around a parsing state.
#
# @param state [ParsingState] parsing state to resume from; when omitted, a
#   fresh state is obtained from the class-level +new_state+ factory.
def initialize(state = self.class.new_state)
  # Stored state is read and replaced by #complete on every call.
  @state = state
end

Class Method Details

.complete(partial_json) ⇒ Object



27
28
29
# File 'lib/json_completer.rb', line 27

# One-shot convenience entry point: builds a throwaway completer (with its
# default parsing state) and completes +partial_json+ with it.
#
# @param partial_json [String] the partial JSON text to complete.
# @return [Object] whatever the instance-level #complete returns.
def self.complete(partial_json)
  completer = new
  completer.complete(partial_json)
end

.new_state ⇒ Object

Creates a new parsing state for incremental processing



32
33
34
# File 'lib/json_completer.rb', line 32

# Creates a new parsing state for incremental processing.
#
# @return [ParsingState] a state built with ParsingState's default arguments.
def self.new_state
  ParsingState.new
end

Instance Method Details

#complete(partial_json) ⇒ String

Incrementally completes JSON using previous parsing state to avoid reprocessing.

Parameters:

  • partial_json (String)

    The current partial JSON string (full accumulated input).

Returns:

  • (String)

    Completed JSON.



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/json_completer.rb', line 44

# Incrementally completes JSON using previous parsing state to avoid reprocessing.
#
# The state held in @state is resumed when +partial_json+ is at least as long
# as the input seen on the previous call; a shorter input discards the saved
# state and parsing restarts from index 0. Empty input, and input accepted by
# valid_json_primitive_or_document?, is returned unchanged without touching
# the saved state.
#
# @param partial_json [String] The current partial JSON string (full accumulated input).
# @return [String] Completed JSON.
def complete(partial_json)
  input = partial_json

  # Fresh start, or the input was truncated since the last call - start over.
  @state = ParsingState.new if @state.nil? || @state.input_length > input.length

  return input if input.empty?
  return input if valid_json_primitive_or_document?(input)

  # Input has not grown since the last call: re-finalize the saved state.
  # The pending string buffer must be handed over as well, because an
  # unterminated string is never stored in output_tokens and would otherwise
  # disappear from the result of a repeated call.
  if @state.input_length == input.length && !@state.output_tokens.empty?
    return finalize_completion(
      @state.output_tokens.dup,
      @state.context_stack.dup,
      @state.incomplete_string_buffer
    )
  end

  # Work on copies so the saved state stays intact until parsing has finished.
  output_tokens = @state.output_tokens.dup
  context_stack = @state.context_stack.dup
  resume_index = @state.last_index
  index = resume_index
  length = input.length
  incomplete_string_start = nil
  incomplete_string_buffer = nil
  incomplete_string_escape_state = nil

  # If we had an incomplete string, continue from where we left off
  if @state.incomplete_string_start
    incomplete_string_start = @state.incomplete_string_start
    incomplete_string_buffer = @state.incomplete_string_buffer || StringIO.new('"')
    incomplete_string_escape_state = @state.incomplete_string_escape_state
    # Remove the auto-completed string from output_tokens since we'll add the real one
    # NOTE(review): this method never stores an unterminated string in
    # output_tokens, so this pop may discard a legitimate trailing string
    # token - confirm whether it is still needed.
    output_tokens.pop if output_tokens.last&.start_with?('"') && output_tokens.last.end_with?('"')
  end

  # Process from the current index
  while index < length
    # Special case: continuing an incomplete string (only on the first pass,
    # i.e. while index still sits where the previous call stopped)
    if incomplete_string_buffer && index == resume_index
      str_value, new_index, terminated, new_buffer, new_escape_state = continue_parsing_string(
        input, incomplete_string_buffer, incomplete_string_escape_state
      )
      if terminated
        output_tokens << str_value
        incomplete_string_start = nil
        incomplete_string_buffer = nil
        incomplete_string_escape_state = nil
        # Continue processing from where string ended
        index = new_index
      else
        # String still incomplete, save state
        incomplete_string_buffer = new_buffer
        incomplete_string_escape_state = new_escape_state
        # We've consumed everything
        index = length
      end
      next
    end

    char = input[index]
    last_significant_char_in_output = get_last_significant_char(output_tokens)

    case char
    when '{', '['
      # Opening a container: emit it and remember which closer it needs.
      ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
      ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)
      output_tokens << char
      context_stack << char
      index += 1
    when '}'
      # Do not repair missing object values - preserve invalid JSON
      remove_trailing_comma(output_tokens)
      output_tokens << char
      # Array#last is nil on an empty stack, so no separate emptiness check is needed.
      context_stack.pop if context_stack.last == '{'
      index += 1
    when ']'
      # Do not repair trailing commas in arrays - preserve invalid JSON
      output_tokens << char
      context_stack.pop if context_stack.last == '['
      index += 1
    when '"' # Start of a string (key or value)
      # Start of a new string (incomplete strings are handled at the top of the loop)
      ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
      ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)

      str_value, consumed, terminated, new_buffer, new_escape_state = parse_string_with_state(input, index)

      if terminated
        output_tokens << str_value
        incomplete_string_start = nil
        incomplete_string_buffer = nil
        incomplete_string_escape_state = nil
      else
        # String incomplete, save state for next call
        # Don't add to output_tokens yet - will be added during finalization
        incomplete_string_start = index
        incomplete_string_buffer = new_buffer
        incomplete_string_escape_state = new_escape_state
      end
      index += consumed
    when ':'
      # A colon should typically follow a string key. Input such as
      # {"a":1, :"b":2} is invalid JSON, but we aim to fix it: if the last
      # significant char in the output was a comma, remove it.
      remove_trailing_comma(output_tokens) if last_significant_char_in_output == ','
      output_tokens << char
      index += 1
    when ','
      # Handle cases like `[,` or `{,` or `,,` but do NOT repair `{"key":,` (missing object values)
      remove_trailing_comma(output_tokens) # Avoid double commas
      output_tokens << char
      index += 1
    when 't', 'f', 'n' # true, false, null
      ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
      ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)

      # char is already one of the lowercase keys of KEYWORD_MAP here.
      keyword_val, consumed = consume_and_complete_keyword(input, index, KEYWORD_MAP[char])
      output_tokens << keyword_val
      index += consumed
    when '-', '0'..'9' # Number
      ensure_comma_before_new_item(output_tokens, context_stack, last_significant_char_in_output)
      ensure_colon_if_value_expected(output_tokens, context_stack, last_significant_char_in_output)

      num_str, consumed = parse_number(input, index)
      output_tokens << num_str
      index += consumed
    when /\s/ # Whitespace
      # Preserve whitespace as-is
      output_tokens << char
      index += 1
    else # Unknown characters
      # For now, skip unknown characters as they are not part of JSON structure.
      # More advanced handling could try to wrap them in strings if contextually appropriate.
      index += 1
    end
  end

  # Snapshot everything needed to resume on the next call
  updated_state = ParsingState.new(
    output_tokens: output_tokens,
    context_stack: context_stack,
    last_index: index,
    input_length: length,
    incomplete_string_start: incomplete_string_start,
    incomplete_string_buffer: incomplete_string_buffer,
    incomplete_string_escape_state: incomplete_string_escape_state
  )

  # Finalize on copies so the tokens/stack stored in the new state stay untouched,
  # and only then publish the new state.
  completed_json = finalize_completion(output_tokens.dup, context_stack.dup, incomplete_string_buffer)
  @state = updated_state

  completed_json
end