Class: CSV::Parser::InputsScanner

Inherits:
Object
  • Object
show all
Defined in:
lib/csv/parser.rb

Overview

CSV::Parser::InputsScanner receives IO inputs, an encoding, a row separator and a chunk size. It also manages the life cycle of kept scan positions through its methods keep_start, keep_end, keep_back and keep_drop.

CSV::Parser::InputsScanner#scan tries to match pattern at the current position. If there’s a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.

CSV::Parser::InputsScanner#rest returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? returns true), it returns “”.

Instance Method Summary collapse

Constructor Details

#initialize(inputs, encoding, row_separator, chunk_size: 8192) ⇒ InputsScanner

Returns a new instance of InputsScanner.



92
93
94
95
96
97
98
99
100
# File 'lib/csv/parser.rb', line 92

# Sets up a scanner over a list of inputs and loads the first chunk.
#
# inputs::        collection of inputs to read from; it is duplicated here,
#                 so later changes to @inputs do not touch the caller's object.
# encoding::      stored as @encoding (not used further inside this method).
# row_separator:: stored as @row_separator (not used further inside this method).
# chunk_size::    stored as @chunk_size; defaults to 8192.
def initialize(inputs, encoding, row_separator, chunk_size: 8192)
  # Stack of keep entries; starts out empty.
  @keeps = []
  @chunk_size = chunk_size
  @row_separator = row_separator
  @encoding = encoding
  @inputs = inputs.dup
  # With no inputs at all there is nothing further to read after this point.
  @last_scanner = @inputs.empty?
  read_chunk
end

Instance Method Details

#check(pattern) ⇒ Object



251
252
253
# File 'lib/csv/parser.rb', line 251

# Forwards +pattern+ to @scanner.check and returns its result.
#
# Unlike #scan, this wrapper never calls read_chunk, so only the scanner
# that is currently loaded is consulted.
def check(pattern)
  @scanner.check(pattern)
end

#each_line(row_separator) {|buffer| ... } ⇒ Object

Yields:

  • (buffer)


102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/csv/parser.rb', line 102

# Yields the remaining input piece by piece, split on +row_separator+,
# loading further chunks with read_chunk until it returns a falsy value.
#
# Returns an Enumerator when called without a block.
#
# A piece that does not end with +row_separator+ is held back in +buffer+
# and joined with the pieces that follow (possibly from the next chunk)
# before being yielded. Whatever is still buffered once no more chunks are
# available is yielded last, without a trailing separator.
def each_line(row_separator)
  return enum_for(__method__, row_separator) unless block_given?
  # Incomplete line carried over until its separator shows up.
  buffer = nil
  input = @scanner.rest
  # Byte position in the current scanner just after the last yielded line.
  position = @scanner.pos
  # Correction applied to +position+ for buffered bytes that were read
  # before the current chunk was loaded (set after read_chunk below).
  offset = 0
  n_row_separator_chars = row_separator.size
  # trace(__method__, :start, line, input)
  while true
    input.each_line(row_separator) do |line|
      # Move the scanner past the piece that was just read.
      @scanner.pos += line.bytesize
      if buffer
        # A two-character separator may have been split: the buffered text
        # ends with its first character and this piece starts with its
        # second one.
        if n_row_separator_chars == 2 and
          buffer.end_with?(row_separator[0]) and
          line.start_with?(row_separator[1])
          # Complete the separator on the buffered line and yield it.
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          # Nothing left in this piece besides the separator's second char.
          next if line.empty?
        else
          # Otherwise the piece simply continues the buffered line.
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        # Complete line: record the position right after it and yield it.
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        # No separator yet; keep the text until more input arrives.
        buffer = line
      end
    end
    break unless read_chunk
    # A new chunk is loaded: restart from its scanner state.
    input = @scanner.rest
    position = @scanner.pos
    # The buffered bytes were read before this chunk, so they must not be
    # counted when +position+ is advanced by the joined line's size.
    offset = -buffer.bytesize if buffer
  end
  # Trailing text that never received a separator.
  yield(buffer) if buffer
end

#eos? ⇒ Boolean

Returns:

  • (Boolean)


174
175
176
# File 'lib/csv/parser.rb', line 174

# Reports whether the currently loaded scanner is at its end, by delegating
# to @scanner.eos?. This method does not itself try to load another chunk.
def eos?
  @scanner.eos?
end

#keep_back ⇒ Object



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'lib/csv/parser.rb', line 201

# Pops the most recent keep entry and rewinds scanning to where it began.
#
# When the entry carries a buffer, the not-yet-consumed part of the current
# scanner's string is pushed back onto the front of @inputs (if non-empty)
# and scanning restarts on the buffer. Without a buffer the entry must
# belong to the current scanner, whose position is simply reset.
#
# Raises UnexpectedError when there is no buffer and the entry's scanner is
# not the current one. Calls read_chunk if the resulting scanner is at its end.
def keep_back
  # trace(__method__, :start)
  kept_scanner, kept_start, carried = @keeps.pop
  if carried
    # trace(__method__, :rescan, kept_start, carried)
    current = @scanner.string
    leftover =
      if kept_scanner == @scanner
        current.byteslice(kept_start, current.bytesize - kept_start)
      else
        current
      end
    unless leftover.nil? || leftover.empty?
      # Re-queue the data so it is read again after the buffer.
      @inputs.unshift(StringIO.new(leftover))
      @last_scanner = false
    end
    @scanner = StringScanner.new(carried)
  else
    unless @scanner == kept_scanner
      raise UnexpectedError,
            "scanners are different but no buffer: " \
            "#{@scanner.inspect}(#{@scanner.object_id}): " \
            "#{kept_scanner.inspect}(#{kept_scanner.object_id})"
    end
    # trace(__method__, :repos, kept_start, carried)
    @scanner.pos = kept_start
  end
  read_chunk if @scanner.eos?
end

#keep_drop ⇒ Object



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/csv/parser.rb', line 230

# Discards the most recent keep entry.
#
# If the discarded entry had accumulated a buffer and an enclosing keep
# entry exists, the buffer is handed to that entry: appended to the entry's
# own buffer when it has one, otherwise installed as its buffer.
def keep_drop
  _scanner, _start, dropped = @keeps.pop
  # trace(__method__, :done, :empty) unless dropped
  # trace(__method__, :done, :no_last_keep) unless @keeps.last
  parent = @keeps.last if dropped
  return unless parent

  # trace(__method__, :done)
  existing = parent[2]
  return parent[2] = dropped unless existing

  existing << dropped
end

#keep_end ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/csv/parser.rb', line 185

# Pops the most recent keep entry and returns the text covered since it
# was started.
#
# The covered text is taken from the current scanner's string: from the
# recorded start position when the entry belongs to the current scanner,
# otherwise from the beginning of the string, up to the current position.
# When the entry carries a buffer, the covered text is appended to that
# buffer and the buffer is returned instead.
def keep_end
  # trace(__method__, :start)
  kept_scanner, kept_start, carried = @keeps.pop
  from = kept_scanner == @scanner ? kept_start : 0
  tail = @scanner.string.byteslice(from, @scanner.pos - from)
  # trace(__method__, :done, carried ? carried + tail : tail)
  carried ? carried << tail : tail
end

#keep_start ⇒ Object



178
179
180
181
182
183
# File 'lib/csv/parser.rb', line 178

# Starts a new keep at the current scan position.
#
# After adjust_last_keep has run, an entry of the form
# [scanner, start position, buffer] is pushed onto @keeps, with the buffer
# initially nil.
def keep_start
  # trace(__method__, :start)
  adjust_last_keep
  entry = [@scanner, @scanner.pos, nil]
  @keeps << entry
  # trace(__method__, :done)
end

#rest ⇒ Object



247
248
249
# File 'lib/csv/parser.rb', line 247

# Returns the unscanned remainder of the currently loaded scanner by
# delegating to @scanner.rest. Inputs that have not been loaded into the
# scanner yet are not included, since no chunk is read here.
def rest
  @scanner.rest
end

#scan(pattern) ⇒ Object



148
149
150
151
152
153
154
155
156
157
# File 'lib/csv/parser.rb', line 148

# Tries to match +pattern+ at the current position of the loaded scanner.
#
# Returns the matched string, or nil when there is no match. If the match
# consumed the rest of the current scanner and this is not the last
# scanner, read_chunk is called before returning.
def scan(pattern)
  # trace(__method__, pattern, :start)
  matched = @scanner.scan(pattern)
  # trace(__method__, pattern, :done, :last, matched) if @last_scanner
  read_chunk if !@last_scanner && matched && @scanner.eos?
  # trace(__method__, pattern, :done, matched)
  matched
end

#scan_all(pattern) ⇒ Object



159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/csv/parser.rb', line 159

# Matches +pattern+ at the current position and, when the match reaches the
# end of the loaded scanner, keeps matching across subsequently read chunks.
#
# Returns nil when the first attempt does not match. On the last scanner
# the first match is returned as is. Otherwise, for as long as the scanner
# is at its end and read_chunk succeeds, +pattern+ is matched again and
# each further match is appended to the result; the loop stops at the
# first chunk where the pattern does not match.
def scan_all(pattern)
  # trace(__method__, pattern, :start)
  matched = @scanner.scan(pattern)
  # trace(__method__, pattern, :done, :last, matched) if @last_scanner
  return matched if @last_scanner || matched.nil?

  while @scanner.eos? && read_chunk
    continuation = @scanner.scan(pattern)
    break if continuation.nil?

    # trace(__method__, pattern, :sub, continuation)
    matched << continuation
  end
  # trace(__method__, pattern, :done, matched)
  matched
end