Method: Fluent::Plugin::CSVParser#parse_fast_internal

Defined in:
lib/fluent/plugin/parser_csv.rb

#parse_fast_internal(text) ⇒ Object

CSV.parse_line is too slow because it initializes a lot of objects, and the CSV module doesn't provide an efficient method for parsing a single line. This method avoids the overhead of CSV.parse_line for typical patterns.



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/fluent/plugin/parser_csv.rb', line 64

# Parses a single CSV line into a record without going through CSV.parse_line.
#
# The line is split on @delimiter and every piece is classified by how many
# @quote_char characters it contains. This handles typical lines only; it is
# a heuristic, not a complete CSV state machine.
#
# @param text [String] one line of CSV. A trailing line terminator is
#   dropped on a copy, so the argument is never modified and frozen strings
#   are accepted.
# @return [Hash] column values keyed by the entries of @keys, in order.
#   Empty unquoted columns are stored as nil; an empty line yields {}.
def parse_fast_internal(text)
  record = {}
  # Non-destructive chomp: `chomp!` would alter the caller's string and raise
  # FrozenError for frozen input.
  text = text.chomp

  return record if text.empty?

  # limit -1 keeps trailing empty columns
  columns = text.split(@delimiter, -1)
  num_columns = columns.size
  i = 0 # index into @keys (logical column)
  j = 0 # index into columns (raw split piece)
  # use while because while is now faster than each_with_index
  while j < num_columns
    column = columns[j]

    case column.count(@quote_char)
    when 0
      column = nil if column.empty?
    when 1
      if column.start_with?(@quote_char)
        # An opening quote without its closing quote means the delimiter
        # appeared inside a quoted value: glue the following pieces back
        # together until one ends with the quote character.
        to_merge = [column]
        j += 1
        while j < num_columns
          merged_col = columns[j]
          to_merge << merged_col
          break if merged_col.end_with?(@quote_char)
          j += 1
        end
        # NOTE: when no closing piece is found the merge runs to the end of
        # the line and this slice still drops the first and last characters.
        column = to_merge.join(@delimiter)[1..-2]
      end
    when 2
      if column.start_with?(@quote_char) && column.end_with?(@quote_char)
        column = column[1..-2]
      end
    else
      if column.start_with?(@quote_char) && column.end_with?(@quote_char)
        column = column[1..-2]
      end
      # More than two quote characters: collapse escaped quotes in place.
      column.gsub!(@escape_pattern, @quote_char)
    end

    record[@keys[i]] = column
    j += 1
    i += 1
  end
  record
end