Class: EasyCols::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/easy_cols/parser.rb

Constant Summary collapse

SUPPORTED_FORMATS =
%w[csv tsv table tbl plain auto].freeze

Instance Method Summary collapse

Constructor Details

#initialize(**options) ⇒ Parser

Returns a new instance of Parser.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/easy_cols/parser.rb', line 10

# Creates a parser whose settings are the built-in defaults overlaid with
# whatever keyword options the caller supplies (caller values win).
#
# Only :format is consumed by the code visible in this file (see #parse);
# the remaining keys are presumably read by the format-specific parsers --
# confirm their exact meaning there.
#
# @param options [Hash] keyword overrides for the defaults listed below
def initialize(**options)
  defaults = {
    format: 'auto',
    delimiter: nil,
    pattern: nil,
    quotes: true,
    headers: 1,
    lines: true,
    blanklines: true,
    comments: nil,
    start: nil,
    stop: nil
  }

  @options = defaults.merge(options)
end

Instance Method Details

#detect_format(input) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/easy_cols/parser.rb', line 46

# Guesses the format of +input+ by inspecting the text itself.
#
# The decision is based on the first line that contains any non-whitespace
# character, so leading blank lines do not skew detection. Checks run in
# this order:
#   1. 'table' - that line contains '|' AND some line of the input consists
#                solely of '-', '_', '|' and '+' characters
#   2. 'tsv'   - that line contains a tab
#   3. 'csv'   - that line contains a comma
#   4. 'plain' - none of the above
#
# @param input [String] raw text to inspect
# @return [String] 'table', 'tsv', 'csv' or 'plain'; 'csv' when the input
#   is empty or whitespace-only
def detect_format(input)
  # Skip leading blank lines; nil means the whole input is blank.
  first_line = input.each_line.find { |line| !line.strip.empty? }&.strip
  return 'csv' unless first_line

  # ^ and $ anchor at every line in Ruby, so this scans the whole input
  # for a separator row rather than only the first line.
  return 'table' if first_line.include?('|') && input.match?(/^[-_|+]+$/m)
  return 'tsv' if first_line.include?("\t")
  return 'csv' if first_line.include?(',')

  # No clear indicator found.
  'plain'
end

#detected_format ⇒ Object



42
43
44
# File 'lib/easy_cols/parser.rb', line 42

# Reader for the format name that #parse stored the last time it ran.
#
# @return [String, nil] the stored format, or nil if @detected_format has
#   not been assigned yet
def detected_format
  @detected_format
end

#parse(input) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/easy_cols/parser.rb', line 25

# Parses +input+ with the configured format, auto-detecting it when the
# :format option is 'auto' or nil.
#
# The format actually used is remembered in @detected_format before any
# parsing is attempted, so it is set even when the format turns out to be
# unsupported.
#
# @param input [String] raw text to parse
# @return [Object] whatever the selected format-specific parser returns
# @raise [FormatError] if the resolved format has no matching parser
def parse(input)
  requested = @options[:format]
  autodetect = requested.nil? || requested == 'auto'

  # Detection result first, then the explicit option, then 'csv' as a last
  # resort (reachable when the option is false).
  chosen = (detect_format(input) if autodetect) || requested || 'csv'
  @detected_format = chosen

  handler =
    case chosen
    when 'csv'          then :parse_csv
    when 'tsv'          then :parse_tsv
    when 'table', 'tbl' then :parse_table
    when 'plain'        then :parse_plain
    end

  raise FormatError, "Unsupported format: #{chosen}" unless handler

  send(handler, input)
end