Class: CSVPlusPlus::Lexer::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/csv_plus_plus/lexer/tokenizer.rb

Overview

A class that contains the use-case-specific regexes for parsing

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input:, tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil) ⇒ Tokenizer

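Creates a tokenizer that scans the whitespace-stripped input with a StringScanner, using the supplied token patterns and optional catchall, ignore, match-altering and stop settings.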



13
14
15
16
17
18
19
20
21
22
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 13

# Set up a tokenizer over +input+.
#
# @param input [String] text to tokenize; surrounding whitespace is stripped before scanning
# @param tokens [Enumerable] entries whose +first+ is the pattern to scan for and whose
#   element at index 1 is recorded as the matched token (see #scan_tokens!)
# @param catchall [Regexp, nil] optional pattern tried by #scan_catchall
# @param ignore [Regexp, nil] optional pattern tried by #matches_ignore?
# @param alter_matches [Hash] callables keyed by token, applied to matched text by #last_match
# @param stop_fn [#call, nil] optional callable that #stop? invokes with the scanner
def initialize(input:, tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
  # Scanning state: a scanner over the trimmed input, and no token matched yet.
  trimmed_input = input.strip
  @scanner = ::StringScanner.new(trimmed_input)
  @last_token = nil

  # Matching configuration, stored exactly as supplied by the caller.
  @tokens = tokens
  @catchall = catchall
  @ignore = ignore
  @stop_fn = stop_fn
  @alter_matches = alter_matches
end

Instance Attribute Details

#last_token ⇒ Object (readonly)

Returns the value of attribute last_token.



9
10
11
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 9

# Reader for the token recorded by the most recent #scan_tokens! call.
#
# @return [Object, nil] the recorded token; nil right after construction or when the
#   last #scan_tokens! call matched nothing
def last_token
  @last_token
end

#scanner ⇒ Object (readonly)

Returns the value of attribute scanner.



9
10
11
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 9

# Reader for the underlying scanner.
#
# @return [StringScanner] the scanner the constructor created over the stripped input
def scanner
  @scanner
end

Instance Method Details

#last_match ⇒ Object

The value of the last token matched



42
43
44
45
46
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 42

# The text of the scanner's most recent match, optionally transformed.
#
# When +@alter_matches+ has an entry for the last matched token, that entry is called
# with the matched text and its result is returned; otherwise the text is returned as is.
#
# @return [Object, nil] the (possibly altered) matched text; nil if the scanner has no match
def last_match
  matched_text = @scanner.matched
  return matched_text unless @alter_matches.key?(@last_token)

  alter_fn = @alter_matches[@last_token]
  alter_fn.call(matched_text)
end

#matches_ignore? ⇒ Boolean

Scan input against the ignore pattern

Returns:

  • (Boolean)


37
38
39
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 37

# Try to consume input matching the ignore pattern at the current scan position.
#
# Note that this advances the scanner when the pattern matches.
#
# @return [String, nil] the matched text (truthy) on success; nil when there is no
#   ignore pattern or it does not match
def matches_ignore?
  return unless @ignore

  @scanner.scan(@ignore)
end

#peek ⇒ Object

Peek the input



49
50
51
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 49

# Look ahead at the upcoming input without advancing the scanner.
#
# @param peek_chars [Integer] maximum length to look ahead, passed straight to
#   StringScanner#peek (which counts bytes); defaults to 100, the value that used to
#   be hard-coded here
# @return [String] the upcoming input, shorter than +peek_chars+ if less remains
def peek(peek_chars: 100)
  @scanner.peek(peek_chars)
end

#rest ⇒ Object

The rest of the un-parsed input. The tokenizer might not need to parse the entire input



60
61
62
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 60

# The input that the scanner has not consumed yet.
#
# @return [String] whatever StringScanner#rest reports for the underlying scanner
def rest
  @scanner.rest
end

#scan_catchall ⇒ Object

Scan input against the catchall pattern



32
33
34
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 32

# Try to consume input matching the catchall pattern at the current scan position.
#
# @return [String, nil] the matched text on success; nil when there is no catchall
#   pattern or it does not match
def scan_catchall
  return unless @catchall

  @scanner.scan(@catchall)
end

#scan_tokens! ⇒ Object

Scan tokens and see if any match



26
27
28
29
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 26

# Try each configured token in order and stop at the first whose pattern matches at
# the current scan position (consuming the matched input).
#
# Records the outcome in +@last_token+: the matching entry's element at index 1, or
# nil when no pattern matched.
#
# @return [Object, nil] the newly recorded token
def scan_tokens!
  matching_entry = @tokens.find do |entry|
    @scanner.scan(entry.first)
  end

  @last_token = (matching_entry[1] if matching_entry)
end

#stop? ⇒ Boolean

Scan for our stop token (if there is one - some parsers stop early and some don’t)

Returns:

  • (Boolean)


54
55
56
# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 54

# Ask the optional stop function whether tokenizing should end here.
#
# @return [Object] false when no stop function was configured; otherwise whatever the
#   stop function returns when called with the scanner
def stop?
  return false unless @stop_fn

  @stop_fn.call(@scanner)
end