Class: Argstring::Lexer::Segmenter
- Inherits: Object
- Inheritance chain: Object → Argstring::Lexer::Segmenter
- Defined in: lib/argstring/lexer/segmenter.rb
Instance Method Summary
- #initialize(config) ⇒ Segmenter (constructor): A new instance of Segmenter.
- #split(argstring, errors:) ⇒ Object: Split argstring into raw segments using the configured separator.
Constructor Details
#initialize(config) ⇒ Segmenter
Returns a new instance of Segmenter.
6 7 8 |
# File 'lib/argstring/lexer/segmenter.rb', line 6
#
# Returns a new instance of Segmenter.
#
# @param config [Object] configuration object; stored unchanged in @config,
#   which #split consults for escape, encloser and separator rules
def initialize(config)
  @config = config
end
Instance Method Details
#split(argstring, errors:) ⇒ Object
Split argstring into raw segments using the configured separator. Enclosures and escapes prevent splitting.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/argstring/lexer/segmenter.rb', line 12
#
# Split argstring into raw segments using the configured separator.
# Enclosures and escapes prevent splitting.
#
# The input is coerced with #to_s and stripped first, so leading/trailing
# whitespace survives only when it is enclosed. Encloser and escape
# characters are copied into the current segment verbatim; nothing is
# unescaped or unwrapped at this stage.
#
# @param argstring [#to_s] raw input to split
# @param errors [#<<] collector that receives a ParseError with code
#   :unterminated_enclosure when the input ends inside an enclosure
# @return [Array] the segments accumulated through flush_segment!, or an
#   empty array when the unterminated-enclosure error is reported
def split(argstring, errors:)
  # Always strip leading/trailing whitespace globally in all modes.
  # Users who want leading/trailing whitespace must enclose it.
  argstring = argstring.to_s.strip
  # The string is not modified by this method, so measure it once instead
  # of on every loop iteration.
  length = argstring.length

  segments = []
  buffer = +""
  current_encloser = nil # pair whose :close we are waiting for; nil outside
  i = 0

  while i < length
    ch = argstring[i]

    if current_encloser
      if @config.escape_enabled? && ch == @config.escape
        # Escape inside enclosure consumes next character literally.
        # The two-character slice shrinks to the escape alone when it is
        # the last character of the input.
        buffer << argstring[i, 2]
        i += 2
      else
        buffer << ch
        current_encloser = nil if ch == current_encloser[:close]
        i += 1
      end
      next
    end

    # Start enclosure if configured.
    pair = @config.encloser_open_pair_for(ch)
    if pair
      current_encloser = pair
      buffer << ch
      i += 1
      next
    end

    # Escape outside enclosure.
    if @config.escape_enabled? && ch == @config.escape
      if @config.whitespace_separator? && i + 1 < length && @config.whitespace_char?(argstring[i + 1])
        # If separator is whitespace, escaping whitespace separators is not
        # allowed: the escape is dropped and the whitespace that follows is
        # processed on the next iteration.
        i += 1
      else
        buffer << argstring[i, 2]
        i += 2
      end
      next
    end

    # Separator handling.
    if @config.separator_boundary_at?(argstring, i)
      flush_segment!(segments, buffer)
      if @config.whitespace_separator?
        # Collapse whitespace runs.
        i += 1 while i < length && @config.whitespace_char?(argstring[i])
      else
        # Non-whitespace separator does not collapse.
        i += 1
      end
      next
    end

    buffer << ch
    i += 1
  end

  if current_encloser
    errors << ParseError.new(
      code: :unterminated_enclosure,
      message: "Unterminated enclosure in input"
    )
    return []
  end

  flush_segment!(segments, buffer)
  segments
end