Class: Argstring::Lexer::Segmenter

Inherits:
Object
  • Object
show all
Defined in:
lib/argstring/lexer/segmenter.rb

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ Segmenter

Returns a new instance of Segmenter.



6
7
8
# File 'lib/argstring/lexer/segmenter.rb', line 6

# Builds a segmenter around a lexer configuration.
#
# @param config [Object] stored as-is in @config; #split queries it for the
#   escape, encloser and separator rules (escape_enabled?, escape,
#   encloser_open_pair_for, whitespace_separator?, whitespace_char?,
#   separator_boundary_at?)
def initialize(config)
	@config = config
end

Instance Method Details

#split(argstring, errors:) ⇒ Object

Split argstring into raw segments using the configured separator. Enclosures and escapes prevent splitting.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/argstring/lexer/segmenter.rb', line 12

# Breaks +argstring+ into raw segments at the configured separator.
#
# Characters inside an enclosure, and the character that follows an escape,
# never trigger a split. Encloser and escape characters are appended to the
# working buffer as-is, with one exception: when the separator is whitespace,
# an escape placed directly before a whitespace character is dropped and the
# whitespace still separates.
#
# @param argstring [#to_s] input; converted with +to_s+ and stripped of
#   leading/trailing whitespace before scanning
# @param errors [#<<] collector that receives a ParseError
#   (code :unterminated_enclosure) when an enclosure is still open at the
#   end of the input
# @return [Array] the segments gathered via flush_segment!, or an empty
#   array when an enclosure was left unterminated
def split(argstring, errors:)
	# Surrounding whitespace is never significant in any mode; callers who
	# need it must wrap it in an enclosure.
	text = argstring.to_s.strip
	limit = text.length

	pieces = []
	pending = +""
	cursor = 0
	# Holds the active encloser pair while scanning inside an enclosure.
	open_pair = nil

	escape_char = ->(candidate) { @config.escape_enabled? && candidate == @config.escape }

	# Copies the escape character plus the character after it (when there is
	# one) into the buffer and moves the cursor past both.
	keep_escaped = lambda do
		chunk = text[cursor, 2]
		pending << chunk
		cursor += chunk.length
	end

	while cursor < limit
		char = text[cursor]

		if open_pair
			if escape_char.call(char)
				# An escaped character can never close the enclosure.
				keep_escaped.call
			else
				pending << char
				open_pair = nil if char == open_pair[:close]
				cursor += 1
			end
		elsif (open_pair = @config.encloser_open_pair_for(char))
			# Opening an enclosure takes precedence over escape handling.
			pending << char
			cursor += 1
		elsif escape_char.call(char)
			escapes_separator = @config.whitespace_separator? &&
				cursor + 1 < limit &&
				@config.whitespace_char?(text[cursor + 1])
			if escapes_separator
				# Whitespace separators cannot be escaped: skip only the escape
				# so the whitespace is handled as a separator on the next pass.
				cursor += 1
			else
				keep_escaped.call
			end
		elsif @config.separator_boundary_at?(text, cursor)
			flush_segment!(pieces, pending)
			if @config.whitespace_separator?
				# A run of whitespace counts as a single separator.
				cursor += 1 while cursor < limit && @config.whitespace_char?(text[cursor])
			else
				# Non-whitespace separators are consumed one at a time.
				cursor += 1
			end
		else
			pending << char
			cursor += 1
		end
	end

	unless open_pair
		flush_segment!(pieces, pending)
		return pieces
	end

	errors << ParseError.new(
		code: :unterminated_enclosure,
		message: "Unterminated enclosure in input"
	)
	[]
end