Class: Puppet::Pops::Parser::Lexer
- Inherits:
- Object
- Extended by:
- Forwardable
- Defined in:
- lib/puppet/pops/parser/lexer.rb
Defined Under Namespace
Modules: Contextual
Classes: Token, TokenList
Constant Summary
- TOKENS = TokenList.new
- DQ_initial_token_types = {'$' => :DQPRE, '"' => :STRING}
- DQ_continuation_token_types = {'$' => :DQMID, '"' => :DQPOST}
- KEYWORDS = TokenList.new
- MULTIBYTE = Puppet::Pops::Parser::Locator::MULTIBYTE
- SKIPPATTERN = MULTIBYTE ? %r{[[:blank:]\r]+} : %r{[ \t\r]+}
- LBRACE_CHAR = '{'
- @@pairs =
{
"{" => "}",
"(" => ")",
"[" => "]",
"<|" => "|>",
"<<|" => "|>>",
"|" => "|"
}
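The @@pairs map drives the lexer's delimiter bookkeeping: when #scan consumes an opening delimiter it pushes the matching closer onto the internal @expected stack, and pops it again when that closer is seen (see #scan and #expected below). A conceptual sketch, not code from the lexer itself:

# Conceptual sketch only: how the opening/closing pairs are used for
# "expected token" tracking (mirrors the push/pop logic in #scan).
pairs = {
  "{" => "}", "(" => ")", "[" => "]",
  "<|" => "|>", "<<|" => "|>>", "|" => "|"
}
expected = []
expected << pairs["["]                 # after lexing "[", a "]" is expected
expected.pop if expected[-1] == "]"    # lexing "]" satisfies that expectation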
Instance Attribute Summary
- #file ⇒ Object
- #indefine ⇒ Object (also: #indefine?)
- #lexing_context ⇒ Object
- #locator ⇒ Object
- #token_queue ⇒ Object
Instance Method Summary
- #assert_numeric(value) ⇒ Object
- #clear ⇒ Object
- #expected ⇒ Object
- #find_regex_token ⇒ Object
  Find the next token that matches a regex.
- #find_string_token ⇒ Object
- #find_token ⇒ Object
  Find the next token, returning the string and the token.
- #followed_by ⇒ Object
  Returns "<eof>" if at end of input, else the following 5 characters with \n, \r, \t escaped.
- #format_quote(q) ⇒ Object
- #fullscan ⇒ Object
  Scans the whole file; basically just used for testing.
- #initialize ⇒ Lexer (constructor)
- #initvars ⇒ Object
- #lex_error(msg) ⇒ Object
- #line ⇒ Object
  Returns the line number (starting from 1) for the current position in the scanned text (at the end of the last produced, but not necessarily consumed, token).
- #match?(r) ⇒ Boolean
- #munge_token(token, value) ⇒ Object
  Make any necessary changes to the token and/or value.
- #namespace ⇒ Object
  Collect the current namespace.
- #pos ⇒ Object
- #positioned_message(msg) ⇒ Object
  Formats the given message by appending file, line, and position if available.
- #positioned_value(value) ⇒ Object
  Returns a hash with the current position in source based on the current lexing context.
- #replace_false_start_with_text(appendix) ⇒ Object
- #scan {|[false,false]| ... } ⇒ Object
  This is the heart of the lexer.
- #slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object
  We've encountered the start of a string…
- #string=(string, path = '') ⇒ Object
  Just parse a string, not a whole file.
- #tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
- #warn_if_variable_has_hyphen(var_name) ⇒ Object
Constructor Details
#initialize ⇒ Lexer
Returns a new instance of Lexer.
# File 'lib/puppet/pops/parser/lexer.rb', line 453
def initialize
initvars
end
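A minimal usage sketch (not taken from the source): build a lexer, feed it a string with #string=, and collect tokens with #fullscan. The require paths are assumptions about how the class is normally loaded.

require 'puppet'
require 'puppet/pops'   # assumed load path for the Pops parser/lexer

lexer = Puppet::Pops::Parser::Lexer.new
lexer.string = '$x = [1, 2]'          # the path argument defaults to ''
lexer.fullscan.each do |name, value|
  # value is usually the hash built by #positioned_value; the final entry
  # is the [false, false] end marker yielded by #scan.
  text = value.is_a?(Hash) ? value[:value] : value
  puts "#{name} => #{text.inspect}"
end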
Instance Attribute Details
#file ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 15
def file
@file
end
#indefine ⇒ Object
Also known as:
indefine?
# File 'lib/puppet/pops/parser/lexer.rb', line 19
def indefine
@indefine
end
#lexing_context ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 15
def lexing_context
@lexing_context
end
#locator ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 17
def locator
@locator
end
#token_queue ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 15
def token_queue
@token_queue
end
Instance Method Details
#assert_numeric(value) ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 457
def assert_numeric(value)
if value =~ /^0[xX].*$/
lex_error (positioned_message("Not a valid hex number #{value}")) unless value =~ /^0[xX][0-9A-Fa-f]+$/
elsif value =~ /^0[^.].*$/
lex_error(positioned_message("Not a valid octal number #{value}")) unless value =~ /^0[0-7]+$/
else
lex_error(positioned_message("Not a valid decimal number #{value}")) unless value =~ /0?\d+(?:\.\d+)?(?:[eE]-?\d+)?/
end
end
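An illustrative sketch of the three branches above (assumed usage; a string is set first so any error message can be positioned):

lexer = Puppet::Pops::Parser::Lexer.new
lexer.string = ''
lexer.assert_numeric('0x1F')      # hex branch: accepted
lexer.assert_numeric('0755')      # octal branch: accepted
lexer.assert_numeric('3.14e-2')   # decimal branch: accepted
# lexer.assert_numeric('0xZZ')    # would raise Puppet::LexError ("Not a valid hex number 0xZZ")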
#clear ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 376
def clear
initvars
end
#expected ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 380
def expected
return nil if @expected.empty?
name = @expected[-1]
TOKENS.lookup(name) or lex_error "Internal Lexer Error: Could not find expected token #{name}"
end
#find_regex_token ⇒ Object
Find the next token that matches a regex. We look for these first.
# File 'lib/puppet/pops/parser/lexer.rb', line 424
def find_regex_token
best_token = nil
best_length = 0
_lxc = @lexing_context
_scn = @scanner
TOKENS.regex_tokens.each do |token|
if length = _scn.match?(token.regex) and token.acceptable?(_lxc)
if length > best_length
best_length = length
best_token = token
end
end
end
return best_token, _scn.scan(best_token.regex) if best_token
end
#find_string_token ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 408
def find_string_token
_scn = @scanner
s = _scn.peek(3)
token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
unless token
return [nil, nil]
end
[ token, _scn.scan(token.regex) ]
end
#find_token ⇒ Object
Find the next token, returning the string and the token.
# File 'lib/puppet/pops/parser/lexer.rb', line 446
def find_token
shift_token || find_regex_token || find_string_token
end
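A hedged illustration of the lookup order: queued tokens are returned first, then the longest regex match, then fixed strings looked up by their first three, two, or one characters (see #find_string_token above).

lexer = Puppet::Pops::Parser::Lexer.new
lexer.string = '=='
token, text = lexer.find_token
# text is expected to be "==" and token the two-character operator token,
# rather than a single "=" token (assumed behaviour of the 3/2/1-character lookup).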
#followed_by ⇒ Object
Returns "<eof>" if at end of input, else the following 5 characters with \n, \r, \t escaped.
# File 'lib/puppet/pops/parser/lexer.rb', line 665
def followed_by
return "<eof>" if @scanner.eos?
result = @scanner.rest[0,5] + "..."
result.gsub!("\t", '\t')
result.gsub!("\n", '\n')
result.gsub!("\r", '\r')
result
end
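A small sketch of the preview it produces (assumed output shown in the comments):

lexer = Puppet::Pops::Parser::Lexer.new
lexer.string = "a\nbcdefgh"
lexer.followed_by   # => "a\\nbcd..." (first 5 characters plus "...", with the newline shown escaped)

lexer.string = ''
lexer.followed_by   # => "<eof>"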
#format_quote(q) ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 674
def format_quote q
if q == "'"
'"\'"'
else
"'#{q}'"
end
end
#fullscan ⇒ Object
Scans the whole file; basically just used for testing.
# File 'lib/puppet/pops/parser/lexer.rb', line 388
def fullscan
array = []
self.scan { |token, str|
@indefine = false
array.push([token,str])
}
array
end
#initvars ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 467
def initvars
@previous_token = nil
@scanner = nil
@file = nil
@namestack = []
@token_queue = []
@indefine = false
@expected = []
@lexing_context = {
:after => nil,
:start_of_line => true,
:offset => 0, :end_offset => 0, :brace_count => 0, :interpolation_stack => [] }
end
#lex_error(msg) ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 22
def lex_error msg
raise Puppet::LexError.new(msg)
end
#line ⇒ Object
Returns the line number (starting from 1) for the current position in the scanned text (at the end of the last produced, but not necessarily consumed, token).
# File 'lib/puppet/pops/parser/lexer.rb', line 749
def line
return 1 unless @lexing_context && locator
locator.line_for_offset(@lexing_context[:end_offset])
end
#match?(r) ⇒ Boolean
# File 'lib/puppet/pops/parser/lexer.rb', line 622
def match? r
@scanner.match?(r)
end
#munge_token(token, value) ⇒ Object
Make any necessary changes to the token and/or value.
# File 'lib/puppet/pops/parser/lexer.rb', line 490
def munge_token(token, value)
return token, value if value.is_a? Hash
@scanner.skip(SKIPPATTERN) if token.skip_text
return if token.skip
token, value = token.convert(self, value) if token.respond_to?(:convert)
return unless token
return if token.skip
return token, value if value.is_a? Hash
return token, positioned_value(value)
end
#namespace ⇒ Object
Collect the current namespace.
# File 'lib/puppet/pops/parser/lexer.rb', line 535
def namespace
@namestack.join("::")
end
#pos ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 522
def pos
@locator.pos_on_line(@lexing_context[:offset])
end
#positioned_message(msg) ⇒ Object
Formats the given message by appending file, line, and position if available.
# File 'lib/puppet/pops/parser/lexer.rb', line 657
def positioned_message msg
result = [msg]
result << "in file #{file}" if file
result << "at line #{line}:#{pos}" if line
result.join(" ")
end
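A brief sketch of the resulting format (assumed; the file part only appears when #file has been set):

lexer = Puppet::Pops::Parser::Lexer.new
lexer.string = 'notify { "x": }'
lexer.positioned_message("unexpected thing")
# => expected to look like "unexpected thing at line 1:1"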
#positioned_value(value) ⇒ Object
Returns a hash with the current position in source based on the current lexing context.
# File 'lib/puppet/pops/parser/lexer.rb', line 513
def positioned_value(value)
{
:value => value,
:locator => @locator,
:offset => @lexing_context[:offset],
:end_offset => @lexing_context[:end_offset]
}
end
#replace_false_start_with_text(appendix) ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 723
def replace_false_start_with_text(appendix)
last_token = token_queue.pop
value = last_token.last
if value.is_a? Hash
value[:value] + appendix
else
value + appendix
end
end
#scan {|[false,false]| ... } ⇒ Object
This is the heart of the lexer.
# File 'lib/puppet/pops/parser/lexer.rb', line 544
def scan
_scn = @scanner
lex_error "Internal Error: No string or file given to lexer to process." unless _scn
_scn.skip(SKIPPATTERN)
_lbrace = '{'.freeze
until token_queue.empty? and _scn.eos? do
offset = _scn.pos
matched_token, value = find_token
end_offset = _scn.pos
lex_error "Could not match #{_scn.rest[/^(\S+|\s+|.*)/]}" unless matched_token
newline = matched_token.name == :RETURN
_lxc = @lexing_context
_lxc[:start_of_line] = newline
_lxc[:offset] = offset
_lxc[:end_offset] = end_offset
final_token, token_value = munge_token(matched_token, value)
_lxc[:end_offset] = _scn.pos
unless final_token
_scn.skip(SKIPPATTERN)
next
end
_lxc[:after] = final_token.name unless newline
if final_token.name == :DQPRE
_lxc[:interpolation_stack] << _lxc[:brace_count]
elsif final_token.name == :DQPOST
_lxc[:interpolation_stack].pop
end
value = token_value[:value]
_expected = @expected
if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
_expected << match
elsif exp = _expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
_expected.pop
end
yield [final_token.name, token_value]
_prv = @previous_token
if _prv
namestack(value) if _prv.name == :CLASS and value != LBRACE_CHAR
if _prv.name == :DEFINE
if indefine?
msg = "Cannot nest definition #{value} inside #{@indefine}"
self.indefine = false
raise Puppet::ParseError, msg
end
@indefine = value
end
end
@previous_token = final_token
_scn.skip(SKIPPATTERN)
end
yield [false,false]
end
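A minimal sketch of driving #scan directly with a block (the token names in the comment are assumptions about the token table, which is not reproduced on this page):

lexer = Puppet::Pops::Parser::Lexer.new
lexer.string = 'class foo { }'
lexer.scan do |name, value|
  break if name == false                 # [false, false] marks the end of input
  text = value.is_a?(Hash) ? value[:value] : value
  puts "#{name}: #{text.inspect}"        # e.g. CLASS, NAME, LBRACE, RBRACE
end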
#slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object
We've encountered the start of a string: slurp in the rest of the string and return it.
# File 'lib/puppet/pops/parser/lexer.rb', line 632
def slurpstring(terminators,escapes=%w{ \\ $ ' " r n t s }+["\n"],ignore_invalid_escapes=false)
last = @scanner.matched
str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) || lex_error(positioned_message("Unclosed quote after #{format_quote(last)} followed by '#{followed_by}'"))
str.gsub!(/\\(.)/m) {
ch = $1
if escapes.include? ch
case ch
when 'r'; "\r"
when 'n'; "\n"
when 't'; "\t"
when 's'; " "
when "\n"; ''
else ch
end
else
Puppet.warning(positioned_message("Unrecognized escape sequence '\\#{ch}'")) unless ignore_invalid_escapes
"\\#{ch}"
end
}
[ str[0..-2],str[-1,1] ]
end
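This method is normally reached through the quote tokens rather than called directly; a hedged illustration of its escape handling via the public API:

lexer = Puppet::Pops::Parser::Lexer.new
lexer.string = '"a\tb"'            # Puppet source: a double-quoted string containing \t
name, value = lexer.fullscan.first
# name is expected to be :STRING with value[:value] == "a\tb" (tab expanded),
# since 't' is in the default escapes list above; unrecognized escapes are kept
# as-is and produce an "Unrecognized escape sequence" warning instead.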
#string=(string, path = '') ⇒ Object
Just parse a string, not a whole file.
# File 'lib/puppet/pops/parser/lexer.rb', line 734
def string=(string, path='')
@scanner = StringScanner.new(string.freeze)
@locator = Puppet::Pops::Parser::Locator.locator(string, path)
end
#tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 682
def tokenize_interpolated_string(token_type,preamble='')
value,terminator = slurpstring('"$')
braced = terminator == '$' && @scanner.scan(/\{/)
lxc = @lexing_context
lxc[:end_offset] = @scanner.pos
token_queue << [TOKENS[token_type[terminator]],positioned_value(preamble+value)]
variable_regex = if Puppet[:allow_variables_with_dashes]
TOKENS[:VARIABLE_WITH_DASH].regex
else
TOKENS[:VARIABLE].regex
end
if terminator != '$' or braced
return token_queue.shift
end
tmp_offset = @scanner.pos
if var_name = @scanner.scan(variable_regex)
lxc[:offset] = tmp_offset
lxc[:end_offset] = @scanner.pos
warn_if_variable_has_hyphen(var_name)
if braced && @scanner.match?(%r{[ \t\r]*\(})
token_queue << [TOKENS[:NAME], positioned_value(var_name)]
else
token_queue << [TOKENS[:VARIABLE],positioned_value(var_name)]
end
lxc[:offset] = @scanner.pos
tokenize_interpolated_string(DQ_continuation_token_types)
else
tokenize_interpolated_string(token_type, replace_false_start_with_text(terminator))
end
end
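A hedged sketch of the token stream for an interpolated double-quoted string; the DQPRE/DQPOST names come from the DQ_* constants above, while the exact values shown are assumptions:

lexer = Puppet::Pops::Parser::Lexer.new
lexer.string = '"hello $name!"'
lexer.fullscan.map { |t, v| [t, v.is_a?(Hash) ? v[:value] : v] }
# => expected to resemble:
#    [[:DQPRE, "hello "], [:VARIABLE, "name"], [:DQPOST, "!"], [false, false]]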
#warn_if_variable_has_hyphen(var_name) ⇒ Object
# File 'lib/puppet/pops/parser/lexer.rb', line 739
def warn_if_variable_has_hyphen(var_name)
if var_name.include?('-')
Puppet.deprecation_warning("Using `-` in variable names is deprecated at #{file || '<string>'}:#{line}. See http://links.puppetlabs.com/puppet-hyphenated-variable-deprecation")
end
end