Class: RubyLexUtils

Inherits:
Object show all
Defined in:
lib/openc3/utilities/ruby_lex_utils.rb

Constant Summary collapse

# Prism token types that open a bracketed construct: ( [ { and the array-literal [
OPENING_DELIMITER_TYPES =
  %i[PARENTHESIS_LEFT BRACKET_LEFT BRACE_LEFT BRACKET_LEFT_ARRAY]
# Prism token types that close a bracketed construct: ) ] } and the array-literal ]
CLOSING_DELIMITER_TYPES =
  %i[PARENTHESIS_RIGHT BRACKET_RIGHT BRACE_RIGHT BRACKET_RIGHT_ARRAY]
# Prism keyword token types whose presence makes a line unsafe to instrument
UNINSTRUMENTABLE_KEYWORDS =
  %i[
    KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DEF KEYWORD_UNDEF KEYWORD_BEGIN KEYWORD_RESCUE
    KEYWORD_ENSURE KEYWORD_END KEYWORD_IF KEYWORD_UNLESS
    KEYWORD_THEN KEYWORD_ELSIF KEYWORD_ELSE KEYWORD_CASE KEYWORD_WHEN
    KEYWORD_WHILE KEYWORD_UNTIL KEYWORD_FOR KEYWORD_BREAK KEYWORD_NEXT
    KEYWORD_REDO KEYWORD_RETRY KEYWORD_IN KEYWORD_DO KEYWORD_RETURN KEYWORD_ALIAS
  ]

Instance Method Summary collapse

Instance Method Details

#each_lexed_segment(text) {|line, instrumentable, inside_begin, line_no| ... } ⇒ Object

Yields each lexed segment and whether the segment is instrumentable

Parameters:

  • text (String)

    The Ruby source code to lex

Yield Parameters:

  • line (String)

    The entire line

  • instrumentable (Boolean)

    Whether the line is instrumentable

  • inside_begin (Boolean)

    Whether the line is inside a begin/rescue block

  • line_no (Integer)

    The current line number



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/openc3/utilities/ruby_lex_utils.rb', line 45

# Yields each lexed segment of the text and whether the segment is instrumentable.
# Multiline constructs (strings, arrays, hashes, blocks) are joined back together
# and yielded as a single segment with the line number where they started.
#
# @param text [String] Ruby source code to lex (via Prism)
# @yieldparam line [String] The entire line (or joined multiline segment)
# @yieldparam instrumentable [Boolean] Whether the segment is instrumentable
# @yieldparam inside_begin [Boolean] Whether the segment is inside a begin block
# @yieldparam line_no [Integer] Line number in text where the segment started
def each_lexed_segment(text)
  line = '' # Accumulates token values until a full segment is ready to yield
  begin_indent = nil # Start column of the outermost 'begin' (nil when none open)
  inside_begin = false
  string_begin = false # True between STRING_BEGIN and STRING_END tokens
  orig_line_no = nil # Line number where the current segment started
  line_no = nil
  rescue_line_no = nil # Line of the most recent 'rescue' keyword
  waiting_on_newline = false # After a closing delimiter, hold the segment until a newline
  waiting_on_close = 0 # Open delimiter depth; only yield when back to zero
  prev_token = nil
  instrumentable = true
  tokens = Prism.lex(text).value
  # See: https://github.com/ruby/prism/blob/main/lib/prism/parse_result.rb
  # for what is returned by Prism.lex
  # We process the tokens in pairs to recreate spacing and handle string assignments
  tokens.each_cons(2) do |(token, lex_state), (next_token, _next_lex_state)|
    # pp token # Uncomment for debugging
    # Ignore embedded documentation must be at column 0 and looks like:
=begin
This is a comment
And so is this
=end
    if token.type == :EMBDOC_BEGIN or token.type == :EMBDOC_LINE or token.type == :EMBDOC_END
      prev_token = token
      next
    end

    # Recreate the spaces at the beginning of a line
    # This has to come before we add the token.value to the line
    if prev_token.nil?
      line += ' ' * token.location.start_column
    # If the previous token is STRING_CONTENT it is probably string interpolation so ignore it
    # Otherwise if the previous token has changed lines we're on a newline so add space
    elsif prev_token.type != :STRING_CONTENT and prev_token.location.end_line - prev_token.location.start_line > 0
      line += ' ' * token.location.start_column
    end
    prev_token = token

    # Comments require tacking on a newline but are otherwise ignored
    if token.type == :COMMENT
      line += "\n"
      waiting_on_newline = false
    else
      line += token.value
    end

    # Any keyword in the uninstrumentable list (class, def, if, return, etc.)
    # marks the entire segment as not instrumentable
    if UNINSTRUMENTABLE_KEYWORDS.include?(token.type)
      instrumentable = false
    end

    # We're processing tokens in pairs so we need to check if we're at the end
    # of the file and process the last line
    if next_token.type == :EOF
      if !line.empty?
        yield line, instrumentable, inside_begin, orig_line_no
      end
      break
    end

    # Recreate spaces between tokens rather than trying to figure out
    # which tokens require spacing before and after
    if token.location.start_line == next_token.location.start_line
      spaces = next_token.location.start_column - token.location.end_column
      line += ' ' * spaces
    end
    line_no ||= token.location.start_line
    # Keep track of the original line number because the line number can change
    # when we're putting together multiline structures like strings, arrays, hashes, etc.
    orig_line_no ||= line_no

    case token.type
    when :BRACE_LEFT
      # BRACE is a special case because it can be used for hashes and blocks
      # NOTE(review): EXPR_BEG|EXPR_LABEL appears to indicate a hash literal;
      # any other lex state is treated as a block, which is not instrumentable — confirm
      if lex_state != (Ripper::EXPR_BEG | Ripper::EXPR_LABEL)
        instrumentable = false
      end
      waiting_on_close += 1
    when :STRING_BEGIN
      # Mark when a string begins to allow for processing string interpolation tokens
      string_begin = true
      line_no = token.location.start_line
    when :STRING_END
      string_begin = false
      next
    when :KEYWORD_BEGIN
      inside_begin = true
      begin_indent = token.location.start_column unless begin_indent # Don't restart for nested begins
    when :KEYWORD_RESCUE
      rescue_line_no = token.location.start_line
    when :KEYWORD_END
      # Assume the begin and end are aligned
      # Otherwise we have to count any keywords that can close with END
      if token.location.start_line == rescue_line_no || token.location.start_column == begin_indent
        inside_begin = false
      end
    when *OPENING_DELIMITER_TYPES
      waiting_on_close += 1
    when *CLOSING_DELIMITER_TYPES
      waiting_on_close -= 1
      # Hold the segment until the newline that follows the closing delimiter
      waiting_on_newline = true
    when :NEWLINE, :IGNORED_NEWLINE
      waiting_on_newline = false
      # If the next token is a STRING_BEGIN then hold off processing the newline
      # because it's going to be a string assignment
      next if next_token.type == :STRING_BEGIN
    end

    # Don't process the line yet if we're waiting for additional tokens
    next if string_begin or waiting_on_newline or waiting_on_close > 0

    # This is where we process the line and yield it
    # (only once the current token has moved past the line the segment started on)
    if line_no != token.location.start_line or line_no != token.location.end_line
      yield line, instrumentable, inside_begin, orig_line_no
      line = ''
      instrumentable = true
      orig_line_no = nil
      line_no = nil
    end
  end
end