Class: RubyLexUtils
Constant Summary collapse
- OPENING_DELIMITER_TYPES =
%i(PARENTHESIS_LEFT BRACKET_LEFT BRACE_LEFT BRACKET_LEFT_ARRAY)
- CLOSING_DELIMITER_TYPES =
%i(PARENTHESIS_RIGHT BRACKET_RIGHT BRACE_RIGHT BRACKET_RIGHT_ARRAY)
- UNINSTRUMENTABLE_KEYWORDS =
%i( KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DEF KEYWORD_UNDEF KEYWORD_BEGIN KEYWORD_RESCUE KEYWORD_ENSURE KEYWORD_END KEYWORD_IF KEYWORD_UNLESS KEYWORD_THEN KEYWORD_ELSIF KEYWORD_ELSE KEYWORD_CASE KEYWORD_WHEN KEYWORD_WHILE KEYWORD_UNTIL KEYWORD_FOR KEYWORD_BREAK KEYWORD_NEXT KEYWORD_REDO KEYWORD_RETRY KEYWORD_IN KEYWORD_DO KEYWORD_RETURN KEYWORD_ALIAS )
Instance Method Summary collapse
-
#each_lexed_segment(text) {|line, instrumentable, inside_begin, line_no| ... } ⇒ Object
Yields each lexed segment and whether the segment is instrumentable.
Instance Method Details
#each_lexed_segment(text) {|line, instrumentable, inside_begin, line_no| ... } ⇒ Object
Yields each lexed segment and whether the segment is instrumentable
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/openc3/utilities/ruby_lex_utils.rb', line 45

# Lexes the given Ruby source with Prism and yields it back one logical
# segment at a time. A segment is normally a single source line, but tokens
# are accumulated across physical lines while a string is open, while a
# delimiter (parenthesis, bracket, brace) is unclosed, or while waiting for
# the newline that follows a closing delimiter.
#
# @param text [String] Ruby source code to lex
# @yieldparam line [String] The reconstructed text of the segment
# @yieldparam instrumentable [Boolean] false if the segment contains one of
#   the UNINSTRUMENTABLE_KEYWORDS or a left brace in a lexer state other than
#   EXPR_BEG | EXPR_LABEL
# @yieldparam inside_begin [Boolean] Whether the segment was seen while a
#   begin block was being tracked as open
# @yieldparam line_no [Integer] Line number of the first token of the segment
def each_lexed_segment(text)
  line = ''
  begin_indent = nil
  inside_begin = false
  string_begin = false
  orig_line_no = nil
  line_no = nil
  rescue_line_no = nil
  waiting_on_newline = false
  waiting_on_close = 0
  prev_token = nil
  instrumentable = true

  tokens = Prism.lex(text).value
  # See: https://github.com/ruby/prism/blob/main/lib/prism/parse_result.rb
  # for what is returned by Prism.lex
  # We process the tokens in pairs to recreate spacing and handle string assignments
  tokens.each_cons(2) do |(token, lex_state), (next_token, _next_lex_state)|
    # pp token # Uncomment for debugging

    # Ignore embedded documentation, which must be at column 0 and looks like:
    # =begin
    # This is a comment
    # And so is this
    # =end
    if token.type == :EMBDOC_BEGIN || token.type == :EMBDOC_LINE || token.type == :EMBDOC_END
      prev_token = token
      next
    end

    # Recreate the spaces at the beginning of a line
    # This has to come before we add the token.value to the line
    if prev_token.nil?
      line += ' ' * token.location.start_column
    # If the previous token is STRING_CONTENT it is probably string interpolation so ignore it
    # Otherwise if the previous token has changed lines we're on a newline so add space
    elsif prev_token.type != :STRING_CONTENT && prev_token.location.end_line - prev_token.location.start_line > 0
      line += ' ' * token.location.start_column
    end
    prev_token = token

    # Comments require tacking on a newline but are otherwise ignored
    if token.type == :COMMENT
      line += "\n"
      waiting_on_newline = false
    else
      line += token.value
    end

    if UNINSTRUMENTABLE_KEYWORDS.include?(token.type)
      instrumentable = false
    end

    # Seed the line numbers before the end-of-file check below. If they were
    # only set after it, a final segment made of a single token (e.g. a file
    # without a trailing newline) would be yielded with a nil line number.
    line_no ||= token.location.start_line
    # Keep track of the original line number because the line number can change
    # when we're putting together multiline structures like strings, arrays, hashes, etc.
    orig_line_no ||= line_no

    # We're processing tokens in pairs so we need to check if we're at the end
    # of the file and process the last line
    if next_token.type == :EOF
      yield line, instrumentable, inside_begin, orig_line_no unless line.empty?
      break
    end

    # Recreate spaces between tokens rather than trying to figure out
    # which tokens require spacing before and after
    if token.location.start_line == next_token.location.start_line
      spaces = next_token.location.start_column - token.location.end_column
      line += ' ' * spaces
    end

    case token.type
    when :BRACE_LEFT
      # BRACE is a special case because it can be used for hashes and blocks
      if lex_state != (Ripper::EXPR_BEG | Ripper::EXPR_LABEL)
        instrumentable = false
      end
      waiting_on_close += 1
    when :STRING_BEGIN
      # Mark when a string begins to allow for processing string interpolation tokens
      string_begin = true
      line_no = token.location.start_line
    when :STRING_END
      string_begin = false
      next
    when :KEYWORD_BEGIN
      inside_begin = true
      begin_indent = token.location.start_column unless begin_indent # Don't restart for nested begins
    when :KEYWORD_RESCUE
      rescue_line_no = token.location.start_line
    when :KEYWORD_END
      # Assume the begin and end are aligned
      # Otherwise we have to count any keywords that can close with END
      if token.location.start_line == rescue_line_no || token.location.start_column == begin_indent
        inside_begin = false
      end
    when *OPENING_DELIMITER_TYPES
      waiting_on_close += 1
    when *CLOSING_DELIMITER_TYPES
      waiting_on_close -= 1
      waiting_on_newline = true
    when :NEWLINE, :IGNORED_NEWLINE
      waiting_on_newline = false
      # If the next token is a STRING_BEGIN then hold off processing the newline
      # because it's going to be a string assignment
      next if next_token.type == :STRING_BEGIN
    end

    # Don't process the line yet if we're waiting for additional tokens
    next if string_begin || waiting_on_newline || waiting_on_close > 0

    # This is where we process the line and yield it
    if line_no != token.location.start_line || line_no != token.location.end_line
      yield line, instrumentable, inside_begin, orig_line_no
      line = ''
      instrumentable = true
      orig_line_no = nil
      line_no = nil
    end
  end
end