Class: NScript::Lexer
- Inherits:
-
Object
- Object
- NScript::Lexer
- Defined in:
- lib/nscript/lexer/lexer.rb
Constant Summary collapse
# Reserved words. identifier_token tags a matching identifier with its
# upcased symbol (e.g. "if" => :IF) instead of :IDENTIFIER.
KEYWORDS = ["if", "else", "then", "unless", "true", "false", "yes", "no", "on", "off", "and", "or", "is", "isnt", "not", "new", "return", "try", "catch", "finally", "throw", "break", "continue", "for", "in", "of", "by", "where", "while", "delete", "instanceof", "typeof", "switch", "when", "super", "extends"].freeze

# Token patterns. Each is anchored with \A so it only matches at the very
# start of the remaining input; capture group 1 is the full lexeme.
IDENTIFIER = /\A([a-zA-Z$_](\w|\$)*)/
NUMBER     = /\A(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
STRING     = /\A(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m
# Groups 2 and 4 hold the heredoc body (double / single quoted). The
# six-quote alternatives (empty heredoc) capture no body at all.
HEREDOC    = /\A("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m
JS         = /\A(``|`(.*?)([^\\]|\\\\)`)/m
OPERATOR   = /\A([+\*&|\/\-%=<>:!?]+)/
WHITESPACE = /\A([ \t]+)/
COMMENT    = /\A(((\n?[ \t]*)?#.*$)+)/
CODE       = /\A((-|=)>)/
REGEX      = /\A(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/
# Group 1 is the run of newlines plus indentation; group 4 is an optional
# leading "." on the following line.
MULTI_DENT = /\A((\n([ \t]*))+)(\.)?/
LAST_DENT  = /\n([ \t]*)/
ASSIGNMENT = /\A(:|=)\Z/

# Helper patterns used to clean up or inspect already-matched lexemes.
JS_CLEANER      = /(\A`|`\Z)/
MULTILINER      = /\n/
STRING_NEWLINES = /\n[ \t]*/
COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/
# Matched against the previous token's value by indent_token; a match is
# one of the conditions for suppressing the line break that follows it.
NO_NEWLINE      = /\A([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)\Z/
HEREDOC_INDENT  = /^[ \t]+/

# Tags after which regex_token refuses to lex a regular expression.
NOT_REGEX = [ :IDENTIFIER, :NUMBER, :REGEX, :STRING, ')', '++', '--', ']', '}', :FALSE, :NULL, :TRUE ].freeze

# Tags after which literal_token retags an unspaced "(" as :CALL_START
# and "[" as :INDEX_START.
CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING].freeze
Instance Method Summary collapse
- #close_indentation ⇒ Object
- #comment_token ⇒ Object
- #extract_next_token ⇒ Object
- #heredoc_token ⇒ Object
- #identifier_token ⇒ Object
- #indent_token ⇒ Object
- #js_token ⇒ Object
- #last_tag ⇒ Object
- #last_value ⇒ Object
- #literal_token ⇒ Object
- #newline_token(newlines) ⇒ Object
- #number_token ⇒ Object
- #outdent_token(move_out) ⇒ Object
- #regex_token ⇒ Object
- #string_token ⇒ Object
- #suppress_newlines(newlines) ⇒ Object
- #tag_parameters ⇒ Object
- #token(tag, value) ⇒ Object
- #tokenize(code) ⇒ Object
- #whitespace_token ⇒ Object
Instance Method Details
#close_indentation ⇒ Object
224 225 226 |
# File 'lib/nscript/lexer/lexer.rb', line 224

# Emits outdents for the whole of the current indentation width (@indent)
# by delegating to outdent_token. tokenize calls this once the input has
# been consumed.
def close_indentation
  open_width = @indent
  outdent_token(open_width)
end
#comment_token ⇒ Object
133 134 135 136 137 138 139 |
# File 'lib/nscript/lexer/lexer.rb', line 133

# Lexes a run of consecutive "#" comment lines at the current position.
#
# Returns false when the input does not start with a comment; otherwise
# records a :COMMENT token (value: array of comment lines with the "#"
# markers stripped) followed by a newline token, and advances @i.
def comment_token
  comment = @chunk[COMMENT, 1]
  return false unless comment

  # The line counter is advanced before the tokens are recorded, so both
  # tokens carry the line number of the end of the comment run.
  @line += comment.scan(MULTILINER).length
  stripped_lines = comment.gsub(COMMENT_CLEANER, '').split(MULTILINER)
  token(:COMMENT, stripped_lines)
  token("\n", "\n")
  @i += comment.length
end
#extract_next_token ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/nscript/lexer/lexer.rb', line 62

# Tries each specialised token matcher in priority order and stops at the
# first one that reports success (a truthy return). If none of them
# matches, falls back to literal_token and returns its result.
def extract_next_token
  matchers = [
    :identifier_token, :number_token, :heredoc_token, :string_token,
    :js_token, :regex_token, :indent_token, :comment_token,
    :whitespace_token
  ]
  # any? short-circuits, so later matchers never run once one succeeds.
  return if matchers.any? { |matcher| send(matcher) }

  literal_token
end
#heredoc_token ⇒ Object
108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/nscript/lexer/lexer.rb', line 108

# Lexes a triple-quoted heredoc at the current position.
#
# The smallest indentation found by HEREDOC_INDENT is removed from the
# start of each line, newlines and double quotes are escaped, and the
# result is recorded as a double-quoted :STRING token.
#
# Returns false when the input does not start with a heredoc; otherwise
# advances @line and @i past the heredoc.
def heredoc_token
  return false unless match = @chunk.match(HEREDOC)

  # An empty heredoc (six consecutive quotes) matches without capturing a
  # body, so groups 2 and 4 are both nil; treat that as an empty document
  # instead of calling String methods on nil.
  doc = (match[2] || match[4] || '').dup
  indent = doc.scan(HEREDOC_INDENT).min
  doc.gsub!(/^#{indent}/, "") if indent
  doc.gsub!("\n", "\\n")
  doc.gsub!('"', '\\"')
  token(:STRING, "\"#{doc}\"")
  @line += match[1].count("\n")
  @i += match[1].length
end
#identifier_token ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/nscript/lexer/lexer.rb', line 75

# Lexes an identifier or keyword at the current position.
#
# Besides recording the new token, this retags the preceding accessor
# token: "::" becomes :PROTOTYPE_ACCESS, a single "." becomes
# :PROPERTY_ACCESS, and "?" followed by "." collapses into one
# :SOAK_ACCESS token.
#
# Returns false when the input does not start with an identifier;
# otherwise advances @i past it.
def identifier_token
  return false unless identifier = @chunk[IDENTIFIER, 1]

  # Keywords are special identifiers tagged with their own name,
  # 'if' will result in an [:IF, "if"] token.
  tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
  tag = :LEADING_WHEN if tag == :WHEN && [:OUTDENT, :INDENT, "\n"].include?(last_tag)
  @tokens[-1][0] = :PROTOTYPE_ACCESS if tag == :IDENTIFIER && last_value == '::'

  # The token before the "." may not exist (input beginning with ".name"),
  # so every access to it is nil-guarded.
  before_dot = @tokens[-2]
  if tag == :IDENTIFIER && last_value == '.' && !(before_dot && before_dot[1] == '.')
    if before_dot && before_dot[0] == "?"
      @tokens[-1][0] = :SOAK_ACCESS
      @tokens.delete_at(-2)
    else
      @tokens[-1][0] = :PROPERTY_ACCESS
    end
  end

  token(tag, identifier)
  @i += identifier.length
end
#indent_token ⇒ Object
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/nscript/lexer/lexer.rb', line 141

# Lexes a run of line breaks plus the indentation that follows them.
#
# Depending on how the new indentation compares with @indent this either
# suppresses the line break (continuation lines), records a newline token
# (same level), records an :INDENT token (deeper) or delegates to
# outdent_token (shallower).
#
# Returns false when the input does not start with a line break. @line
# and @i are advanced as soon as a match is found.
def indent_token
  return false unless indent = @chunk[MULTI_DENT, 1]

  @line += indent.scan(MULTILINER).size
  @i += indent.size
  next_character = @chunk[MULTI_DENT, 4]

  # The line break is not significant when the next line starts with "."
  # or the previous token is a dangling operator (but not one used as a
  # property name after ".", and not a function arrow). The token before
  # the operator may not exist, so it is nil-guarded; a missing token
  # counts as "not a dot".
  before_last = @tokens[-2]
  no_newlines = next_character == '.' ||
                (last_value.to_s.match(NO_NEWLINE) &&
                 (before_last.nil? || before_last[0] != '.') &&
                 !last_value.match(CODE))
  return suppress_newlines(indent) if no_newlines

  # Width of the indentation after the last line break in the run.
  size = indent.scan(LAST_DENT).last.last.length
  return newline_token(indent) if size == @indent

  if size > @indent
    token(:INDENT, size - @indent)
    @indents << (size - @indent)
  else
    outdent_token(@indent - size)
  end
  @indent = size
end
#js_token ⇒ Object
120 121 122 123 124 |
# File 'lib/nscript/lexer/lexer.rb', line 120

# Lexes a backtick-delimited JavaScript fragment at the current position.
#
# Returns false when the input does not start with one; otherwise records
# a :JS token holding the fragment without its surrounding backticks and
# advances @i past the whole fragment (backticks included).
def js_token
  script = @chunk[JS, 1]
  return false if script.nil?

  unwrapped = script.gsub(JS_CLEANER, '')
  token(:JS, unwrapped)
  @i += script.length
end
#last_tag ⇒ Object
205 206 207 |
# File 'lib/nscript/lexer/lexer.rb', line 205

# Returns the tag (first element) of the most recently recorded token,
# or nil when no token has been recorded yet.
def last_tag
  newest = @tokens.last
  newest && newest[0]
end
#last_value ⇒ Object
201 202 203 |
# File 'lib/nscript/lexer/lexer.rb', line 201

# Returns the value (second element) of the most recently recorded token,
# or nil when no token has been recorded yet.
def last_value
  newest = @tokens.last
  newest && newest[1]
end
#literal_token ⇒ Object
184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/nscript/lexer/lexer.rb', line 184

# Fallback matcher: lexes an operator run or, failing that, a single
# character, and records it as a token.
#
# ":" and "=" are tagged :ASSIGN. When the previous token is CALLABLE and
# was not followed by whitespace, "(" is tagged :CALL_START and "[" is
# tagged :INDEX_START. Any other lexeme is used as its own tag. A function
# arrow additionally triggers tag_parameters before the token is recorded.
def literal_token
  operator = @chunk[OPERATOR, 1]
  tag_parameters if operator && operator.match(CODE)

  # No operator at this position: consume exactly one character.
  value = operator || @chunk[0, 1]
  tag = value.match(ASSIGNMENT) ? :ASSIGN : value

  # @spaced is the value that was last followed by whitespace; an identity
  # check tells whether that value is the previous token's.
  unspaced = !@spaced.equal?(last_value)
  if unspaced && CALLABLE.include?(last_tag)
    case value
    when '(' then tag = :CALL_START
    when '[' then tag = :INDEX_START
    end
  end

  token(tag, value)
  @i += value.length
end
#newline_token(newlines) ⇒ Object
174 175 176 177 |
# File 'lib/nscript/lexer/lexer.rb', line 174

# Records a newline token unless the previous token's value is already a
# newline. The +newlines+ argument is accepted but not used. Always
# returns true.
def newline_token(newlines)
  already_terminated = last_value == "\n"
  token("\n", "\n") unless already_terminated
  true
end
#number_token ⇒ Object
94 95 96 97 98 |
# File 'lib/nscript/lexer/lexer.rb', line 94

# Lexes a numeric literal at the current position.
#
# Returns false when the input does not start with a number; otherwise
# records a :NUMBER token and advances @i past the literal.
def number_token
  literal = @chunk[NUMBER, 1]
  return false unless literal

  token(:NUMBER, literal)
  @i += literal.length
end
#outdent_token(move_out) ⇒ Object
159 160 161 162 163 164 165 166 |
# File 'lib/nscript/lexer/lexer.rb', line 159

# Pops indentation levels off @indents, recording one :OUTDENT token per
# popped level, until +move_out+ columns have been closed or no open
# levels remain. A newline token is always recorded afterwards.
def outdent_token(move_out)
  remaining = move_out
  until remaining <= 0 || @indents.empty?
    closed_width = @indents.pop
    token(:OUTDENT, closed_width)
    remaining -= closed_width
  end
  token("\n", "\n")
end
#regex_token ⇒ Object
126 127 128 129 130 131 |
# File 'lib/nscript/lexer/lexer.rb', line 126

# Lexes a regular-expression literal at the current position.
#
# Returns false when the input does not start with one, or when the
# previous token's tag is listed in NOT_REGEX. Otherwise records a :REGEX
# token and advances @i past the literal.
def regex_token
  literal = @chunk[REGEX, 1]
  return false if literal.nil? || NOT_REGEX.include?(last_tag)

  token(:REGEX, literal)
  @i += literal.length
end
#string_token ⇒ Object
100 101 102 103 104 105 106 |
# File 'lib/nscript/lexer/lexer.rb', line 100

# Lexes a single- or double-quoted string literal at the current position.
#
# Returns false when the input does not start with a string. Otherwise
# records a :STRING token in which every newline (plus the indentation
# after it) is replaced by a space, a backslash and a newline, then
# advances @line and @i past the original literal.
def string_token
  literal = @chunk[STRING, 1]
  return false unless literal

  # The token is recorded before @line is bumped, so it carries the line
  # on which the string starts.
  token(:STRING, literal.gsub(STRING_NEWLINES, " \\\n"))
  @line += literal.count("\n")
  @i += literal.length
end
#suppress_newlines(newlines) ⇒ Object
179 180 181 182 |
# File 'lib/nscript/lexer/lexer.rb', line 179

# Drops the previous token when its value is a lone backslash. The
# +newlines+ argument is accepted but not used. Always returns true.
def suppress_newlines(newlines)
  trailing_backslash = last_value == "\\"
  @tokens.pop if trailing_backslash
  true
end
#tag_parameters ⇒ Object
209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/nscript/lexer/lexer.rb', line 209

# Retags the tokens in front of a function arrow as a parameter list.
#
# Does nothing unless the previous token's tag is ")". Otherwise walks the
# token stream backwards, retagging :IDENTIFIER as :PARAM and ")" as
# :PARAM_END, and stops at the first "(", which becomes :PARAM_START.
# Returns :PARAM_START when a "(" was found, nil otherwise.
def tag_parameters
  return unless last_tag == ')'

  @tokens.reverse_each do |entry|
    case entry[0]
    when :IDENTIFIER then entry[0] = :PARAM
    when ')' then entry[0] = :PARAM_END
    when '(' then return entry[0] = :PARAM_START
    end
  end
  # Ran off the front of the stream without finding "(".
  nil
end
#token(tag, value) ⇒ Object
197 198 199 |
# File 'lib/nscript/lexer/lexer.rb', line 197

# Appends a [tag, value] pair to @tokens. The value is wrapped in a Value
# built from the raw value and the current line number (@line).
def token(tag, value)
  wrapped = Value.new(value, @line)
  @tokens.push([tag, wrapped])
end
#tokenize(code) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/nscript/lexer/lexer.rb', line 45

# Entry point: converts +code+ into a token stream.
#
# Resets all lexer state, repeatedly extracts the next token from the
# unconsumed remainder of the input, closes any indentation still open,
# and returns the result of passing the tokens through Rewriter#rewrite.
# Setting the VERBOSE environment variable prints the raw token stream
# before indentation is closed and the rewriter runs.
def tokenize(code)
  @code    = code.chomp # Input with one trailing line break removed.
  @i       = 0          # Current character position being parsed.
  @line    = 1          # Current line number.
  @indent  = 0          # Current indentation width.
  @indents = []         # Stack of the indentation levels currently open.
  @tokens  = []         # Parsed tokens, each of the form [tag, value].
  @spaced  = nil        # Last value that was followed by whitespace.

  until @i >= @code.length
    @chunk = @code[@i..-1]
    extract_next_token
  end

  puts "original stream: #{@tokens.inspect}" if ENV['VERBOSE']
  close_indentation
  Rewriter.new.rewrite(@tokens)
end
#whitespace_token ⇒ Object
168 169 170 171 172 |
# File 'lib/nscript/lexer/lexer.rb', line 168

# Skips a run of spaces and tabs at the current position.
#
# Returns false when the input does not start with whitespace. Otherwise
# remembers the previous token's value in @spaced (so later matchers can
# tell it was followed by a space) and advances @i; no token is recorded.
def whitespace_token
  blanks = @chunk[WHITESPACE, 1]
  return false unless blanks

  @spaced = last_value
  @i += blanks.length
end