Class: Zaid::LexerComponents::Tokenizer

Inherits: Object
Includes: Keywords
Defined in: lib/zaid/lexer_components/tokenizer.rb

Constant Summary

KEYWORDS_MAPPING =
{
  AND => :AND,
  CLASS => :CLASS,
  DIVIDE => :DIVIDE,
  ELSE => :ELSE,
  EQUALS => :EQUALS,
  FALSE => :FALSE,
  GREATER => :GREATER,
  IF => :IF,
  IS => :IS,
  IT_IS => :IT_IS,
  LESS => :LESS,
  METHOD => :METHOD,
  MINUS => :MINUS,
  NIL => :NIL,
  NOT => :NOT,
  OR => :OR,
  PLUS => :PLUS,
  RECEIVE => :RECEIVE,
  THAN => :THAN,
  THEN => :THEN,
  TIMES => :TIMES,
  TRUE => :TRUE,
  WAS => :WAS,
  WHILE => :WHILE
}.freeze
COMMENT_PREFIXES =
['#', 'تعليق:', 'ملاحظة:', 'سؤال:'].freeze # the Arabic prefixes read "comment:", "note:", and "question:"
INDENT_KEYWORDS =
[THEN, IS, ELSE, IT_IS].freeze
ARABIC_CHARACTERS =
'ابتةثجحخدذرزسشصضطظعغفقكلمنهوىيءآأؤإئ'
ARABIC_DIGITS =
'٠١٢٣٤٥٦٧٨٩'
ENGLISH_DIGITS =
'0123456789'
DIGITS =
[ARABIC_DIGITS, ENGLISH_DIGITS].join
TOKEN_PATTERNS =
[
  { pattern: /\G((#{Regexp.union(COMMENT_PREFIXES)}).*$)/, type: :comment },
  { pattern: /\G([#{ARABIC_CHARACTERS}_ـ][#{ARABIC_CHARACTERS}#{DIGITS}_ـ]*؟?)/, type: :identifier },
  { pattern: /\G([#{DIGITS}]+\.[#{DIGITS}]+)/, type: :float },
  { pattern: /\G([#{DIGITS}]+)/, type: :number },
  { pattern: /\G"([^"]*)"/, type: :string },
  { pattern: /\G\n( *)/m, type: :dedent },
  { pattern: /\G(\|\||&&|==|!=|<=|>=|<|>)/, type: :operator },
  { pattern: /\G(.)/, type: :single_character }
].freeze
INDENT_PATTERN =
/\G\n( +)/m
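
Each TOKEN_PATTERNS entry is anchored with \G, which, when a pattern is applied via Regexp#match(str, pos), matches exactly at pos; the tokenizer can therefore try the patterns in order at the current scan position without slicing the string. Order matters here, since the catch-all single_character entry would shadow every pattern after it. An :identifier whose text equals one of the Keywords constants can then be re-tagged through KEYWORDS_MAPPING. A minimal sketch of the first-match idea (first_match_at is a hypothetical helper, not the gem's parse_token, which also handles keywords and the indent stack):

# Hypothetical helper illustrating \G-anchored, first-match scanning.
def first_match_at(code, position)
  TOKEN_PATTERNS.each do |entry|
    match = entry[:pattern].match(code, position)
    next unless match

    # Token type, captured text, and how many characters were consumed.
    return [entry[:type], match[1], match[0].size]
  end
end

first_match_at('س = ٥', 0) # => [:identifier, "س", 1]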

Constants included from Keywords

Keywords::AND, Keywords::CLASS, Keywords::DIVIDE, Keywords::ELSE, Keywords::EQUALS, Keywords::FALSE, Keywords::GREATER, Keywords::IF, Keywords::IS, Keywords::IT_IS, Keywords::LESS, Keywords::METHOD, Keywords::MINUS, Keywords::NIL, Keywords::NOT, Keywords::OR, Keywords::PLUS, Keywords::RECEIVE, Keywords::THAN, Keywords::THEN, Keywords::TIMES, Keywords::TRUE, Keywords::WAS, Keywords::WHILE
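
The :dedent entry in TOKEN_PATTERNS and INDENT_PATTERN both match a newline followed by leading spaces; INDENT_PATTERN additionally requires at least one space. Presumably parse_token compares the captured width against the indent stack, with INDENT_KEYWORDS marking the keywords after which a deeper indentation level is expected. A hedged sketch of the usual stack discipline (track_indent is a hypothetical helper, not the gem's code; the real bookkeeping lives in parse_token):

# Hypothetical sketch of indent-stack bookkeeping after a newline match.
def track_indent(width, tokens, indent_stack)
  if width > (indent_stack.last || 0)
    # Deeper indentation opens a new block.
    indent_stack.push(width)
    tokens << [:INDENT, width]
  else
    # Shallower indentation closes blocks until the widths agree.
    tokens << [:DEDENT, indent_stack.pop] while (indent_stack.last || 0) > width
  end
end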

Instance Method Summary

Instance Method Details

#tokenize(code, run_compression: true) ⇒ Object



# File 'lib/zaid/lexer_components/tokenizer.rb', line 58

def tokenize(code, run_compression: true)
  code = code.chomp

  tokens = []
  indent_stack = []

  # Scan left to right; parse_token appends to tokens (and maintains
  # indent_stack) and returns the number of characters it consumed.
  parsing_position = 0
  parsing_position += parse_token(code, tokens, indent_stack, parsing_position) while parsing_position < code.size

  # Emit a DEDENT for every indentation level still open at end of input.
  tokens << [:DEDENT, indent_stack.last || 0] while indent_stack.pop

  run_compression ? Compressor.new.compress(tokens) : tokens
end
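
A usage sketch (assuming the gem loads via require 'zaid'; the token shapes below are illustrative, since the exact output depends on parse_token and on the Compressor):

require 'zaid'

tokenizer = Zaid::LexerComponents::Tokenizer.new
tokens = tokenizer.tokenize('س = ٥', run_compression: false)
# Tokens come back as [type, value] pairs, e.g. an :identifier for "س",
# :single_character entries for the spaces and "=", and a :number for "٥".
# Passing run_compression: false skips the Compressor pass that normally
# post-processes the raw token stream.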