Class: Lisp::Tokenizer

Inherits: Object

Inheritance chain: Object → Lisp::Tokenizer

Defined in: lib/rubylisp/tokenizer.rb

Instance Attribute Summary collapse

#line_number ⇒ Object readonly

Returns the value of attribute line_number.

Instance Method Summary collapse

Constructor Details

#initialize(src) ⇒ `Tokenizer`

# File 'lib/rubylisp/tokenizer.rb', line 7

# Creates a tokenizer over +src+ with the cursor on the first character.
#
# No token is read here: the lookahead fields only hold placeholder values
# until #consume_token runs (which #init does).
#
# @param src [String] the source text to tokenize
def initialize(src)
  @source = src
  @position = 0
  # Start the line counter here too, so #line_number is never nil and
  # #read_next_token can increment it even if #init has not been called.
  @line_number = 0
  @lookahead_token = 0
  @lookahead_literal = ''
end

Instance Attribute Details

#line_number ⇒ `Object` (readonly)

Returns the value of attribute line_number.



5
6
7

# File 'lib/rubylisp/tokenizer.rb', line 5

# Reader for the tokenizer's line counter.
#
# @return [Integer, nil] the current value of @line_number; #init sets it to
#   0 and #read_next_token adds one for every newline it skips
def line_number
  @line_number
end

Instance Method Details

#almost_eof? ⇒ `Boolean`



22
23
24

# File 'lib/rubylisp/tokenizer.rb', line 22

# Tells whether the cursor is on the final character of the source.
#
# @return [Boolean] true when the character after the cursor would be past
#   the end of @source
def almost_eof?
  @position + 1 == @source.length
end

#consume_token ⇒ `Object`

# File 'lib/rubylisp/tokenizer.rb', line 280

# Advances the lookahead to the next non-comment token.
#
# Stores the pair returned by #read_next_token in @lookahead_token and
# @lookahead_literal, discarding any :COMMENT tokens on the way. The skipping
# is done with a loop rather than by recursion, so a long run of consecutive
# comments cannot exhaust the call stack.
#
# @return [nil]
def consume_token
  loop do
    @lookahead_token, @lookahead_literal = read_next_token
    break unless @lookahead_token == :COMMENT
  end
end

#digit?(ch) ⇒ `Boolean`



38
39
40

# File 'lib/rubylisp/tokenizer.rb', line 38

# Tests +ch+ against the POSIX digit character class.
#
# @param ch [String, nil] the character to test
# @return [Integer, nil] the match offset (truthy) when a digit is found,
#   nil otherwise
def digit?(ch)
  /[[:digit:]]/ =~ ch
end

#divider?(ch) ⇒ `Boolean`



152
153
154

# File 'lib/rubylisp/tokenizer.rb', line 152

# Tests whether +ch+ is whitespace or one of the bracketing characters
# ( ) { } < > [ ]. #read_character uses this to find where a character
# literal stops.
#
# @param ch [String, nil] the character to test
# @return [Integer, nil] the match offset (truthy) on a match, nil otherwise
def divider?(ch)
  /[[[:space:]]\(\)\{\}<>\[\]]/ =~ ch
end

#eof? ⇒ `Boolean`



18
19
20

# File 'lib/rubylisp/tokenizer.rb', line 18

# Tells whether the cursor has moved past the last character of the source.
#
# @return [Boolean] true when @position is at or beyond @source.length
def eof?
  @source.length <= @position
end

#hex?(ch) ⇒ `Boolean`



34
35
36

# File 'lib/rubylisp/tokenizer.rb', line 34

# Tests whether +ch+ contains one of the hexadecimal letters a-f or A-F.
# Decimal digits are not matched here; #read_number checks those with #digit?.
#
# @param ch [String, nil] the character to test
# @return [Integer, nil] the match offset (truthy) on a match, nil otherwise
def hex?(ch)
  /[abcdefABCDEF]/ =~ ch
end

#init ⇒ `Object`

# File 'lib/rubylisp/tokenizer.rb', line 285

# Prepares the tokenizer for use: resets the line counter to 0 and reads the
# first token into the lookahead via #consume_token.
#
# @return [Object] whatever #consume_token returns
def init
  # Must be set before the first read: #read_next_token increments it.
  @line_number = 0
  consume_token
end

#letter?(ch) ⇒ `Boolean`



30
31
32

# File 'lib/rubylisp/tokenizer.rb', line 30

# Tests +ch+ against the POSIX alpha character class.
#
# @param ch [String, nil] the character to test
# @return [Integer, nil] the match offset (truthy) when a letter is found,
#   nil otherwise
def letter?(ch)
  /[[:alpha:]]/ =~ ch
end

#next_char ⇒ `Object`



26
27
28

# File 'lib/rubylisp/tokenizer.rb', line 26

# Peeks at the character immediately after the cursor without moving it.
#
# @return [String, nil] the following character, or nil when the cursor is on
#   the last character (or there is nothing at that index)
def next_char
  @source[@position + 1] unless almost_eof?
end

#next_token ⇒ `Object`



14
15
16

# File 'lib/rubylisp/tokenizer.rb', line 14

# Reports the current lookahead without consuming it.
#
# @return [Array] a two-element array: the lookahead token type followed by
#   its literal text
def next_token
  [@lookahead_token, @lookahead_literal]
end

#number?(ch) ⇒ `Boolean`



42
43
44

# File 'lib/rubylisp/tokenizer.rb', line 42

# Tests whether +ch+ can begin a numeric literal: either it is a digit, or it
# is a minus sign and the character after the cursor (#next_char) is a digit.
#
# @param ch [String, nil] the character to test
# @return [Integer, false, nil] a truthy match offset when +ch+ starts a
#   number, otherwise false or nil
def number?(ch)
  digit_match = digit?(ch)
  return digit_match if digit_match

  ch == '-' && digit?(next_char)
end

#process_escapes(str) ⇒ `Object`

# File 'lib/rubylisp/tokenizer.rb', line 110

# Expands backslash escapes in +str+.
#
# Recognised sequences are \n (newline), \t (tab) and \\ (one backslash).
# Any other escaped character is copied through together with its backslash,
# and a lone backslash at the very end of the string is kept as-is.
#
# @param str [String] raw text, possibly containing escape sequences
# @return [String] a new string with the recognised escapes replaced
def process_escapes(str)
  replacements = { "n" => "\n", "t" => "\t", "\\" => "\\" }
  output = ""
  after_backslash = false

  str.each_char do |ch|
    if after_backslash
      # Unknown escapes keep their backslash.
      output << replacements.fetch(ch) { "\\#{ch}" }
      after_backslash = false
    elsif ch == "\\"
      after_backslash = true
    else
      output << ch
    end
  end

  # A backslash with nothing after it is emitted unchanged.
  output << "\\" if after_backslash
  output
end

#read_character ⇒ `Object`

# File 'lib/rubylisp/tokenizer.rb', line 157

# Reads a character literal. The cursor is expected to be on the two-character
# prefix that #read_next_token detected ("#" followed by a backslash).
#
# The character right after the prefix is always taken, even when it is a
# divider; reading then continues up to the next divider or the end of input.
#
# @return [Array] [:CHARACTER, text] where text is everything after the prefix
def read_character
  start = @position + 2
  # Step over the first character unconditionally.
  @position = start + 1
  @position += 1 until eof? || divider?(@source[@position])

  [:CHARACTER, @source[start...@position]]
end

#read_next_token ⇒ `Object`

# File 'lib/rubylisp/tokenizer.rb', line 169

# Scans one raw token from @source, starting at @position.
#
# Leading whitespace is skipped (counting newlines in @line_number). The
# token kind is then decided from the current character and, for two-character
# forms, the one after it. Symbols, numbers, strings and character literals
# are delegated to the matching read_* helper; every other branch advances
# @position itself. The branches are tried in order, so earlier tests take
# priority over later ones.
#
# @return [Array] a two-element array: the token type (a Symbol such as
#   :LPAREN, :SYMBOL, :COMMENT, :EOF or :ILLEGAL) followed by its literal text
def read_next_token
  return :EOF, '' if eof?

  # Skip whitespace, keeping the line counter up to date.
  while space?(@source[@position])
    @line_number += 1 if @source[@position] == ?\n
    @position += 1
    return :EOF, '' if eof?
  end

  current_ch = @source[@position]
  # next_ch stays nil when the cursor is on the last character.
  next_ch = @source[@position + 1] unless almost_eof?

  # A symbol starts with a letter, or with one of * . _ followed by a letter.
  if letter?(current_ch) || ('*._'.include?(current_ch) && letter?(next_ch))
    return read_symbol
  elsif number?(current_ch)
    return read_number
  elsif current_ch == ?- && number?(next_ch)
    return read_number
  # "#x" introduces a hexadecimal literal, also handled by read_number.
  elsif current_ch == ?# && next_ch == ?x
    return read_number
  elsif current_ch == ?"
    return read_string
  elsif current_ch == ?# && next_ch == ?\\
    return read_character
  elsif current_ch == ?'
    @position += 1
    return :QUOTE, "'"
  elsif current_ch == ?`
    @position += 1
    return :BACKQUOTE, "`"
  # ",@" must be tested before the plain "," branch below.
  elsif current_ch == ?, && next_ch == ?@
    @position += 2
    return :COMMAAT, ",@"
  elsif current_ch == ?,
    @position += 1
    return :COMMA, ","
  elsif current_ch == ?(
    @position += 1
    return :LPAREN, "("
  elsif current_ch == ?)
    @position += 1
    return :RPAREN, ")"
  elsif current_ch == ?{
    @position += 1
    return :LBRACE, "{"
  elsif current_ch == ?}
    @position += 1
    return :RBRACE, "}"
  elsif current_ch == ?[
    @position += 1
    return :LBRACKET, "["
  elsif current_ch == ?]
    @position += 1
    return :RBRACKET, "]"
  elsif current_ch == ?.
    @position += 1
    return :PERIOD, "."
  # "/=" is reported with the literal "!=".
  elsif current_ch == ?/ && next_ch == ?=
    @position += 2
    return :SYMBOL, "!="
  elsif current_ch == ?- && next_ch == ?>
    @position += 2
    return :SYMBOL, "->"
  elsif current_ch == ?= && next_ch == ?>
    @position += 2
    return :SYMBOL, "=>"
  # Single-character arithmetic operators; the two-character forms starting
  # with "/" or "-" were already handled above.
  elsif "+-*/%".include?(current_ch)
    @position += 1
    return :SYMBOL, current_ch.to_s
  elsif current_ch == ?< && next_ch == ?=
    @position += 2
    return :SYMBOL, "<="
  elsif current_ch == ?<
    @position += 1
    return :SYMBOL, "<"
  elsif current_ch == ?> && next_ch == ?=
    @position += 2
    return :SYMBOL, ">="
  elsif current_ch == ?>
    @position += 1
    return :SYMBOL, ">"
  # "==" is reported with the same literal as a single "=".
  elsif current_ch == ?= && next_ch == ?=
    @position += 2
    return :SYMBOL, "="
  elsif current_ch == ?=
    @position += 1
    return :SYMBOL, "="
  elsif current_ch == ?! && next_ch == ?=
    @position += 2
    return :SYMBOL, "!="
  elsif current_ch == ?!
    @position += 1
    return :SYMBOL, "!"
  elsif current_ch == ?# && next_ch == ?t
    @position += 2
    return :TRUE, "#t"
  elsif current_ch == ?# && next_ch == ?f
    @position += 2
    return :FALSE, "#f"
  # A comment runs from ";" to the end of the line or the end of input. The
  # terminating newline is not consumed; the cursor is left on it.
  elsif current_ch == ?;
    start = @position
    while true
      return :COMMENT, @source[start..-1] if eof?
      return :COMMENT, @source[start...@position] if @source[@position] == ?\n
      @position += 1
    end
  else
    # Unrecognised character: note that @position is NOT advanced here.
    return :ILLEGAL, ''
  end
end

#read_number ⇒ `Object`

# File 'lib/rubylisp/tokenizer.rb', line 85

# Reads a numeric literal starting at the cursor.
#
# Accepts an optional leading "-", then either a "#x" prefix followed by
# hexadecimal digits, or decimal digits with at most one ".". Scanning stops
# at the first character that does not fit (a second "." ends the literal).
#
# @return [Array] [type, text] where type is :HEXNUMBER, :FLOAT or :NUMBER
#   and text is the literal exactly as written, including any sign or prefix
def read_number
  start = @position
  @position += 1 if @source[@position] == '-'

  hex = @source[@position, 2] == "#x"
  @position += 2 if hex

  seen_point = false
  until eof?
    ch = @source[@position]
    if ch == ?.
      # A decimal point is only allowed once, and never in a hex literal.
      break if hex || seen_point
      seen_point = true
    elsif !(digit?(ch) || (hex && hex?(ch)))
      break
    end
    @position += 1
  end

  literal = @source[start...@position]
  return :HEXNUMBER, literal if hex
  return :FLOAT, literal if seen_point

  [:NUMBER, literal]
end

#read_string ⇒ `Object`

# File 'lib/rubylisp/tokenizer.rb', line 139

# Reads a double-quoted string literal; the cursor must be on the opening
# quote.
#
# The literal ends at the next '"' character. A backslash before a quote is
# not treated specially here, so the first quote found always ends the string.
# The text handed to #process_escapes includes both surrounding quotes.
#
# @return [Array] [:STRING, text] on success, or [:EOF, ''] when the input
#   ends before a closing quote is found
def read_string
  opening = @position
  @position += 1
  @position += 1 until eof? || @source[@position] == ?"
  return :EOF, '' if eof?

  # Step past the closing quote so it is part of the slice.
  @position += 1
  [:STRING, process_escapes(@source[opening...@position])]
end

#read_symbol ⇒ `Object`

# File 'lib/rubylisp/tokenizer.rb', line 58

# Reads a symbol-like token starting at the cursor and classifies it.
#
# * A leading "." marks an :FFI_SEND_SYMBOL; the dot is dropped from the text.
# * A "." that is not followed by a symbol character, seen while no kind has
#   been chosen yet, marks an :FFI_NEW_SYMBOL; the dot is dropped from the text.
# * A "/" followed by a symbol character marks an :FFI_STATIC_SYMBOL; the
#   whole text, slash included, is returned.
# * Anything else is a plain :SYMBOL.
#
# @return [Array] [type, text] with type one of the four kinds above
def read_symbol
  start = @position
  kind = nil
  if @source[@position] == '.'
    kind = :FFI_SEND_SYMBOL
    @position += 1
  end

  until eof?
    ch = @source[@position]
    following = @source[@position + 1]
    if ch == '.'
      # Only a trailing dot is accepted, and only when no kind is set yet.
      break unless !symbol_character?(following) && kind.nil?
      kind = :FFI_NEW_SYMBOL
    elsif ch == '/'
      # A slash is part of the token only when more symbol text follows.
      break unless symbol_character?(following)
      kind = :FFI_STATIC_SYMBOL
    elsif !symbol_character?(ch)
      break
    end
    @position += 1
  end

  kind ||= :SYMBOL
  text =
    case kind
    when :FFI_SEND_SYMBOL then @source[(start + 1)...@position]
    when :FFI_NEW_SYMBOL then @source[start...(@position - 1)]
    else @source[start...@position]
    end
  [kind, text]
end

#space?(ch) ⇒ `Boolean`



47
48
49

# File 'lib/rubylisp/tokenizer.rb', line 47

# Tests +ch+ against the POSIX space character class.
#
# @param ch [String, nil] the character to test
# @return [Integer, nil] the match offset (truthy) when whitespace is found,
#   nil otherwise
def space?(ch)
  /[[:space:]]/ =~ ch
end

#symbol_character?(ch) ⇒ `Boolean`

# File 'lib/rubylisp/tokenizer.rb', line 52

# Tests whether +ch+ may appear inside a symbol: a letter, an ASCII digit
# 0-9, or one of the punctuation characters - _ ? ! : * = < >.
#
# @param ch [String, nil] the character to test; nil (for example a lookahead
#   past the end of the source) is accepted and is never a symbol character
# @return [Boolean] true when +ch+ is a symbol character
def symbol_character?(ch)
  # String#include? raises TypeError for nil, so reject it up front.
  return false if ch.nil?
  return true if letter?(ch)
  return true if (?0..?9).include?(ch)

  "-_?!:*=<>".include?(ch)
end

Class: Lisp::Tokenizer

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src) ⇒ Tokenizer

Instance Attribute Details

#line_number ⇒ Object (readonly)

Instance Method Details

#almost_eof? ⇒ Boolean

#consume_token ⇒ Object

#digit?(ch) ⇒ Boolean

#divider?(ch) ⇒ Boolean

#eof? ⇒ Boolean

#hex?(ch) ⇒ Boolean

#init ⇒ Object

#letter?(ch) ⇒ Boolean

#next_char ⇒ Object

#next_token ⇒ Object

#number?(ch) ⇒ Boolean

#process_escapes(str) ⇒ Object

#read_character ⇒ Object

#read_next_token ⇒ Object

#read_number ⇒ Object

#read_string ⇒ Object

#read_symbol ⇒ Object

#space?(ch) ⇒ Boolean

#symbol_character?(ch) ⇒ Boolean

#initialize(src) ⇒ `Tokenizer`

#line_number ⇒ `Object` (readonly)

#almost_eof? ⇒ `Boolean`

#consume_token ⇒ `Object`

#digit?(ch) ⇒ `Boolean`

#divider?(ch) ⇒ `Boolean`

#eof? ⇒ `Boolean`

#hex?(ch) ⇒ `Boolean`

#init ⇒ `Object`

#letter?(ch) ⇒ `Boolean`

#next_char ⇒ `Object`

#next_token ⇒ `Object`

#number?(ch) ⇒ `Boolean`

#process_escapes(str) ⇒ `Object`

#read_character ⇒ `Object`

#read_next_token ⇒ `Object`

#read_number ⇒ `Object`

#read_string ⇒ `Object`

#read_symbol ⇒ `Object`

#space?(ch) ⇒ `Boolean`

#symbol_character?(ch) ⇒ `Boolean`