Class: Rucc::Lexer::Impl

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/rucc/lexer/impl.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(files) ⇒ Impl

Returns a new instance of Impl.

Parameters:



13
14
15
16
17
18
19
# File 'lib/rucc/lexer/impl.rb', line 13

# Builds a lexer implementation over the given input files.
#
# @param files [Object] list-like collection of input sources; the first
#   entry is treated as the main input (presumably a FileIO list — verify
#   against caller)
def initialize(files)
  @files = files
  @infile = files.first

  # Stack of token buffers used to implement peek/unget; the bottom
  # (always-present) buffer serves the main stream.
  @buffers = [[]]
  @token_gen = TokenGen.new(@files)
end

Instance Attribute Details

#infile ⇒ Object (readonly)

Returns the value of attribute infile.



20
21
22
# File 'lib/rucc/lexer/impl.rb', line 20

# The main input file (the first entry of the files list given to #initialize).
#
# @return [Object] the stored main input file
def infile
  @infile
end

Instance Method Details

#current_file ⇒ FileIO

Returns:



199
200
201
# File 'lib/rucc/lexer/impl.rb', line 199

# @return [FileIO] the input file that @files currently designates as active
def current_file
  @files.current
end

#lex ⇒ Token

Returns:



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/rucc/lexer/impl.rb', line 36

# Returns the next token.
#
# Previously ungotten or stashed tokens take priority over the raw input
# stream; while a stashed buffer is active and empty, EOF is reported
# until the buffer is unstashed.
#
# @return [Token]
def lex
  stash = @buffers.last
  return stash.pop unless stash.empty?

  # An exhausted stashed buffer (anything above the bottom buffer)
  # yields EOF rather than falling through to the file stream.
  return Token::EOF_TOKEN if @buffers.size > 1

  bol = (current_file.column == 1)
  tok = do_read_token
  # Collapse runs of whitespace tokens, marking the token that follows
  # them as space-preceded.
  while tok.kind == T::SPACE
    tok = do_read_token
    tok.space = true
  end
  tok.bol = bol
  tok
end

#lex_string(s) ⇒ Token

Reads a token from a given string. This function temporarily switches the main input stream to a given string and reads one token.

Parameters:

  • s (String)

Returns:



72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/rucc/lexer/impl.rb', line 72

# Reads a token from a given string. This function temporarily switches
# the main input stream to the given string and reads one token.
#
# @param s [String] source text expected to contain exactly one token
#   (optionally followed by a newline)
# @return [Token]
# @raise [RuntimeError] if input remains after the first token
def lex_string(s)
  @files.stream_stash([FileIO.new(StringIO.new(s), "-")])
  begin
    r = do_read_token
    # Consume a trailing newline if present; result intentionally ignored.
    next?("\n")
    p = get_pos(0)
    unless peek.nil?  # nil peek means EOF, i.e. fully consumed
      raise "#{p}: unconsumed input: #{s}"
      # errorp(p, "unconsumed input: %s", s)
    end
    r
  ensure
    # Restore the original input stream even when the error above is
    # raised, so the lexer is not left reading from the temporary string.
    @files.stream_unstash
  end
end

#push_file(file) ⇒ Object

Parameters:



133
134
135
# File 'lib/rucc/lexer/impl.rb', line 133

# Pushes a file onto the input file stack (presumably for #include
# processing — verify against caller).
#
# @param file [FileIO]
def push_file(file)
  @files.push(file)
end

#read_header_file_name ⇒ <String, Boolean>, <NilClass, NilClass>

Reads a header file name for #include.

Filenames after #include need a special tokenization treatment. A filename string may be quoted by < and > instead of “”. Even if it’s quoted by “”, it’s still different from a regular string token. For example, \ in this context is not interpreted as an escape character. Thus, we cannot use lex() to read a filename.

That the C preprocessor requires a special lexer behavior only for #include is a violation of layering. Ideally, the lexer should be agnostic about higher layers status. But we need this for the C grammar.

Returns:

  • (<String, Boolean>, <NilClass, NilClass>)


98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/rucc/lexer/impl.rb', line 98

# Reads a header file name for #include.
#
# Filenames after #include need special tokenization: the name may be
# quoted by < and > instead of "", and even in the "" form backslash is
# not an escape character, so lex() cannot be used here.
#
# @return [<String, Boolean>, <NilClass, NilClass>] the header name and
#   whether it was a system header (<...> form), or [nil, nil] when no
#   header name follows
# @raise [RuntimeError] on an unterminated or empty header name
def read_header_file_name
  std = nil
  # Header-name lexing only applies when reading straight from the
  # stream, not while replaying buffered tokens.
  if !buffer_empty?
    return nil, std
  end

  skip_space!
  p = get_pos(0)
  if next?('"')
    std = false
    close = '"'
  elsif next?('<')
    std = true
    close = '>'
  else
    return nil, std
  end
  b = ""
  while !next?(close)
    c = readc
    # BUGFIX: the original compared against '\n' (single-quoted), which
    # in Ruby is the two characters backslash + "n" and can never equal
    # a newline character returned by readc. Use "\n" so an unterminated
    # header name is actually detected at end of line.
    if c.nil? || c == "\n"
      raise "#{p}: premature end of header name"
      # errorp(p, "premature end of header name");
    end
    b << c
  end
  if b.empty?
    raise "#{p}: header name should not be empty"
    # errorp(p, "header name should not be empty");
  end

  return b, std
end

#skip_cond_incl! ⇒ Object

Skips a block of code excluded from input by #if, #ifdef and the like. C11 6.10 says that code within #if and #endif needs to be a sequence of valid tokens even if skipped. However, in reality, most compilers don’t tokenize nor validate contents. We don’t do that, too. This function is to skip code until matching #endif as fast as we can.



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/rucc/lexer/impl.rb', line 156

# Skips a block of code excluded from input by #if, #ifdef and the like.
#
# C11 6.10 says that code within #if and #endif needs to be a sequence of
# valid tokens even if skipped; like most compilers, we don't tokenize or
# validate the skipped region — we just scan forward to the matching
# #endif (or #else/#elif at nesting level 0) as fast as we can.
def skip_cond_incl!
  nest = 0  # depth of nested #if/#ifdef/#ifndef inside the skipped region
  while true
    bol = current_file.column == 1  # directive '#' only counts at beginning of line
    skip_space!
    c = readc
    if c.nil?  # EOF
      return
    end
    # Skip character/string literals wholesale so a '#' inside them is
    # not mistaken for a directive.
    if c == '\''
      skip_char!
      next
    end
    if c == '"'
      skip_string!
      next
    end
    if (c != '#' || !bol)
      next
    end
    column = current_file.column - 1  # column of the '#' just consumed
    tok = lex
    if (tok.kind != T::IDENT)
      next
    end
    # At the outermost level, #else/#elif/#endif terminates the skip.
    # Push the directive name and a synthesized '#' back so the caller's
    # directive processing sees the full "#keyword" sequence.
    if (nest == 0) && (Token.is_ident?(tok, "else") || Token.is_ident?(tok, "elif") || Token.is_ident?(tok, "endif"))
      unget_token(tok)
      hash = @token_gen.make_keyword('#')
      hash.bol = true
      hash.column = column
      unget_token(hash)
      return
    end
    # Track nested conditionals so their #endif doesn't end our region.
    if Token.is_ident?(tok, "if") || Token.is_ident?(tok, "ifdef") || Token.is_ident?(tok, "ifndef")
      nest += 1
    elsif (nest > 0) && Token.is_ident?(tok, "endif")
      nest -= 1
    end
    skip_line!
  end
end

#token_buffer_stash(buf) ⇒ Object

Temporarily switches the input token stream to given list of tokens, so that you can get the tokens as return values of lex() again. After the tokens are exhausted, EOF is returned from lex() until “unstash” is called to restore the original state.

Parameters:



143
144
145
# File 'lib/rucc/lexer/impl.rb', line 143

# Temporarily switches the input token stream to the given list of tokens,
# so that lex() returns them. After they are exhausted, lex() returns EOF
# until #token_buffer_unstash restores the previous state.
#
# @param buf [Array<Token>]
def token_buffer_stash(buf)
  @buffers.push(buf)
end

#token_buffer_unstash ⇒ Object



147
148
149
# File 'lib/rucc/lexer/impl.rb', line 147

# Restores the token stream that was active before the matching
# #token_buffer_stash call.
def token_buffer_unstash
  @buffers.pop
end

#unget_all(tokens) ⇒ Object

Parameters:



31
32
33
# File 'lib/rucc/lexer/impl.rb', line 31

# Pushes all tokens back onto the lookahead buffer so that lex() returns
# them again in their original order.
#
# @param tokens [Array<Token>]
def unget_all(tokens)
  # Unget in reverse so the first element ends up on top of the buffer.
  # reverse_each avoids allocating the intermediate array that
  # tokens.reverse.each would create.
  tokens.reverse_each { |token| unget_token(token) }
end

#unget_token(tok) ⇒ Object

Parameters:



24
25
26
27
28
# File 'lib/rucc/lexer/impl.rb', line 24

# Pushes a single token back so it becomes the next token returned by
# lex(). EOF tokens are deliberately never ungotten.
#
# @param tok [Token]
def unget_token(tok)
  return if tok.kind == T::EOF  # EOF is not buffered
  @buffers.last.push(tok)
end