Class: ORB::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/orb/tokenizer.rb

Constant Summary collapse

# Character classes consulted by the tokenizer. Every array constant below is
# frozen so that it cannot be mutated at runtime.
#
# NOTE: in a double-quoted Ruby string "\s" is the same character as " ", so
# SPACE_CHARS contains the space character twice. The duplicate is kept so the
# array contents stay exactly as before.
SPACE_CHARS =
[" ", "\s", "\t", "\r", "\n", "\f"].freeze
# Array#+ returns a new, unfrozen array even when both operands are frozen,
# so each derived list is frozen explicitly.
NAME_STOP_CHARS =
(SPACE_CHARS + [">", "/", "="]).freeze
UNQUOTED_VALUE_INVALID_CHARS =
['"', "'", "=", "<", "`"].freeze
UNQUOTED_VALUE_STOP_CHARS =
(SPACE_CHARS + [">"]).freeze
BLOCK_NAME_STOP_CHARS =
(SPACE_CHARS + ["}"]).freeze
# Delimiter strings, grouped as START/END pairs per construct.
START_TAG_START =
"<"
START_TAG_END =
">"
START_TAG_END_SELF_CLOSING =
"/>"
END_TAG_START =
"</"
END_TAG_END =
">"
COMMENT_START =
"<!--"
COMMENT_END =
"-->"
PCOMMENT_START =
"{!--"
PCOMMENT_END =
"--}"
PEXPRESSION_START =
"{{"
PEXPRESSION_END =
"}}"
NPEXPRESSION_START =
"{%"
NPEXPRESSION_END =
"%}"
START_BLOCK_START =
"{#"
START_BLOCK_END =
"}"
END_BLOCK_START =
"{/"
END_BLOCK_END =
"}"
ERB_START =
"<%"
ERB_END =
"%>"
# Single-character markers.
ATTRIBUTE_ASSIGN =
"="
SINGLE_QUOTE =
"'"
DOUBLE_QUOTE =
'"'
BRACE_OPEN =
"{"
BRACE_CLOSE =
"}"
# Line terminators.
CR =
"\r"
NL =
"\n"
CRLF =
"\r\n"
# Tag-name lists. NOTE(review): presumably IGNORED_BODY_TAGS are tags whose
# body is not tokenized and VOID_ELEMENTS are tags without a closing tag --
# confirm against the code that reads them.
IGNORED_BODY_TAGS =
%w[script style].freeze
VOID_ELEMENTS =
%w[area base br col command embed hr img input keygen link meta param source track wbr].freeze
HUMAN_READABLE_STATE_NAMES =

Human-readable names for the tokenizer states, used in error messages.

# Maps a state symbol to the label shown for it in error messages.
# Several states deliberately share one label (for example every
# attribute_value_* state is reported as "Attribute Value").
{
  # Top-level input and comments
  initial: "Input",
  comment: "Comment",
  pcomment: "ORB Comment",
  # Tags
  tag_open: "Tag",
  tag_close: "Closing Tag",
  tag_name: "Tag Name",
  maybe_tag_open_end: "Tag",
  maybe_tag_close_end: "Closing Tag",
  # Attributes and attribute values
  tag_attribute: "Attribute",
  attribute_maybe_value: "Attribute Value",
  attribute_value_begin: "Attribute Value",
  attribute_value_double_quote: "Attribute Value",
  attribute_value_single_quote: "Attribute Value",
  attribute_value_expression: "Attribute Value",
  # Blocks
  block_open: "Block",
  maybe_block_end: "Block",
  block_close: "Block",
  # Expressions
  pexpression: "Expression",
  npexpression: "Expression",
  erb_expression: "Expression",
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, opts = {}) ⇒ Tokenizer

Returns a new instance of Tokenizer.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/orb/tokenizer.rb', line 69

def initialize(source, opts = {})
  @source = source
  @tokens = []
  @errors = []

  # Options
  @file = opts.fetch(:file, :nofile)
  @line = opts.fetch(:line, 1)
  @column = opts.fetch(:column, 1)
  @indentation = opts.fetch(:indentation, 0)
  @raise_errors = opts.fetch(:raise_errors, false)

  # State
  @cursor = 0
  @buffer = StringIO.new
  @current_line = @line
  @current_column = @column
  @column_offset = @indentation + 1
  @embedded_expression = false
  clear_braces
  clear_attributes
  transition_to(:initial)
end

Instance Attribute Details

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



7
8
9
# File 'lib/orb/tokenizer.rb', line 7

# Read-only accessor for the @errors instance variable, which the
# constructor initializes to an empty array.
#
# @return [Object] the current value of @errors
def errors
  @errors
end

#tokens ⇒ Object (readonly)

Returns the value of attribute tokens.



7
8
9
# File 'lib/orb/tokenizer.rb', line 7

# Read-only accessor for the @tokens instance variable, the array that
# #tokenize! appends to and returns.
#
# @return [Object] the current value of @tokens
def tokens
  @tokens
end

Instance Method Details

#tokenize! ⇒ Object Also known as: tokenize

Main entry point, and only public method. Tokenize the source string and return the tokens. If any errors are encountered during tokenization, this method will raise the first error.



95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/orb/tokenizer.rb', line 95

# Tokenizes the whole source and returns the collected tokens.
#
# Calls next_token repeatedly until the cursor reaches the end of the
# source, flushes any text left in the buffer as a final :text token, and
# then runs the two post-tokenization checks.
#
# NOTE(review): whether errors are raised or only collected is decided inside
# the check helpers, which are not visible here -- confirm against them.
#
# @return [Array] the @tokens array
def tokenize!
  # Advance the state machine until every character has been consumed
  next_token until @cursor >= @source.length

  # Emit trailing buffered text, skipping a whitespace-only remainder
  remainder = consume_buffer
  @tokens.push(Token.new(:text, remainder)) unless remainder.strip.empty?

  # Validate the final tokenizer state and report any problems
  check_tokenizer_state
  check_for_unclosed_blocks_or_tags

  @tokens
end