Class: ORB::Tokenizer

Inherits: Object

Inheritance hierarchy: Object → ORB::Tokenizer

Defined in: lib/orb/tokenizer.rb

Constant Summary collapse

# Whitespace characters: space, tab, carriage return, line feed and form feed.
# ("\s" in a double-quoted Ruby string is the same character as " ", so the
# space is listed only once.)
SPACE_CHARS =

[" ", "\t", "\r", "\n", "\f"].freeze

# Whitespace plus ">", "/" and "=". Judging by the constant name these end a
# tag/attribute name; the usage is not part of this listing — confirm there.
NAME_STOP_CHARS =

(SPACE_CHARS + [">", "/", "="]).freeze

# Quote, "=", "<" and backtick characters. Judging by the constant name these
# are rejected inside an unquoted attribute value — confirm against usage.
UNQUOTED_VALUE_INVALID_CHARS =

['"', "'", "=", "<", "`"].freeze

# Whitespace plus ">". Judging by the constant name these end an unquoted
# attribute value — confirm against usage.
UNQUOTED_VALUE_STOP_CHARS =

(SPACE_CHARS + [">"]).freeze

# Whitespace plus "}". Judging by the constant name these end a block name —
# confirm against usage.
BLOCK_NAME_STOP_CHARS =

(SPACE_CHARS + ["}"]).freeze

# Delimiters for HTML-style start tags, self-closing tags and end tags.
START_TAG_START =

"<"

START_TAG_END =

">"

START_TAG_END_SELF_CLOSING =

"/>"

END_TAG_START =

"</"

END_TAG_END =

">"

# Delimiters for HTML comments.
COMMENT_START =

"<!--"

COMMENT_END =

"-->"

# Delimiters for ORB comments (the :pcomment state is labelled "ORB Comment"
# in HUMAN_READABLE_STATE_NAMES).
PCOMMENT_START =

"{!--"

PCOMMENT_END =

"--}"

# Delimiters for "{{ ... }}" expressions.
# NOTE(review): the "P"/"NP" prefixes presumably mean printing vs.
# non-printing expressions — confirm against the tokenizer states.
PEXPRESSION_START =

"{{"

PEXPRESSION_END =

"}}"

# Delimiters for "{% ... %}" expressions.
NPEXPRESSION_START =

"{%"

NPEXPRESSION_END =

"%}"

# Delimiters for block openers ("{#name ...}") and block closers ("{/name}").
START_BLOCK_START =

"{#"

START_BLOCK_END =

"}"

END_BLOCK_START =

"{/"

END_BLOCK_END =

"}"

# Delimiters for ERB tags.
ERB_START =

"<%"

ERB_END =

"%>"

# Single-character markers used around attributes and brace expressions.
ATTRIBUTE_ASSIGN =

"="

SINGLE_QUOTE =

"'"

DOUBLE_QUOTE =

'"'

BRACE_OPEN =

"{"

BRACE_CLOSE =

"}"

# Line-ending sequences.
CR =

"\r"

NL =

"\n"

CRLF =

"\r\n"

# Tag names listed as having an ignored body.
# NOTE(review): presumably the contents of these tags are not tokenized as
# markup — confirm against the code that reads this constant.
IGNORED_BODY_TAGS =

%w[script style].freeze

# Element names treated as void elements (per the constant name).
VOID_ELEMENTS =

%w[area base br col command embed hr img input keygen link meta param source track wbr].freeze

# For error messages: maps each tokenizer state symbol to the human-readable
# name shown for that state. Several states share one label (for example,
# every attribute_value_* state reads "Attribute Value").
HUMAN_READABLE_STATE_NAMES =

{
  initial: "Input",
  comment: "Comment",
  pcomment: "ORB Comment",
  tag_open: "Tag",
  tag_close: "Closing Tag",
  tag_name: "Tag Name",
  maybe_tag_open_end: "Tag",
  maybe_tag_close_end: "Closing Tag",
  tag_attribute: "Attribute",
  attribute_maybe_value: "Attribute Value",
  attribute_value_begin: "Attribute Value",
  attribute_value_double_quote: "Attribute Value",
  attribute_value_single_quote: "Attribute Value",
  attribute_value_expression: "Attribute Value",
  block_open: "Block",
  maybe_block_end: "Block",
  block_close: "Block",
  pexpression: "Expression",
  npexpression: "Expression",
  erb_expression: "Expression",
}.freeze

Instance Attribute Summary collapse

#errors ⇒ Object readonly

Returns the value of attribute errors.
#tokens ⇒ Object readonly

Returns the value of attribute tokens.

Instance Method Summary collapse

#initialize(source, opts = {}) ⇒ Tokenizer constructor

A new instance of Tokenizer.
#tokenize! ⇒ Object (also: #tokenize)

Main entry point, and only public method.

Constructor Details

#initialize(source, opts = {}) ⇒ `Tokenizer`

Returns a new instance of Tokenizer.

# File 'lib/orb/tokenizer.rb', line 69

# Sets up a tokenizer for +source+ and puts it into the :initial state.
#
# @param source [#length] the input to tokenize (its #length bounds the scan
#   in #tokenize!)
# @param opts [Hash] optional settings, each read with Hash#fetch:
#   :file (default :nofile), :line (default 1), :column (default 1),
#   :indentation (default 0) and :raise_errors (default false)
def initialize(source, opts = {})
  # Input, plus the collections exposed through #tokens and #errors
  @source = source
  @tokens = []
  @errors = []

  # Options. The starting line/column also seed the running position
  # counters, and the column offset is derived from the indentation.
  @file = opts.fetch(:file, :nofile)
  @current_line = @line = opts.fetch(:line, 1)
  @current_column = @column = opts.fetch(:column, 1)
  @indentation = opts.fetch(:indentation, 0)
  @column_offset = @indentation + 1
  @raise_errors = opts.fetch(:raise_errors, false)

  # Scanner state: start at the beginning with an empty buffer
  @cursor = 0
  @buffer = StringIO.new
  @embedded_expression = false

  # Reset the remaining bookkeeping, then enter the initial state
  clear_braces
  clear_attributes
  transition_to(:initial)
end

Instance Attribute Details

#errors ⇒ `Object` (readonly)

Returns the value of attribute errors.



7
8
9

# File 'lib/orb/tokenizer.rb', line 7

# Reader for the @errors instance variable, which #initialize sets to an
# empty Array.
#
# @return [Object] the current value of @errors
def errors
  @errors
end

#tokens ⇒ `Object` (readonly)

Returns the value of attribute tokens.



7
8
9

# File 'lib/orb/tokenizer.rb', line 7

# Reader for the @tokens instance variable, which #initialize sets to an
# empty Array and #tokenize! appends to and returns.
#
# @return [Object] the current value of @tokens
def tokens
  @tokens
end

Instance Method Details

#tokenize! ⇒ `Object` Also known as: tokenize

Main entry point, and only public method. Tokenize the source string and return the tokens. If any errors are encountered during tokenization, this method will raise the first error.

# File 'lib/orb/tokenizer.rb', line 95

# Main entry point: scans the whole source and returns the collected tokens.
#
# Calls next_token until the cursor reaches the end of the source, flushes
# whatever is left in the buffer as a final :text token (unless it is only
# whitespace), and then runs the tokenizer's final state checks.
#
# @return [Array] the @tokens collection
def tokenize!
  # Keep producing tokens until the cursor has passed the last character
  next_token until @cursor >= @source.length

  # Flush leftover buffered text; whitespace-only leftovers are dropped
  remainder = consume_buffer
  whitespace_only = remainder.strip.empty?
  @tokens << Token.new(:text, remainder) unless whitespace_only

  # Final validation: tokenizer state first, then unclosed blocks or tags
  check_tokenizer_state
  check_for_unclosed_blocks_or_tags

  @tokens
end