Class: SandlebarsParser

Inherits:
Object show all
Defined in:
lib/volt/server/html_parser/sandlebars_parser.rb

Overview

This is not a full html parser, but should cover most common cases.

Constant Summary collapse

START_TAG =

regex matchers

/^<([-!\:A-Za-z0-9_]+)((?:\s+[\w\-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/
END_TAG =
/^<\/([-!\:A-Za-z0-9_]+)[^>]*>/
ATTRIBUTES =
/([-\:A-Za-z0-9_]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/
BLOCK =

Types of elements

truth_hash(%w{a address applet blockquote button center dd del dir div dl dt fieldset form frameset hr iframe ins isindex li map menu noframes noscript object ol p pre script table tbody td tfoot th thead tr ul})
EMPTY =
truth_hash(%w{area base basefont br col frame hr img input isindex link meta param embed})
INLINE =
truth_hash(%w{abbr acronym applet b basefont bdo big br button cite code del dfn em font i iframe img input ins kbd label map object q s samp script select small span strike strong sub sup textarea tt u var})
CLOSE_SELF =
truth_hash(%w{colgroup dd dt li options p td tfoot th thead tr})
SPECIAL =
truth_hash(%w{script style})
FILL_IN_ATTRIBUTES =
truth_hash(%w{checked compact declare defer disabled ismap multiple nohref noresize noshade nowrap readonly selected})

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html, handler, file_path = nil) ⇒ SandlebarsParser

Returns a new instance of SandlebarsParser.



34
35
36
37
38
39
40
41
42
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 34

# Sets up a parser over +html+ and runs the parse straight away.
#
# @param html [String] the markup to parse; wrapped in a StringScanner
# @param handler [Object] the object that receives the parse callbacks
# @param file_path [String, nil] path of the file being parsed, stored for
#   use in error messages
def initialize(html, handler, file_path = nil)
  @stack = []
  @file_path = file_path
  @handler = handler
  @html = StringScanner.new(html)

  parse
end

Class Method Details

.truth_hash(array) ⇒ Object



13
14
15
16
17
18
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 13

# Builds a lookup table from a list so membership can be tested with a
# plain hash access.
#
# @param array [Array] the values to register as keys
# @return [Hash] a new hash mapping every element of +array+ to +true+
def self.truth_hash(array)
  array.each_with_object({}) { |key, lookup| lookup[key] = true }
end

Instance Method Details

#end_tag(tag, tag_name) ⇒ Object



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 195

# Closes open tags, notifying the handler for each one from the innermost
# outwards, and trims the stack of open tags to match.
#
# With a +tag_name+, everything from the top of the stack down to and
# including the nearest open tag of that name is closed; when no such tag
# is open the call does nothing. Without a +tag_name+ every open tag is
# closed.
#
# @param tag [String, nil] not consulted; retained so existing callers keep
#   working
# @param tag_name [String, nil] name of the tag to close, or nil to close
#   all the way up
def end_tag(tag, tag_name)
  # Names are lower-cased before they are pushed onto the stack, so the
  # lookup has to be lower-cased too or "</DIV>" would never find "div".
  new_size = tag_name ? @stack.rindex(tag_name.downcase) : 0

  # A close tag with no matching open tag is ignored rather than being
  # allowed to close everything that is open.
  return unless new_size

  if @handler.respond_to?(:end_tag)
    (@stack.size - 1).downto(new_size) do |index|
      @handler.end_tag(@stack[index])
    end
  end

  @stack = @stack[0...new_size]
end

#last ⇒ Object



44
45
46
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 44

# @return [Object, nil] the entry on top of the open-tag stack, or nil when
#   the stack is empty
def last
  @stack[-1]
end

#parse ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 48

# Drives the parse: repeatedly matches the next construct at the scanner's
# current position and dispatches it, until nothing matches, then closes
# every tag that is still open.
#
# Constructs are tried in this order: the raw body of a script/style tag,
# an HTML comment, a start tag, an end tag, a {{{ ... }}} escaped section,
# a binding opened by "{", and finally plain text.
#
# @raise [HTMLParseError] (through raise_parse_error) when a script/style
#   body or an HTML comment is never terminated
def parse
  loop do
    if last && SPECIAL[last]
      # Inside a script or style tag nothing is parsed: everything up to the
      # first matching close tag, in any letter case, is taken as text.
      body = scan_terminated(%r{</#{last}>}i, "<#{last}> tag")

      # Unwrap comment and CDATA markers, keeping what they enclose.
      body = body.gsub(/\<\!--(.*?)--\>/, "\\1").gsub(/\<\!\[CDATA\[(.*?)\]\]\>/, "\\1")

      text(body)

      end_tag(last, last)
    elsif @html.scan(/\<\!--/)
      # start comment
      comment = scan_terminated(/--\>/, 'comment')

      @handler.comment(comment) if @handler.respond_to?(:comment)
    elsif (tag = @html.scan(START_TAG))
      tag_name = @html[1]
      rest = @html[2]
      unary = @html[3]

      start_tag(tag, tag_name, rest, unary)
    elsif @html.scan(END_TAG)
      tag_name = @html[1]

      end_tag(tag_name, tag_name)
    elsif @html.scan(/\{\{\{(.*?)\}\}\}([^\}]|$)/)
      # Anything between {{{ and }}} is escaped and not processed (treated
      # as text).
      escaped_text = @html[1]

      # The pattern also consumes the character that follows the closing
      # braces (nothing at a line end). Hand it back so it is parsed
      # normally; the scanner position is a byte offset, so step back by the
      # character's byte length rather than by one.
      @html.pos -= @html[2].bytesize

      text(escaped_text)
    elsif @html.scan(/\{/)
      # We are in text mode and matched the start of a binding
      start_binding
    elsif (chunk = @html.scan(/(?:[^\<\{]+)/))
      # matched text up until the next html tag or binding
      text(chunk)
    else
      # Nothing left.
      # NOTE(review): a "<" that does not begin a valid tag or comment also
      # ends up here, which stops the parse with the remaining input unread
      # -- confirm whether that is intended.
      break
    end
  end

  end_tag(nil, nil)
end

# Consumes input up to and including the first match of +terminator+ and
# returns the text that preceded the terminator.
#
# @param terminator [Regexp] marks the end of the construct being read
# @param description [String] names the construct in the error message
# @return [String] the consumed text without the terminator
# @raise [HTMLParseError] (through raise_parse_error) when +terminator+ does
#   not occur in the remaining input
def scan_terminated(terminator, description)
  consumed = @html.scan_until(terminator)

  unless consumed
    # A failed scan leaves the scanner without a last match, which
    # raise_parse_error relies on to work out the line number. Record an
    # empty match at the current position so the error points at the place
    # where the unterminated content begins.
    @html.check(//)
    raise_parse_error("unclosed #{description}")
  end

  consumed[0, consumed.size - @html.matched.size]
end

#raise_parse_error(error) ⇒ Object

Raises:

(HTMLParseError)



134
135
136
137
138
139
140
141
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 134

# Raises a parse error whose message carries a line number and, when a file
# path was given to the parser, that path.
#
# The line number is one more than the number of newlines in the text that
# precedes the scanner's most recent match.
#
# @param error [String] description of what went wrong
# @raise [HTMLParseError] always
def raise_parse_error(error)
  newlines_before = @html.pre_match.count("\n")
  origin = @file_path ? " of #{@file_path}" : ''

  raise HTMLParseError, error + " on line: #{newlines_before + 1}" + origin
end

#start_binding ⇒ Object

Finds the end of a binding



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 105

# Reads a binding starting from the scanner's current position, tracking
# nested braces until the brace that balances the already-open one is
# found, then passes the binding's contents (without that final brace) to
# the handler.
#
# @raise [HTMLParseError] (through raise_parse_error) when a newline or the
#   end of the input is reached before the binding is closed
def start_binding
  contents = ''
  depth = 1

  until depth.zero?
    # Advance to the next brace or newline, or to the end of the input.
    contents << @html.scan_until(/([\{\}\n]|\Z)/)

    case @html[1]
    when '}' then depth -= 1
    when '{' then depth += 1
    else
      raise "should not reach here" unless @html[1] == "\n" || @html.eos?

      # A binding has to be closed on the line it starts on, and before the
      # document ends.
      raise_parse_error("unclosed binding: {#{contents.strip}")
    end
  end

  # Drop the closing brace that ended the scan.
  closed = contents[0...-1]
  @handler.binding(closed) if @handler.respond_to?(:binding)
end

#start_tag(tag, tag_name, rest, unary) ⇒ Object



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 143

# Handles a start tag: auto-closes a tag that the new one implicitly ends,
# records the new tag as open unless it is unary or a section tag, and
# reports it with its parsed attributes to the handler.
#
# @param tag [String] the complete tag text as matched in the source
# @param tag_name [String] the tag's name in its original letter case
# @param rest [String] the raw attribute portion of the tag
# @param unary [String] the self-closing marker captured from the tag;
#   blank when the tag was not written as self-closing
def start_tag(tag, tag_name, rest, unary)
  # A section tag is named with a colon followed by a capital letter. This
  # has to be decided before the name is lower-cased below.
  section_tag = tag_name[0] == ':' && tag_name[1] =~ /[A-Z]/

  tag_name = tag_name.downcase

  # handle doctype so we get it output exactly the same way
  if tag_name == '!doctype'
    @handler.text(tag) if @handler.respond_to?(:text)
    return
  end

  # Auto-close the last inline tag if we started a new block. The name is
  # passed in both positions so that only that one tag is closed.
  end_tag(last, last) if BLOCK[tag_name] && last && INLINE[last]

  # Some tags close themselves when a new one of themselves is reached.
  # ex, a tr will close the previous tr
  end_tag(tag_name, tag_name) if CLOSE_SELF[tag_name] && last == tag_name

  unary = EMPTY[tag_name] || !unary.blank?

  # Section tags are also unary, so they are never left open on the stack.
  @stack.push(tag_name) unless unary || section_tag

  return unless @handler.respond_to?(:start_tag)

  attributes = {}

  # Take the rest string and extract the attributes, filling in any
  # "fill in" attribute values if not provided.
  rest.scan(ATTRIBUTES).each do |name, double_quoted, single_quoted, bare|
    attributes[name] = double_quoted || single_quoted || bare || FILL_IN_ATTRIBUTES[name] || ''
  end

  if section_tag
    @handler.start_section(tag_name, attributes, unary)
  else
    @handler.start_tag(tag_name, attributes, unary)
  end
end

#text(text) ⇒ Object



100
101
102
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 100

# Forwards a run of text to the handler, provided the handler accepts text.
#
# @param text [String] the text to report
# @return [Object, nil] whatever the handler's +text+ returns, or nil when
#   the handler does not respond to +text+
def text(text)
  return unless @handler.respond_to?(:text)

  @handler.text(text)
end