Module: Gammo::Parser::Foreign

Included in:
Gammo::Parser
Defined in:
lib/gammo/parser/foreign.rb

Overview

A set of methods and constants for parsing foreign content. Section 12.2.6.5.

Constant Summary collapse

BREAKOUT =

Element names that are broken out on parsing foreign content.

{
  # Tag names of start tags that break out of foreign content. Every value is
  # `true`, so the hash is used as a set: parse_foreign_content looks up
  # `token.data` here and treats a truthy result as a breakout.
  "b" =>           true,
  "big" =>         true,
  "blockquote" =>  true,
  "body" =>        true,
  "br" =>          true,
  "center" =>      true,
  "code" =>        true,
  "dd" =>          true,
  "div" =>         true,
  "dl" =>          true,
  "dt" =>          true,
  "em" =>          true,
  "embed" =>       true,
  "h1" =>          true,
  "h2" =>          true,
  "h3" =>          true,
  "h4" =>          true,
  "h5" =>          true,
  "h6" =>          true,
  "head" =>        true,
  "hr" =>          true,
  "i" =>           true,
  "img" =>         true,
  "li" =>          true,
  "listing" =>     true,
  "menu" =>        true,
  "meta" =>        true,
  "nobr" =>        true,
  "ol" =>          true,
  "p" =>           true,
  "pre" =>         true,
  "ruby" =>        true,
  "s" =>           true,
  "small" =>       true,
  "span" =>        true,
  "strong" =>      true,
  "strike" =>      true,
  "sub" =>         true,
  "sup" =>         true,
  "table" =>       true,
  "tt" =>          true,
  "u" =>           true,
  "ul" =>          true,
  "var" =>         true
}.freeze
SVG_TAG_NAME_ADJUSTMENTS =

If the tag name of a token that is parsed as foreign content in the “svg” namespace matches a key in the hash below, the tag name is replaced with the corresponding value.

{
  # Lowercased SVG tag name => mixed-case tag name. parse_foreign_content looks
  # up `token.data` here when the adjusted current node is in the "svg"
  # namespace and, on a hit, rewrites both `token.data` and `token.tag`.
  "altglyph" =>             "altGlyph",
  "altglyphdef" =>          "altGlyphDef",
  "altglyphitem" =>         "altGlyphItem",
  "animatecolor" =>         "animateColor",
  "animatemotion" =>        "animateMotion",
  "animatetransform" =>     "animateTransform",
  "clippath" =>             "clipPath",
  "feblend" =>              "feBlend",
  "fecolormatrix" =>        "feColorMatrix",
  "fecomponenttransfer" =>  "feComponentTransfer",
  "fecomposite" =>          "feComposite",
  "feconvolvematrix" =>     "feConvolveMatrix",
  "fediffuselighting" =>    "feDiffuseLighting",
  "fedisplacementmap" =>    "feDisplacementMap",
  "fedistantlight" =>       "feDistantLight",
  # Listed in the current WHATWG tag-name adjustment table; previously missing here.
  # NOTE(review): confirm Tags.lookup copes with a name it may not know.
  "fedropshadow" =>         "feDropShadow",
  "feflood" =>              "feFlood",
  "fefunca" =>              "feFuncA",
  "fefuncb" =>              "feFuncB",
  "fefuncg" =>              "feFuncG",
  "fefuncr" =>              "feFuncR",
  "fegaussianblur" =>       "feGaussianBlur",
  "feimage" =>              "feImage",
  "femerge" =>              "feMerge",
  "femergenode" =>          "feMergeNode",
  "femorphology" =>         "feMorphology",
  "feoffset" =>             "feOffset",
  "fepointlight" =>         "fePointLight",
  "fespecularlighting" =>   "feSpecularLighting",
  "fespotlight" =>          "feSpotLight",
  "fetile" =>               "feTile",
  "feturbulence" =>         "feTurbulence",
  "foreignobject" =>        "foreignObject",
  "glyphref" =>             "glyphRef",
  "lineargradient" =>       "linearGradient",
  "radialgradient" =>       "radialGradient",
  "textpath" =>             "textPath",
}.freeze
MATH_ML_ATTRIBUTE_ADJUSTMENTS =

If any attribute key of the current token, which is parsed as foreign content in the “math” namespace, matches a key in the hash below, the attribute key is replaced with the corresponding value. Section 12.2.6.1. html.spec.whatwg.org/multipage/parsing.html#creating-and-inserting-nodes

{
  # Lowercased attribute name => mixed-case attribute name. Passed to
  # adjust_attribute_names by parse_foreign_content when the adjusted current
  # node is in the "math" namespace.
  "definitionurl" => "definitionURL",
}.freeze
SVG_ATTRIBUTE_ADJUSTMENTS =

If any attribute key of the current token, which is parsed as foreign content in the “svg” namespace, matches a key in the hash below, the attribute key is replaced with the corresponding value. Section 12.2.6.1. html.spec.whatwg.org/multipage/parsing.html#creating-and-inserting-nodes

{
  # Lowercased attribute name => mixed-case attribute name. Passed to
  # adjust_attribute_names by parse_foreign_content when the adjusted current
  # node is in the "svg" namespace.
  # NOTE(review): contentscripttype, contentstyletype and
  # externalresourcesrequired appear to be absent from the current WHATWG
  # table -- confirm against the spec before removing them.
  "attributename" =>              "attributeName",
  "attributetype" =>              "attributeType",
  "basefrequency" =>              "baseFrequency",
  "baseprofile" =>                "baseProfile",
  "calcmode" =>                   "calcMode",
  "clippathunits" =>              "clipPathUnits",
  "contentscripttype" =>          "contentScriptType",
  "contentstyletype" =>           "contentStyleType",
  "diffuseconstant" =>            "diffuseConstant",
  "edgemode" =>                   "edgeMode",
  "externalresourcesrequired" =>  "externalResourcesRequired",
  "filterunits" =>                "filterUnits",
  "glyphref" =>                   "glyphRef",
  "gradienttransform" =>          "gradientTransform",
  "gradientunits" =>              "gradientUnits",
  "kernelmatrix" =>               "kernelMatrix",
  "kernelunitlength" =>           "kernelUnitLength",
  "keypoints" =>                  "keyPoints",
  "keysplines" =>                 "keySplines",
  "keytimes" =>                   "keyTimes",
  "lengthadjust" =>               "lengthAdjust",
  "limitingconeangle" =>          "limitingConeAngle",
  "markerheight" =>               "markerHeight",
  "markerunits" =>                "markerUnits",
  "markerwidth" =>                "markerWidth",
  "maskcontentunits" =>           "maskContentUnits",
  "maskunits" =>                  "maskUnits",
  "numoctaves" =>                 "numOctaves",
  "pathlength" =>                 "pathLength",
  "patterncontentunits" =>        "patternContentUnits",
  "patterntransform" =>           "patternTransform",
  "patternunits" =>               "patternUnits",
  "pointsatx" =>                  "pointsAtX",
  "pointsaty" =>                  "pointsAtY",
  "pointsatz" =>                  "pointsAtZ",
  "preservealpha" =>              "preserveAlpha",
  "preserveaspectratio" =>        "preserveAspectRatio",
  "primitiveunits" =>             "primitiveUnits",
  "refx" =>                       "refX",
  "refy" =>                       "refY",
  "repeatcount" =>                "repeatCount",
  "repeatdur" =>                  "repeatDur",
  "requiredextensions" =>         "requiredExtensions",
  "requiredfeatures" =>           "requiredFeatures",
  "specularconstant" =>           "specularConstant",
  "specularexponent" =>           "specularExponent",
  "spreadmethod" =>               "spreadMethod",
  "startoffset" =>                "startOffset",
  "stddeviation" =>               "stdDeviation",
  "stitchtiles" =>                "stitchTiles",
  "surfacescale" =>               "surfaceScale",
  "systemlanguage" =>             "systemLanguage",
  "tablevalues" =>                "tableValues",
  "targetx" =>                    "targetX",
  "targety" =>                    "targetY",
  "textlength" =>                 "textLength",
  "viewbox" =>                    "viewBox",
  "viewtarget" =>                 "viewTarget",
  "xchannelselector" =>           "xChannelSelector",
  "ychannelselector" =>           "yChannelSelector",
  "zoomandpan" =>                 "zoomAndPan",
}.freeze

Instance Method Summary collapse

Instance Method Details

#adjust_attribute_names(attrs, map) ⇒ Object



288
289
290
# File 'lib/gammo/parser/foreign.rb', line 288

# Renames attribute keys in place according to a lookup table.
#
# @param attrs [Enumerable] attributes responding to `key` and `key=`
# @param map [Hash] current key => replacement key
# @return [Object] the value returned by `attrs.each` (the receiver for an Array)
def adjust_attribute_names(attrs, map)
  attrs.each do |attribute|
    current_name = attribute.key
    # Attributes whose key is not listed in the table are left untouched.
    next unless map.key?(current_name)

    attribute.key = map[current_name]
  end
end

#adjust_foreign_attributes(attrs) ⇒ Object



292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/gammo/parser/foreign.rb', line 292

# Splits the known `xlink:`, `xml:` and `xmlns:` attribute keys into a
# namespace (the text before the colon) and a key (the text after it).
# All other attributes are left unchanged.
#
# @param attrs [Array] attributes responding to `key`, `key=` and `namespace=`
# @return [Array] the same `attrs` collection
def adjust_foreign_attributes(attrs)
  prefixed_names = %w[
    xlink:actuate xlink:arcrole xlink:href xlink:role xlink:show xlink:title
    xlink:type xml:base xml:lang xml:space xmlns:xlink
  ]
  attrs.each do |attribute|
    qualified_name = attribute.key
    # Cheap pre-filter: every adjustable name starts with "x".
    next unless qualified_name.start_with?('x')
    next unless prefixed_names.include?(qualified_name)

    prefix, _colon, local_name = qualified_name.partition(':')
    attribute.namespace = prefix
    attribute.key = local_name
  end
end

#html_integration_point?(node) ⇒ Boolean

Returns:

  • (Boolean)


269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/gammo/parser/foreign.rb', line 269

# Tells whether +node+ is an HTML integration point.
#
# True for a "math" namespace `annotation-xml` element that has an `encoding`
# attribute equal (case-insensitively) to `text/html` or
# `application/xhtml+xml`, and for "svg" namespace `desc`, `foreignObject`
# and `title` elements. Anything that is not exactly a Node::Element is
# rejected.
#
# @param node [Object] the node to inspect
# @return [Boolean]
def html_integration_point?(node)
  return false unless node.instance_of?(Node::Element)

  case node.namespace
  when 'math'
    return false unless node.data == 'annotation-xml'

    node.attributes.each do |attribute|
      next unless attribute.key == 'encoding'

      encoding = attribute.value.downcase
      return true if %w[text/html application/xhtml+xml].include?(encoding)
    end
    false
  when 'svg'
    %w[desc foreignObject title].include?(node.data)
  else
    false
  end
end

#in_foreign_content? ⇒ Boolean

Returns:

  • (Boolean)


245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/gammo/parser/foreign.rb', line 245

# Decides whether the current token is handled by the foreign-content rules.
#
# The answer is false when there are no open elements, when the adjusted
# current node has no namespace, when one of the integration-point exceptions
# below applies, or when the token is an error token.
#
# @return [Boolean]
def in_foreign_content?
  return false if open_elements.length.zero?

  current = adjusted_current_node
  return false unless current.namespace

  incoming = token
  start_tag = incoming.instance_of?(Tokenizer::StartTagToken)
  characters = incoming.instance_of?(Tokenizer::CharacterToken)

  # MathML text integration point: character tokens and every start tag other
  # than mglyph / malignmark are not foreign content.
  if math_ml_text_integration_point?(current)
    return false if characters
    return false if start_tag && incoming.tag != Tags::Mglyph && incoming.tag != Tags::Malignmark
  end

  # An <svg> start tag directly inside MathML annotation-xml.
  svg_in_annotation_xml = current.namespace == 'math' && current.tag == Tags::AnnotationXml &&
                          start_tag && incoming.tag == Tags::Svg
  return false if svg_in_annotation_xml

  # HTML integration point: start tags and character tokens are not foreign content.
  return false if (start_tag || characters) && html_integration_point?(current)

  !incoming.instance_of?(Tokenizer::ErrorToken)
end

#math_ml_text_integration_point?(node) ⇒ Boolean

Returns:

  • (Boolean)


261
262
263
264
265
266
267
# File 'lib/gammo/parser/foreign.rb', line 261

# Tells whether +node+ is a MathML text integration point, i.e. a "math"
# namespace node whose name is one of mi, mo, mn, ms or mtext.
#
# @param node [Object] a node responding to `namespace` and `data`
# @return [Boolean]
def math_ml_text_integration_point?(node)
  return false unless node.namespace == 'math'

  %w[mi mo mn ms mtext].include?(node.data)
end

#parse_foreign_content ⇒ Object



178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/gammo/parser/foreign.rb', line 178

# Processes the current `token` using the foreign-content rules, dispatching
# on the token's class.
#
# Returns false only when a start tag breaks out of foreign content. For an end
# tag that reaches an element without a namespace, returns whatever
# `insertion_mode.new(self).process` returns. Returns true in every other case,
# including token classes that are not handled here. Raises ParseError when a
# start tag is processed while the adjusted current node is in neither the
# "math" nor the "svg" namespace.
def parse_foreign_content
  case token
  when Tokenizer::CharacterToken
    # Once frameset_ok is falsy it is left alone. Otherwise it stays true only
    # if nothing remains after stripping leading whitespace, a leading run of
    # NUL characters, and leading whitespace again.
    self.frameset_ok = token.data.lstrip.sub(/\A\x00*/, '').lstrip.empty? if frameset_ok
    # NUL characters are replaced with U+FFFD before the text is added.
    token.data = token.data.gsub(/\x00/, "\ufffd")
    add_text token.data
  when Tokenizer::CommentToken
    add_child Node::Comment.new(data: token.data)
  when Tokenizer::StartTagToken
    unless fragment?
      # A start tag breaks out if its name is listed in BREAKOUT, or if it is a
      # <font> tag carrying a color, face or size attribute.
      breakout = BREAKOUT[token.data]
      if token.tag == Tags::Font
        token.attributes.each do |attr|
          case attr.key
          when 'color', 'face', 'size'
            breakout = true
            break
          end
        end
      end
      if breakout
        # Truncate the stack of open elements at the first element (in
        # reverse_each_with_index order) that has no namespace or is an
        # integration point, keeping that element.
        # NOTE(review): assumes reverse_each_with_index walks from the last
        # element and yields each element's original index -- confirm.
        open_elements.reverse_each_with_index do |elm, index|
          if !elm.namespace || html_integration_point?(elm) || math_ml_text_integration_point?(elm)
            self.open_elements = open_elements.slice(0, index + 1)
            break
          end
        end
        # Presumably false tells the caller to reprocess the token -- confirm
        # against the caller.
        return false
      end
    end
    current = adjusted_current_node
    case current.namespace
    when 'math'
      adjust_attribute_names(token.attributes, MATH_ML_ATTRIBUTE_ADJUSTMENTS)
    when 'svg'
      # Restore the mixed-case SVG tag name, keeping token.tag in sync with it.
      x = SVG_TAG_NAME_ADJUSTMENTS[token.data]
      if x
        token.tag = Tags.lookup(x)
        token.data = x
      end
      adjust_attribute_names(token.attributes, SVG_ATTRIBUTE_ADJUSTMENTS)
    else
      raise ParseError, 'bad parser state: unexpected namespace'
    end
    adjust_foreign_attributes(token.attributes)
    # The new element takes the namespace of the adjusted current node.
    namespace = current.namespace
    add_element
    top.namespace = namespace
    tokenizer.next_is_not_raw_text! if namespace
    if has_self_closing_token
      # A self-closing foreign element is popped again right after insertion.
      open_elements.pop
      acknowledge_self_closing_tag
    end
  when Tokenizer::EndTagToken
    open_elements.reverse_each_with_index do |elm, index|
      # An element without a namespace hands the token to the current insertion mode.
      return insertion_mode.new(self).process unless elm.namespace
      # Names are compared case-insensitively; on a match the stack is cut to
      # the elements before `index`, dropping the matched element too.
      if elm.data.downcase == token.data.downcase
        self.open_elements = open_elements.slice(0, index)
        break
      end
    end
    return true
  end
  # ignore the token
  true
end