Class: SanitizeTest

Inherits:
Test::Unit::TestCase
  • Object
show all
Includes:
HTML5
Defined in:
lib/feed_tools/vendor/html5/tests/test_sanitizer.rb

Constant Summary

Constants included from HTML5

HTML5::ASCII_LETTERS, HTML5::ASCII_LOWERCASE, HTML5::ASCII_UPPERCASE, HTML5::BOOLEAN_ATTRIBUTES, HTML5::CDATA_ELEMENTS, HTML5::CONTENT_MODEL_FLAGS, HTML5::DIGITS, HTML5::E, HTML5::ENCODINGS, HTML5::ENTITIES, HTML5::ENTITIES_WINDOWS1252, HTML5::FORMATTING_ELEMENTS, HTML5::HEADING_ELEMENTS, HTML5::HEX_DIGITS, HTML5::Marker, HTML5::RCDATA_ELEMENTS, HTML5::SCOPING_ELEMENTS, HTML5::SPACE_CHARACTERS, HTML5::SPECIAL_ELEMENTS, HTML5::TABLE_INSERT_MODE_ELEMENTS, HTML5::VERSION, HTML5::VOID_ELEMENTS

Instance Method Summary collapse

Methods included from HTML5

_, parse, parse_fragment

Instance Method Details

#check_sanitization(input, htmloutput, xhtmloutput, rexmloutput) ⇒ Object



34
35
36
37
38
# File 'lib/feed_tools/vendor/html5/tests/test_sanitizer.rb', line 34

# Runs +input+ through each of the three sanitizer helpers, in order
# (HTML, XHTML, REXML), and asserts that each result equals the
# corresponding expected string.
#
# input       - markup handed to every sanitizer helper.
# htmloutput  - expected result of sanitize_html(input).
# xhtmloutput - expected result of sanitize_xhtml(input).
# rexmloutput - expected result of sanitize_rexml(input).
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
  # An array of pairs (not a Hash) so the check order is fixed.
  expectations = [
    [:sanitize_html,  htmloutput],
    [:sanitize_xhtml, xhtmloutput],
    [:sanitize_rexml, rexmloutput]
  ]

  expectations.each do |sanitizer, expected|
    assert_equal expected, send(sanitizer, input)
  end
end

#sanitize_html(stream) ⇒ Object



18
19
20
# File 'lib/feed_tools/vendor/html5/tests/test_sanitizer.rb', line 18

# Parses +stream+ as a fragment with HTMLParser, using HTMLSanitizer as the
# tokenizer, and returns the string form of the parse result.
# Element and attribute name lowercasing are both switched off.
def sanitize_html(stream)
  parser_options = {
    :tokenizer => HTMLSanitizer,
    :encoding => 'utf-8',
    :lowercase_element_name => false,
    :lowercase_attr_name => false
  }
  fragment = HTMLParser.parse_fragment(stream, parser_options)
  fragment.to_s
end

#sanitize_rexml(stream) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
# File 'lib/feed_tools/vendor/html5/tests/test_sanitizer.rb', line 22

# Wraps +stream+ in an XHTML-namespaced <div>, parses it with REXML, walks
# the document with the 'rexml' tree walker and serializes it through
# XHTMLSerializer with :sanitize => true. The wrapping <div> is stripped
# from the serialized output before it is returned.
#
# Returns "Ill-formed XHTML!" when REXML raises a ParseException.
def sanitize_rexml(stream)
  # Loaded lazily, exactly when this helper is first used.
  require 'rexml/document'

  doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{stream}</div>")
  walker_class = TreeWalkers.get_tree_walker('rexml')
  tokens = walker_class.new(doc)

  serializer_options = {
    :encoding => 'utf-8',
    :quote_char => "'",
    :inject_meta_charset => false,
    :sanitize => true
  }
  serialized = XHTMLSerializer.serialize(tokens, serializer_options)

  # Drop the wrapper <div> that was added above; /m lets (.*) span newlines.
  serialized.gsub(/\A<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>\Z/m, '\1')
rescue REXML::ParseException
  "Ill-formed XHTML!"
end

#sanitize_xhtml(stream) ⇒ Object



14
15
16
# File 'lib/feed_tools/vendor/html5/tests/test_sanitizer.rb', line 14

# Parses +stream+ as a fragment with XHTMLParser, using HTMLSanitizer as the
# tokenizer, and returns the string form of the parse result.
# Element and attribute name lowercasing are both switched off.
def sanitize_xhtml(stream)
  parser_options = {
    :tokenizer => HTMLSanitizer,
    :encoding => 'utf-8',
    :lowercase_element_name => false,
    :lowercase_attr_name => false
  }
  fragment = XHTMLParser.parse_fragment(stream, parser_options)
  fragment.to_s
end

#test_should_handle_astral_plane_characters ⇒ Object



113
114
115
116
117
118
119
120
121
# File 'lib/feed_tools/vendor/html5/tests/test_sanitizer.rb', line 113

# Checks two inputs containing characters outside the Basic Multilingual
# Plane: one written as numeric character references, one written as raw
# UTF-8 bytes inside a <tspan>. For each case the same expected string is
# used for the HTML, XHTML and REXML sanitizers.
def test_should_handle_astral_plane_characters
  cases = [
    # Numeric character references are expected to come back as raw UTF-8.
    ["<p>&#x1d4b5; &#x1d538;</p>", "<p>\360\235\222\265 \360\235\224\270</p>"],
    # Raw UTF-8 bytes are expected to come back unchanged.
    ["<p><tspan>\360\235\224\270</tspan> a</p>", "<p><tspan>\360\235\224\270</tspan> a</p>"]
  ]

  cases.each do |markup, expected|
    check_sanitization(markup, expected, expected, expected)
  end
end