Class: HTML5::HTMLSerializer

Inherits:
Object
  • Object
show all
Defined in:
lib/html5/serializer/htmlserializer.rb

Direct Known Subclasses

XHTMLSerializer

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ HTMLSerializer

Returns a new instance of HTMLSerializer.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/html5/serializer/htmlserializer.rb', line 15

# Creates a serializer, starting from the defaults below and then applying
# any recognised overrides from +options+.
#
# options:: Hash of setting name => value. A key is honoured only when an
#           instance variable of that name has already been given a default
#           here; every other key (for example :encoding) is skipped.
#
# Supplying :quote_char also turns off @use_best_quote_char, unless a later
# entry in +options+ sets it again.
def initialize(options={})
  # Attribute value quoting.
  @quote_attr_values           = false
  @quote_char                  = '"'
  @use_best_quote_char         = true
  @minimize_boolean_attributes = true

  # Tag syntax and escaping.
  @use_trailing_solidus          = false
  @space_before_trailing_solidus = true
  @escape_lt_in_attrs            = false
  @escape_rcdata                 = false

  # Optional token-stream filters applied by #serialize.
  @omit_optional_tags  = true
  @sanitize            = false
  @strip_whitespace    = false
  @inject_meta_charset = true

  # serialize_error raises when @strict is set. It must have a default here,
  # otherwise the instance_variable_defined? guard below drops a :strict
  # option and strict mode can never be enabled.
  @strict = false

  options.each do |name, value|
    next unless instance_variable_defined?("@#{name}")
    @use_best_quote_char = false if name.to_s == 'quote_char'
    instance_variable_set("@#{name}", value)
  end

  # Assigned after the loop on purpose: :errors is not a configurable option.
  @errors = []
end

Class Method Details

.serialize(stream, options = {}) ⇒ Object



7
8
9
# File 'lib/html5/serializer/htmlserializer.rb', line 7

# Convenience entry point: builds a serializer from +options+ and runs it
# over +stream+ in one call. options[:encoding], when present, is passed on
# as the output encoding.
def self.serialize(stream, options = {})
  serializer = new(options)
  serializer.serialize(stream, options[:encoding])
end

Instance Method Details

#escape(string) ⇒ Object



11
12
13
# File 'lib/html5/serializer/htmlserializer.rb', line 11

# Returns a copy of +string+ with &, < and > replaced by their entities.
# The ampersand is handled first so the entities inserted for < and > are
# not escaped a second time.
def escape(string)
  substitutions = [["&", "&amp;"], ["<", "&lt;"], [">", "&gt;"]]
  substitutions.inject(string) do |text, (raw, entity)|
    text.gsub(raw, entity)
  end
end

#serialize(treewalker, encoding = nil) ⇒ Object Also known as: render



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/html5/serializer/htmlserializer.rb', line 42

# Serializes a token stream to markup.
#
# treewalker:: object whose #each yields token hashes. Each token has a
#              :type of :Doctype, :Characters, :SpaceCharacters, :StartTag,
#              :EmptyTag, :EndTag or :Comment, plus the :name and/or :data
#              entries read below. Any other type is reported as an error.
# encoding::   optional output encoding name. When given (and
#              @inject_meta_charset is set) the stream is passed through the
#              meta-charset filter; when it is anything other than 'utf-8'
#              the finished text is converted from UTF-8 to that encoding.
#
# Returns the serialized String. Problems found on the way are recorded via
# serialize_error; @errors is reset at the start of every call.
def serialize(treewalker, encoding=nil)
  in_cdata = false
  @errors = []

  # Wrap the stream in whichever filters were enabled at construction time.
  # Each filter is required only when it is actually used.
  if encoding and @inject_meta_charset
    require 'html5/filters/inject_meta_charset'
    treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
  end

  if @strip_whitespace
    require 'html5/filters/whitespace'
    treewalker = Filters::WhitespaceFilter.new(treewalker)
  end

  if @sanitize
    require 'html5/filters/sanitizer'
    treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
  end

  if @omit_optional_tags
    require 'html5/filters/optionaltags'
    treewalker = Filters::OptionalTagFilter.new(treewalker)
  end

  result = []
  treewalker.each do |token|
    type = token[:type]
    case type
    when :Doctype
      result << "<!DOCTYPE %s>" % token[:name]

    when :Characters, :SpaceCharacters
      if type == :SpaceCharacters or in_cdata
        # Emitted verbatim: whitespace needs no escaping and CDATA content
        # must not be escaped.
        if in_cdata and token[:data].include?("</")
          serialize_error("Unexpected </ in CDATA")
        end
        result << token[:data]
      else
        result << escape(token[:data])
      end

    when :StartTag, :EmptyTag
      name = token[:name]
      if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
        in_cdata = true
      elsif in_cdata
        # NOTE(review): `_` is not defined in the visible source; presumably
        # an identity/gettext-style helper. Confirm it is in scope here.
        serialize_error(_("Unexpected child element of a CDATA element"))
      end

      # Characters that force an attribute value to be quoted.
      quote_triggers = SPACE_CHARACTERS + %w(< > " ')

      attributes = []
      # Attributes are written in sorted order.
      token[:data].to_a.sort.each do |k, v|
        attributes << ' ' << k

        # A boolean attribute is written bare (name only) when minimizing.
        minimized = @minimize_boolean_attributes &&
          ((BOOLEAN_ATTRIBUTES[name] || []).include?(k) ||
           BOOLEAN_ATTRIBUTES[:global].include?(k))
        next if minimized

        attributes << "="
        quote_attr = @quote_attr_values || v.empty? ||
          quote_triggers.any? { |c| v.include?(c) }

        v = v.gsub("&", "&amp;")
        v = v.gsub("<", "&lt;") if @escape_lt_in_attrs

        if quote_attr
          quote_char = @quote_char
          if @use_best_quote_char
            # Pick the quote that does not occur in the value, if only one
            # kind occurs; otherwise keep the configured quote character.
            has_single = v.index("'")
            has_double = v.index('"')
            if has_single and !has_double
              quote_char = '"'
            elsif has_double and !has_single
              quote_char = "'"
            end
          end
          if quote_char == "'"
            v = v.gsub("'", "&#39;")
          else
            v = v.gsub('"', "&quot;")
          end
          attributes << quote_char << v << quote_char
        else
          attributes << v
        end
      end

      if VOID_ELEMENTS.include?(name) and @use_trailing_solidus
        attributes << (@space_before_trailing_solidus ? " /" : "/")
      end
      result << "<%s%s>" % [name, attributes.join('')]

    when :EndTag
      name = token[:name]
      if RCDATA_ELEMENTS.include?(name)
        in_cdata = false
      elsif in_cdata
        # NOTE(review): see the note on `_` in the start-tag branch.
        serialize_error(_("Unexpected child element of a CDATA element"))
      end
      result << "</#{name}>"

    when :Comment
      data = token[:data]
      serialize_error("Comment contains --") if data.index("--")
      result << "<!--%s-->" % data

    else
      serialize_error(token[:data])
    end
  end

  output = result.join('')
  return output unless (encoding && encoding != 'utf-8')

  # Iconv is kept as the first choice so environments that have it behave
  # as before. It is no longer bundled with Ruby since 2.0, so fall back to
  # String#encode instead of failing with LoadError.
  begin
    require 'iconv'
  rescue LoadError
    return output.encode(encoding, 'utf-8')
  end
  Iconv.iconv(encoding, 'utf-8', output).first
end

#serialize_error(data = "XXX ERROR MESSAGE NEEDED") ⇒ Object



166
167
168
169
170
171
172
# File 'lib/html5/serializer/htmlserializer.rb', line 166

# Records a serialization problem in @errors and, when @strict is set,
# raises SerializeError.
#
# data:: description of the problem; it is appended to @errors and used as
#        the message of the raised exception.
def serialize_error(data="XXX ERROR MESSAGE NEEDED")
  # XXX The idea is to make data mandatory.
  @errors.push(data)
  # Pass the description along so the exception says what went wrong.
  # A nil +data+ still produces the default (class-name) message.
  raise SerializeError, data if @strict
end