Module: Cyberweb::CGI::Util

Included in:
Cyberweb::CGI
Defined in:
lib/cyberweb/cgi/util.rb,
lib/cyberweb/cgi/util.rb

Overview

Define the Util module, and extend class CGI with it.

Constant Summary collapse

US_ASCII_ENCODING =
#

US_ASCII_ENCODING

#
::Cyberweb::Encoding::US_ASCII
TABLE_FOR_ESCAPE_HTML__ =
#

Cyberweb::CGI::Util::TABLE_FOR_ESCAPE_HTML__

The set of special characters and their escaped values

#
{
  "'" => '&#39;',
  '&' => '&amp;',
  '"' => '&quot;',
  '<' => '&lt;',
  '>' => '&gt;',
}
@@accept_charset =
'UTF-8'

Instance Method Summary collapse

Instance Method Details

#escape(i) ⇒ Object

#

escape

URL-encode a string.

Invocation example:

url_encoded_string = CGI.escape("'Stop!' said Fred")
   # => "%27Stop%21%27+said+Fred"
#


47
48
49
50
51
52
# File 'lib/cyberweb/cgi/util.rb', line 47

# URL-encode a string.
#
# Works on the raw bytes of +i+: every run of bytes other than ASCII
# letters, digits, "_", ".", "-", "~" and the space character is replaced by
# upper-case %XX escapes (one per byte). Spaces are then turned into "+".
# Note that the tilde is NOT escaped - it is part of the excluded set.
#
# Returns a new String tagged with the same encoding as +i+.
def escape(i)
  source_encoding = i.encoding
  percent_encoded = i.b.gsub(/([^ a-zA-Z0-9_.\-~]+)/) do |unsafe|
    # One "%XX" triplet per byte of the matched run.
    unsafe.each_byte.map { |byte| '%%%02X' % byte }.join
  end
  percent_encoded.tr(' ', '+').force_encoding(source_encoding)
end

#escape_element(string, *elements) ⇒ Object Also known as: escapeElement

#

escape_element

Escape only the tags of certain HTML elements in string.

Takes an element or elements or array of elements. Each element is specified by the name of the element, without angle brackets. This matches both the start and the end tag of that element. The attribute list of the open tag will also be escaped (for instance, the double-quotes surrounding attribute values).

print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"

print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
#


146
147
148
149
150
151
152
153
154
155
# File 'lib/cyberweb/cgi/util.rb', line 146

# Escape only the tags of the named HTML elements inside +string+.
#
# +elements+ may be given as separate arguments or as a single Array. The
# names are joined with "|" and interpolated into a case-insensitive
# pattern that matches both the opening tag (including its attribute list)
# and the closing tag; each matched tag is passed through CGI.escapeHTML.
#
# Returns +string+ itself when no element names are supplied.
def escape_element(string, *elements)
  names = elements.first.is_a?(Array) ? elements.first : elements
  return string if names.empty?

  tag_pattern = /<\/?(?:#{names.join("|")})(?!\w)(?:.|\n)*?>/i
  string.gsub(tag_pattern) { |tag| CGI.escapeHTML(tag) }
end

#escape_html(i) ⇒ Object Also known as: h, escapeHTML

#

escape_html

Escape special characters in HTML, namely '&"<>

CGI.escapeHTML('Usage: foo "bar" <baz>')
   # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
#


237
238
239
# File 'lib/cyberweb/cgi/util.rb', line 237

# Escape the HTML-special characters ' & " < > in +i+.
#
# Each occurrence is replaced by the value stored for that character in
# TABLE_FOR_ESCAPE_HTML__. Returns a new String; +i+ is not modified.
def escape_html(i)
  i.gsub(/['&\"<>]/) { |special| TABLE_FOR_ESCAPE_HTML__[special] }
end

#pretty(string, shift = '  ') ⇒ Object

#

pretty

Prettify (indent) a HTML string.

string is the HTML string to indent.

shift is the indentation unit to use; it defaults to two spaces.

print CGI.pretty("<HTML><BODY></BODY></HTML>")
  # <HTML>
  #   <BODY>
  #   </BODY>
  # </HTML>

print CGI.pretty("<HTML><BODY></BODY></HTML>", "\t")
  # <HTML>
  #         <BODY>
  #         </BODY>
  # </HTML>
#


219
220
221
222
223
224
225
226
227
228
# File 'lib/cyberweb/cgi/util.rb', line 219

# Prettify (indent) an HTML string.
#
# string:: the HTML text to indent.
# shift::  the indentation unit inserted once per nesting level
#          (two spaces by default).
#
# The text is first split so that every tag sits on its own line. Then, for
# each closing tag found at the start of a line, the nearest preceding line
# that opens the same element is located and everything from that opening
# tag up to the closing tag is indented by one +shift+.
#
# A closing tag that has no matching opening tag is left where it is and
# skipped; it used to corrupt the output (or raise on older Rubies) because
# the nil start position was used as a range bound.
#
# Returns the indented text as a new String.
def pretty(string, shift = '  ')
  # Put a newline before every tag except one at the very start, and after
  # every tag that is not already followed by one.
  lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
  end_pos = 0
  while (end_pos = lines.index(/^<\/(\w+)/, end_pos))
    element = $1.dup
    start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
    if start_pos.nil?
      # Unbalanced closing tag: nothing to indent, resume the scan after it.
      end_pos += 1
      next
    end
    # The "__" markers keep already-processed tags from matching /^<\// or
    # /^\s*</ again; they are stripped in the final pass below.
    lines[start_pos...end_pos] = "__" + lines[start_pos...end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
  end
  lines.gsub(/^((?:#{Regexp.quote(shift)})*)__(?=<\/?\w)/, '\1')
end

#rfc1123_date(time) ⇒ Object

#

rfc1123_date

Format a Time object as a String using the format specified by RFC 1123.

CGI::rfc1123_date(Time.now)
  # Sat, 01 Jan 2000 00:00:00 GMT
#


189
190
191
192
193
194
195
196
# File 'lib/cyberweb/cgi/util.rb', line 189

# Format a Time as an RFC 1123 date string, e.g.
# "Sat, 01 Jan 2000 00:00:00 GMT".
#
# time:: the Time to format; it is converted to UTC first.
#
# Time#getgm returns a new UTC Time and never touches the receiver, so
# frozen Time objects are accepted (cloning and calling #gmtime on the
# clone raised FrozenError for a frozen, non-UTC Time). The day and month
# abbreviations produced by strftime's %a and %b are the fixed English
# names.
#
# Returns the formatted String.
def rfc1123_date(time)
  time.getgm.strftime('%a, %d %b %Y %H:%M:%S GMT')
end

#unescape(string, encoding = @@accept_charset) ⇒ Object

#

unescape

URL-decode a string, optionally specifying the encoding of the result.

string = CGI.unescape("%27Stop%21%27+said+Fred")
   # => "'Stop!' said Fred"
#


63
64
65
66
67
68
# File 'lib/cyberweb/cgi/util.rb', line 63

# URL-decode a string.
#
# string::   the URL-encoded text; "+" is read as a space and every %XX
#            sequence is turned back into the byte it names.
# encoding:: the encoding to tag the decoded bytes with (defaults to
#            @@accept_charset).
#
# If the decoded bytes are not valid in +encoding+, the result is tagged
# with the encoding of +string+ instead.
def unescape(string, encoding = @@accept_charset)
  raw = string.tr('+', ' ').b
  decoded = raw.gsub(/((?:%[0-9a-fA-F]{2})+)/) do |escaped_run|
    # Strip the percent signs and pack the remaining hex digits into bytes.
    [escaped_run.delete('%')].pack('H*')
  end
  decoded.force_encoding(encoding)
  return decoded if decoded.valid_encoding?

  decoded.force_encoding(string.encoding)
end

#unescape_element(string, *elements) ⇒ Object Also known as: unescapeElement

#

unescape_element

Undo escaping such as that done by CGI.escapeElement()

print CGI.unescapeElement(
        CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  # "&lt;BR&gt;<A HREF="url"></A>"

print CGI.unescapeElement(
        CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  # "&lt;BR&gt;<A HREF="url"></A>"
#


170
171
172
173
174
175
176
177
178
179
# File 'lib/cyberweb/cgi/util.rb', line 170

# Undo the escaping applied to the tags of the named HTML elements.
#
# +elements+ may be given as separate arguments or as a single Array. The
# names are joined with "|" and interpolated into a case-insensitive
# pattern that matches an escaped opening or closing tag ("&lt;" ... "&gt;");
# each match is passed through unescapeHTML.
#
# Returns +string+ itself when no element names are supplied.
def unescape_element(string, *elements)
  names = elements.first.is_a?(Array) ? elements.first : elements
  return string if names.empty?

  escaped_tag_pattern = /&lt;\/?(?:#{names.join("|")})(?!\w)(?:.|\n)*?&gt;/i
  string.gsub(escaped_tag_pattern) { |escaped_tag| unescapeHTML(escaped_tag) }
end

#unescapeHTML(string) ⇒ Object Also known as: unescape_html

#

unescapeHTML

Unescape a string that has been HTML-escaped.

Usage example:

CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
   # => "Usage: foo \"bar\" <baz>"
#


80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/cyberweb/cgi/util.rb', line 80

# Unescape a string that has been HTML-escaped.
#
# Recognised references are &apos; &amp; &quot; &gt; &lt; as well as decimal
# (&#NNN;) and hexadecimal (&#xHH;) numeric character references.
#
# string:: the escaped text. Returned as-is when it contains no "&".
#
# For UTF-16/UTF-32 (BE and LE) input every numeric reference is decoded
# into that encoding. For other encodings a numeric reference is decoded
# only when the target can hold it: any code point for UTF-8, below 256
# for ISO-8859-1, below 128 for other ASCII-compatible encodings.
# Otherwise the reference is left in the text (leading zeros of a decimal
# reference are dropped and the hex marker is written as a lower-case "x").
#
# A numeric reference that Integer#chr rejects for the target encoding
# (for example &#x110000; or a surrogate such as &#xD800; in UTF-8) is
# left in place instead of raising RangeError.
def unescapeHTML(string)
  enc = string.encoding
  if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
    # These encodings are not ASCII-compatible, so the probe, the pattern and
    # every replacement must be transcoded to +enc+; testing against a plain
    # '&' literal raises Encoding::CompatibilityError for non-empty input.
    return string unless string.include?('&'.encode(enc))
    wide_pattern = Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))
    return string.gsub(wide_pattern) do |entity|
      case $1.encode(US_ASCII_ENCODING)
      when 'apos' then "'".encode(enc)
      when 'amp'  then '&'.encode(enc)
      when 'quot' then '"'.encode(enc)
      when 'gt'   then '>'.encode(enc)
      when 'lt'   then '<'.encode(enc)
      when /\A#0*(\d+)\z/
        begin
          $1.to_i.chr(enc)
        rescue RangeError
          entity # not a valid code point: keep the reference untouched
        end
      when /\A#x([0-9a-f]+)\z/i
        begin
          $1.hex.chr(enc)
        rescue RangeError
          entity # not a valid code point: keep the reference untouched
        end
      end
    end
  end
  return string unless string.include? '&'
  asciicompat = Encoding.compatible?(string, "a")
  # Turn code point +n+ into a character of +enc+ when +enc+ can represent
  # it; otherwise, or when Integer#chr rejects the value, return +fallback+.
  decode = lambda do |n, fallback|
    representable = enc == Encoding::UTF_8 ||
                    (enc == Encoding::ISO_8859_1 && n < 256) ||
                    (asciicompat && n < 128)
    if representable
      begin
        n.chr(enc)
      rescue RangeError
        fallback
      end
    else
      fallback
    end
  end
  string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
    match = $1.dup
    case match
    when 'apos' then "'"
    when 'amp'  then '&'
    when 'quot' then '"'
    when 'gt'   then '>'
    when 'lt'   then '<'
    when /\A#0*(\d+)\z/       then decode.call($1.to_i, "&##{$1};")
    when /\A#x([0-9a-f]+)\z/i then decode.call($1.hex, "&#x#{$1};")
    else
      "&#{match};"
    end
  end
end