Module: Cyberweb::CGI::Util

Included in:: Cyberweb::CGI

Defined in:: lib/cyberweb/cgi/util.rb,
lib/cyberweb/cgi/util.rb

Overview

Define the Util module, and extend class CGI with it.

Constant Summary collapse

US_ASCII_ENCODING = # US_ASCII_ENCODING #

::Cyberweb::Encoding::US_ASCII

TABLE_FOR_ESCAPE_HTML__ = # Cyberweb::CGI::Util::TABLE_FOR_ESCAPE_HTML__ The set of special characters and their escaped values #

{
  "'" => '&#39;',
  '&' => '&amp;',
  '"' => '&quot;',
  '<' => '&lt;',
  '>' => '&gt;',
}

@@accept_charset =

'UTF-8'

Instance Method Summary collapse

#escape(i) ⇒ Object

# === escape.
#escape_element(string, *elements) ⇒ Object (also: #escapeElement)

# === escape_element.
#escape_html(i) ⇒ Object (also: #h, #escapeHTML)

# === escape_html.
#pretty(string, shift = '  ') ⇒ Object

# === pretty.
#rfc1123_date(time) ⇒ Object

# === rfc1123_date.
#unescape(string, encoding = @@accept_charset) ⇒ Object

# === unescape.
#unescape_element(string, *elements) ⇒ Object (also: #unescapeElement)

# === unescape_element.
#unescapeHTML(string) ⇒ Object (also: #unescape_html)

# === unescapeHTML.

Instance Method Details

#escape(i) ⇒ `Object`

#

escape

URL-encode a string.

Invocation example:

url_encoded_string = CGI.escape("'Stop!' said Fred")
   # => "%27Stop%21%27+said+Fred"

#

# File 'lib/cyberweb/cgi/util.rb', line 47

# === escape
#
# URL-encode a string.
#
# Every run of bytes other than ASCII letters, digits, "_", ".", "-", "~"
# and the space is replaced by upper-case %XX escapes, one per byte.
# Spaces are then turned into "+". Note that the tilde is part of the
# excluded set, so it is NOT escaped.
#
# i:: the String to encode; it is not modified.
#
# Returns a new String tagged with the encoding of +i+.
def escape(i)
  source_encoding = i.encoding
  # Work on a binary copy so that multi-byte characters are escaped byte by byte.
  percent_encoded = i.b.gsub(/([^ a-zA-Z0-9_.\-~]+)/) do |run|
    run.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  percent_encoded.tr(' ', '+').force_encoding(source_encoding)
end

#escape_element(string, *elements) ⇒ `Object` Also known as: escapeElement

#

escape_element

Escape only the tags of certain HTML elements in string.

Takes an element or elements or array of elements. Each element is specified by the name of the element, without angle brackets. This matches both the start and the end tag of that element. The attribute list of the open tag will also be escaped (for instance, the double-quotes surrounding attribute values).

print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"

print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"

#

# File 'lib/cyberweb/cgi/util.rb', line 146

# === escape_element
#
# HTML-escape only the tags of the named elements inside +string+.
#
# string::   the HTML text to process; it is not modified.
# elements:: element names without angle brackets, given either as
#            separate arguments or as one Array. The names are
#            interpolated into a case-insensitive pattern that matches
#            both the start tag (with its attributes) and the end tag.
#
# Returns +string+ itself when no element names are given, otherwise a
# new String in which every matching tag was passed through
# CGI.escapeHTML.
def escape_element(string, *elements)
  elements = elements.first if elements.first.is_a?(Array)
  return string if elements.empty?

  string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) { |tag| CGI.escapeHTML(tag) }
end

#escape_html(i) ⇒ `Object` Also known as: h, escapeHTML

#

escape_html

Escape the special characters in HTML, namely ' & " < and >.

CGI.escapeHTML('Usage: foo "bar" <baz>')
   # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"

#



237
238
239

# File 'lib/cyberweb/cgi/util.rb', line 237

# === escape_html
#
# Replace each of the characters ' & " < > in +i+ with the value stored
# for it in TABLE_FOR_ESCAPE_HTML__.
#
# i:: the String to escape; it is not modified.
#
# Returns a new String.
def escape_html(i)
  i.gsub(/['&\"<>]/) { |special| TABLE_FOR_ESCAPE_HTML__[special] }
end

#pretty(string, shift = '  ') ⇒ `Object`

#

pretty

Prettify (indent) a HTML string.

string is the HTML string to indent.

shift is the indentation unit to use; it defaults to two spaces.

print CGI.pretty("<HTML><BODY></BODY></HTML>")
  # <HTML>
  #   <BODY>
  #   </BODY>
  # </HTML>

print CGI.pretty("<HTML><BODY></BODY></HTML>", "\t")
  # <HTML>
  #         <BODY>
  #         </BODY>
  # </HTML>

#

# File 'lib/cyberweb/cgi/util.rb', line 219

# === pretty
#
# Prettify (indent) an HTML string.
#
# string:: the HTML text to indent; a new String is built from it.
# shift::  the indentation unit inserted once per nesting level
#          (two spaces by default).
#
# Returns the re-indented String, with every tag on a line of its own.
def pretty(string, shift = '  ')
  # Put each tag on its own line: a newline before every tag that is not at
  # the very start, and one after every tag not already followed by one.
  lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
  end_pos = 0
  # Find the next closing tag that starts a line; $1 is its element name.
  while end_pos = lines.index(/^<\/(\w+)/, end_pos)
    element = $1.dup
    # Search backwards for the closest line that opens the same element.
    start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
    # Indent every line after the opening tag by one shift and wrap the span
    # in "__" markers. The markers move both tags off the start of their
    # lines, so neither is matched again by the two patterns above.
    lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
  end
  # Strip the "__" markers that sit between the indentation and a tag.
  lines.gsub(/^((?:#{Regexp.quote(shift)})*)__(?=<\/?\w)/, '\1')
end

#rfc1123_date(time) ⇒ `Object`

#

rfc1123_date

Format a Time object as a String using the format specified by RFC 1123.

CGI::rfc1123_date(Time.now)
  # Sat, 01 Jan 2000 00:00:00 GMT

#

# File 'lib/cyberweb/cgi/util.rb', line 189

# === rfc1123_date
#
# Format a Time as an RFC 1123 date string such as
# "Sat, 01 Jan 2000 00:00:00 GMT".
#
# time:: the Time to format; it is neither modified nor required to be
#        in UTC already.
#
# Returns the formatted String. Day and month names are looked up in
# RFC822_DAYS and RFC822_MONTHS.
def rfc1123_date(time)
  # getutc returns a fresh UTC copy. The previous clone.gmtime mutated the
  # clone in place, which raises FrozenError when +time+ is frozen.
  utc = time.getutc
  format(
    '%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT',
    RFC822_DAYS[utc.wday], utc.day, RFC822_MONTHS[utc.month - 1], utc.year,
    utc.hour, utc.min, utc.sec
  )
end

#unescape(string, encoding = @@accept_charset) ⇒ `Object`

#

unescape

URL-decode a string, optionally specifying the encoding of the result.

string = CGI.unescape("%27Stop%21%27+said+Fred")
   # => "'Stop!' said Fred"

#

# File 'lib/cyberweb/cgi/util.rb', line 63

# === unescape
#
# URL-decode a string.
#
# string::   the encoded text; "+" becomes a space and every run of %XX
#            escapes is turned back into the bytes it names.
# encoding:: the encoding to tag the result with (defaults to
#            @@accept_charset).
#
# Returns the decoded String. When the decoded bytes are not valid in
# +encoding+, the result is tagged with the encoding of +string+ instead.
def unescape(string, encoding = @@accept_charset)
  with_spaces = string.tr('+', ' ').b
  decoded = with_spaces.gsub(/((?:%[0-9a-fA-F]{2})+)/) { |run| [run.delete('%')].pack('H*') }
  decoded.force_encoding(encoding)
  return decoded if decoded.valid_encoding?

  decoded.force_encoding(string.encoding)
end

#unescape_element(string, *elements) ⇒ `Object` Also known as: unescapeElement

#

unescape_element

Undo escaping such as that done by CGI.escapeElement()

print CGI.unescapeElement(
        CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  # "&lt;BR&gt;<A HREF="url"></A>"

print CGI.unescapeElement(
        CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  # "&lt;BR&gt;<A HREF="url"></A>"

#

# File 'lib/cyberweb/cgi/util.rb', line 170

# === unescape_element
#
# Undo HTML escaping for the tags of the named elements only.
#
# string::   the escaped text to process; it is not modified.
# elements:: element names without angle brackets, given either as
#            separate arguments or as one Array. The names are
#            interpolated into a case-insensitive pattern that matches
#            escaped start tags (with attributes) and escaped end tags.
#
# Returns +string+ itself when no element names are given, otherwise a
# new String in which every matching escaped tag was passed through
# unescapeHTML.
def unescape_element(string, *elements)
  elements = elements.first if elements.first.is_a?(Array)
  return string if elements.empty?

  string.gsub(/&lt;\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?&gt;/i) { |escaped_tag| unescapeHTML(escaped_tag) }
end

#unescapeHTML(string) ⇒ `Object` Also known as: unescape_html

#

unescapeHTML

Unescape a string that has been HTML-escaped.

Usage example:

CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
   # => "Usage: foo \"bar\" <baz>"

#

# File 'lib/cyberweb/cgi/util.rb', line 80

# === unescapeHTML
#
# Unescape a string that has been HTML-escaped: the named references
# apos, amp, quot, gt and lt as well as decimal and hexadecimal numeric
# character references are turned back into characters.
#
# string:: the escaped text; it is not modified.
#
# Returns the unescaped String. For ASCII-compatible encodings +string+
# itself is returned when it contains no "&". A numeric reference is kept
# as a reference (never raising RangeError) when its value cannot be
# represented in the encoding of +string+ or is not a valid code point,
# e.g. "&#x110000;" or a surrogate.
def unescapeHTML(string)
  enc = string.encoding
  # UTF-16/32 are not ASCII-compatible, so pattern and replacements have to
  # be transcoded. This branch must run before the include?('&') shortcut
  # below, which raises Encoding::CompatibilityError for such strings.
  if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
    return string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do |reference|
      name = $1.encode(US_ASCII_ENCODING)
      begin
        case name
        when 'apos'               then "'".encode(enc)
        when 'amp'                then '&'.encode(enc)
        when 'quot'               then '"'.encode(enc)
        when 'gt'                 then '>'.encode(enc)
        when 'lt'                 then '<'.encode(enc)
        when /\A#0*(\d+)\z/       then $1.to_i.chr(enc)
        when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc)
        end
      rescue RangeError
        # Not a valid code point in this encoding: keep the reference as written.
        reference
      end
    end
  end

  return string unless string.include?('&')

  asciicompat = Encoding.compatible?(string, "a")
  # Turns a code point into a character of +enc+, or nil when the value is
  # outside what this encoding is allowed to receive or is rejected by
  # Integer#chr (surrogates, values above 0x10FFFF, bignums).
  to_char = lambda do |codepoint|
    representable = enc == Encoding::UTF_8 ||
                    (enc == Encoding::ISO_8859_1 && codepoint < 256) ||
                    (asciicompat && codepoint < 128)
    begin
      representable ? codepoint.chr(enc) : nil
    rescue RangeError
      nil
    end
  end

  string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
    match = $1.dup
    case match
    when 'apos' then "'"
    when 'amp'  then '&'
    when 'quot' then '"'
    when 'gt'   then '>'
    when 'lt'   then '<'
    when /\A#0*(\d+)\z/
      digits = $1
      to_char.call(digits.to_i) || "&##{digits};"
    when /\A#x([0-9a-f]+)\z/i
      hex_digits = $1
      to_char.call(hex_digits.hex) || "&#x#{hex_digits};"
    else
      "&#{match};"
    end
  end
end

Module: Cyberweb::CGI::Util

Overview

Constant Summary collapse

#

US_ASCII_ENCODING

#

#

Cyberweb::CGI::Util::TABLE_FOR_ESCAPE_HTML__

#

Instance Method Summary collapse

# === escape.

# === escape_element.

# === escape_html.

# === pretty.

# === rfc1123_date.

# === unescape.

# === unescape_element.

# === unescapeHTML.

Instance Method Details

#escape(i) ⇒ Object

#

escape

#

#escape_element(string, *elements) ⇒ Object Also known as: escapeElement

#