Module: HTMLEntities

Defined in:
lib/wiki_lyrics/utils/htmlentities.rb

Overview

Copyright © 2006-2008 by Sergio Pistone [email protected]

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

Constant Summary collapse

@@named_entities =
{
  "quot" => 34,
  "amp" => 38,
  "lt" => 60,
  "gt" => 62,
  "OElig" => 338,
  "oelig" => 339,
  "Scaron" => 352,
  "scaron" => 353,
  "Yuml" => 376,
  "circ" => 710,
  "tilde" => 732,
  "ensp" => 8194,
  "emsp" => 8195,
  "thinsp" => 8201,
  "zwnj" => 8204,
  "zwj" => 8205,
  "lrm" => 8206,
  "rlm" => 8207,
  "ndash" => 8211,
  "mdash" => 8212,
  "lsquo" => 8216,
  "rsquo" => 8217,
  "sbquo" => 8218,
  "ldquo" => 8220,
  "rdquo" => 8221,
  "bdquo" => 8222,
  "dagger" => 8224,
  "Dagger" => 8225,
  "permil" => 8240,
  "lsaquo" => 8249,
  "rsaquo" => 8250,
  "euro" => 8364,
  "fnof" => 402,
  "Alpha" => 913,
  "Beta" => 914,
  "Gamma" => 915,
  "Delta" => 916,
  "Epsilon" => 917,
  "Zeta" => 918,
  "Eta" => 919,
  "Theta" => 920,
  "Iota" => 921,
  "Kappa" => 922,
  "Lambda" => 923,
  "Mu" => 924,
  "Nu" => 925,
  "Xi" => 926,
  "Omicron" => 927,
  "Pi" => 928,
  "Rho" => 929,
  "Sigma" => 931,
  "Tau" => 932,
  "Upsilon" => 933,
  "Phi" => 934,
  "Chi" => 935,
  "Psi" => 936,
  "Omega" => 937,
  "alpha" => 945,
  "beta" => 946,
  "gamma" => 947,
  "delta" => 948,
  "epsilon" => 949,
  "zeta" => 950,
  "eta" => 951,
  "theta" => 952,
  "iota" => 953,
  "kappa" => 954,
  "lambda" => 955,
  "mu" => 956,
  "nu" => 957,
  "xi" => 958,
  "omicron" => 959,
  "pi" => 960,
  "rho" => 961,
  "sigmaf" => 962,
  "sigma" => 963,
  "tau" => 964,
  "upsilon" => 965,
  "phi" => 966,
  "chi" => 967,
  "psi" => 968,
  "omega" => 969,
  "thetasym" => 977,
  "upsih" => 978,
  "piv" => 982,
  "bull" => 8226,
  "hellip" => 8230,
  "prime" => 8242,
  "Prime" => 8243,
  "oline" => 8254,
  "frasl" => 8260,
  "weierp" => 8472,
  "image" => 8465,
  "real" => 8476,
  "trade" => 8482,
  "alefsym" => 8501,
  "larr" => 8592,
  "uarr" => 8593,
  "rarr" => 8594,
  "darr" => 8595,
  "harr" => 8596,
  "crarr" => 8629,
  "lArr" => 8656,
  "uArr" => 8657,
  "rArr" => 8658,
  "dArr" => 8659,
  "hArr" => 8660,
  "forall" => 8704,
  "part" => 8706,
  "exist" => 8707,
  "empty" => 8709,
  "nabla" => 8711,
  "isin" => 8712,
  "notin" => 8713,
  "ni" => 8715,
  "prod" => 8719,
  "sum" => 8721,
  "minus" => 8722,
  "lowast" => 8727,
  "radic" => 8730,
  "prop" => 8733,
  "infin" => 8734,
  "ang" => 8736,
  "and" => 8743,
  "or" => 8744,
  "cap" => 8745,
  "cup" => 8746,
  "int" => 8747,
  "there4" => 8756,
  "sim" => 8764,
  "cong" => 8773,
  "asymp" => 8776,
  "ne" => 8800,
  "equiv" => 8801,
  "le" => 8804,
  "ge" => 8805,
  "sub" => 8834,
  "sup" => 8835,
  "nsub" => 8836,
  "sube" => 8838,
  "supe" => 8839,
  "oplus" => 8853,
  "otimes" => 8855,
  "perp" => 8869,
  "sdot" => 8901,
  "lceil" => 8968,
  "rceil" => 8969,
  "lfloor" => 8970,
  "rfloor" => 8971,
  "lang" => 9001,
  "rang" => 9002,
  "loz" => 9674,
  "spades" => 9824,
  "clubs" => 9827,
  "hearts" => 9829,
  "diams" => 9830,
  "nbsp" => 160,
  "iexcl" => 161,
  "cent" => 162,
  "pound" => 163,
  "curren" => 164,
  "yen" => 165,
  "brvbar" => 166,
  "sect" => 167,
  "uml" => 168,
  "copy" => 169,
  "ordf" => 170,
  "laquo" => 171,
  "not" => 172,
  "shy" => 173,
  "reg" => 174,
  "macr" => 175,
  "deg" => 176,
  "plusmn" => 177,
  "sup2" => 178,
  "sup3" => 179,
  "acute" => 180,
  "micro" => 181,
  "para" => 182,
  "middot" => 183,
  "cedil" => 184,
  "sup1" => 185,
  "ordm" => 186,
  "raquo" => 187,
  "frac14" => 188,
  "frac12" => 189,
  "frac34" => 190,
  "iquest" => 191,
  "Agrave" => 192,
  "Aacute" => 193,
  "Acirc" => 194,
  "Atilde" => 195,
  "Auml" => 196,
  "Aring" => 197,
  "AElig" => 198,
  "Ccedil" => 199,
  "Egrave" => 200,
  "Eacute" => 201,
  "Ecirc" => 202,
  "Euml" => 203,
  "Igrave" => 204,
  "Iacute" => 205,
  "Icirc" => 206,
  "Iuml" => 207,
  "ETH" => 208,
  "Ntilde" => 209,
  "Ograve" => 210,
  "Oacute" => 211,
  "Ocirc" => 212,
  "Otilde" => 213,
  "Ouml" => 214,
  "times" => 215,
  "Oslash" => 216,
  "Ugrave" => 217,
  "Uacute" => 218,
  "Ucirc" => 219,
  "Uuml" => 220,
  "Yacute" => 221,
  "THORN" => 222,
  "szlig" => 223,
  "agrave" => 224,
  "aacute" => 225,
  "acirc" => 226,
  "atilde" => 227,
  "auml" => 228,
  "aring" => 229,
  "aelig" => 230,
  "ccedil" => 231,
  "egrave" => 232,
  "eacute" => 233,
  "ecirc" => 234,
  "euml" => 235,
  "igrave" => 236,
  "iacute" => 237,
  "icirc" => 238,
  "iuml" => 239,
  "eth" => 240,
  "ntilde" => 241,
  "ograve" => 242,
  "oacute" => 243,
  "ocirc" => 244,
  "otilde" => 245,
  "ouml" => 246,
  "divide" => 247,
  "oslash" => 248,
  "ugrave" => 249,
  "uacute" => 250,
  "ucirc" => 251,
  "uuml" => 252,
  "yacute" => 253,
  "thorn" => 254,
  "yuml" => 255,
}

Class Method Summary collapse

Class Method Details

.decode(text) ⇒ Object



28
29
30
31
32
# File 'lib/wiki_lyrics/utils/htmlentities.rb', line 28

def HTMLEntities.decode( text )
  text = String.new( text )
  HTMLEntities.decode!( text )
  return text
end

.decode!(text) ⇒ Object



21
22
23
24
25
26
# File 'lib/wiki_lyrics/utils/htmlentities.rb', line 21

def HTMLEntities.decode!( text )
  aux1 = text.gsub!( /&([a-zA-Z]+);/ ) { (n = @@named_entities[$1]) ? [n].pack( "U" ) : $0 }
  aux2 = text.gsub!( /&#(\d{2,4});/ ) { [$1.to_i()].pack( "U" ) }
  aux3 = text.gsub!( /&#x([0-9a-fA-F]{2,4});/ ) { [s.to_i(16)].pack( "U" ) }
  return (aux1||aux2||aux3) ? text : nil
end