Class: HTML::Encoder
- Inherits:
-
Object
- Object
- HTML::Encoder
- Defined in:
- lib/HTML/Encoder.rb,
lib/HTML/Encoder/Unicode.rb
Defined Under Namespace
Classes: Unicode
Instance Method Summary collapse
- #encode(string, *args) ⇒ Object
- #encode_hex(*args) ⇒ Object
-
#initialize ⇒ Encoder
constructor
A new instance of Encoder.
- #num_entity(char) ⇒ Object
Constructor Details
#initialize ⇒ Encoder
Returns a new instance of Encoder.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/HTML/Encoder.rb', line 7

# Builds the two lookup tables used by the encoder:
#
# * @entity2char - entity name (without "&" and ";") => character
# * @char2entity - character => complete entity reference ("&name;")
#
# On Ruby versions newer than 1.8.7 the table is extended (and possibly
# overridden) with HTML::Encoder::Unicode.unicode_mapping. Every character
# produced by Integer#chr for 0..255 that has no named entity is mapped to
# a decimal numeric reference ("&#N;").
#
# @return [Encoder] a new instance of Encoder
def initialize
  @entity2char = {
    'amp'    => '&',      # ampersand
    'gt'     => '>',      # greater than
    'lt'     => '<',      # less than
    'quot'   => '"',      # double quote
    'apos'   => "'",      # single quote

    # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
    'AElig'  => 198.chr,  # capital AE diphthong (ligature)
    'Aacute' => 193.chr,  # capital A, acute accent
    'Acirc'  => 194.chr,  # capital A, circumflex accent
    'Agrave' => 192.chr,  # capital A, grave accent
    'Aring'  => 197.chr,  # capital A, ring
    'Atilde' => 195.chr,  # capital A, tilde
    'Auml'   => 196.chr,  # capital A, dieresis or umlaut mark
    'Ccedil' => 199.chr,  # capital C, cedilla
    'ETH'    => 208.chr,  # capital Eth, Icelandic
    'Eacute' => 201.chr,  # capital E, acute accent
    'Ecirc'  => 202.chr,  # capital E, circumflex accent
    'Egrave' => 200.chr,  # capital E, grave accent
    'Euml'   => 203.chr,  # capital E, dieresis or umlaut mark
    'Iacute' => 205.chr,  # capital I, acute accent
    'Icirc'  => 206.chr,  # capital I, circumflex accent
    'Igrave' => 204.chr,  # capital I, grave accent
    'Iuml'   => 207.chr,  # capital I, dieresis or umlaut mark
    'Ntilde' => 209.chr,  # capital N, tilde
    'Oacute' => 211.chr,  # capital O, acute accent
    'Ocirc'  => 212.chr,  # capital O, circumflex accent
    'Ograve' => 210.chr,  # capital O, grave accent
    'Oslash' => 216.chr,  # capital O, slash
    'Otilde' => 213.chr,  # capital O, tilde
    'Ouml'   => 214.chr,  # capital O, dieresis or umlaut mark
    'THORN'  => 222.chr,  # capital THORN, Icelandic
    'Uacute' => 218.chr,  # capital U, acute accent
    'Ucirc'  => 219.chr,  # capital U, circumflex accent
    'Ugrave' => 217.chr,  # capital U, grave accent
    'Uuml'   => 220.chr,  # capital U, dieresis or umlaut mark
    'Yacute' => 221.chr,  # capital Y, acute accent
    'aacute' => 225.chr,  # small a, acute accent
    'acirc'  => 226.chr,  # small a, circumflex accent
    'aelig'  => 230.chr,  # small ae diphthong (ligature)
    'agrave' => 224.chr,  # small a, grave accent
    'aring'  => 229.chr,  # small a, ring
    'atilde' => 227.chr,  # small a, tilde
    'auml'   => 228.chr,  # small a, dieresis or umlaut mark
    'ccedil' => 231.chr,  # small c, cedilla
    'eacute' => 233.chr,  # small e, acute accent
    'ecirc'  => 234.chr,  # small e, circumflex accent
    'egrave' => 232.chr,  # small e, grave accent
    'eth'    => 240.chr,  # small eth, Icelandic
    'euml'   => 235.chr,  # small e, dieresis or umlaut mark
    'iacute' => 237.chr,  # small i, acute accent
    'icirc'  => 238.chr,  # small i, circumflex accent
    'igrave' => 236.chr,  # small i, grave accent
    'iuml'   => 239.chr,  # small i, dieresis or umlaut mark
    'ntilde' => 241.chr,  # small n, tilde
    'oacute' => 243.chr,  # small o, acute accent
    'ocirc'  => 244.chr,  # small o, circumflex accent
    'ograve' => 242.chr,  # small o, grave accent
    'oslash' => 248.chr,  # small o, slash
    'otilde' => 245.chr,  # small o, tilde
    'ouml'   => 246.chr,  # small o, dieresis or umlaut mark
    'szlig'  => 223.chr,  # small sharp s, German (sz ligature)
    'thorn'  => 254.chr,  # small thorn, Icelandic
    'uacute' => 250.chr,  # small u, acute accent
    'ucirc'  => 251.chr,  # small u, circumflex accent
    'ugrave' => 249.chr,  # small u, grave accent
    'uuml'   => 252.chr,  # small u, dieresis or umlaut mark
    'yacute' => 253.chr,  # small y, acute accent
    'yuml'   => 255.chr,  # small y, dieresis or umlaut mark

    # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
    'copy'   => 169.chr,  # copyright sign
    'reg'    => 174.chr,  # registered sign
    'nbsp'   => 160.chr,  # non breaking space

    # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
    'iexcl'  => 161.chr,
    'cent'   => 162.chr,
    'pound'  => 163.chr,
    'curren' => 164.chr,
    'yen'    => 165.chr,
    'brvbar' => 166.chr,
    'sect'   => 167.chr,
    'uml'    => 168.chr,
    'ordf'   => 170.chr,
    'laquo'  => 171.chr,
    'not'    => 172.chr,
    'shy'    => 173.chr,
    'macr'   => 175.chr,
    'deg'    => 176.chr,
    'plusmn' => 177.chr,
    'sup1'   => 185.chr,
    'sup2'   => 178.chr,
    'sup3'   => 179.chr,
    'acute'  => 180.chr,
    'micro'  => 181.chr,
    'para'   => 182.chr,
    'middot' => 183.chr,
    'cedil'  => 184.chr,
    'ordm'   => 186.chr,
    'raquo'  => 187.chr,
    'frac14' => 188.chr,
    'frac12' => 189.chr,
    'frac34' => 190.chr,
    'iquest' => 191.chr,
    'times'  => 215.chr,
    'divide' => 247.chr,
  }

  # The Unicode table is only merged in on Rubies newer than 1.8.7; its
  # entries replace same-named entries from the Latin 1 table above.
  if RUBY_VERSION > '1.8.7'
    HTML::Encoder::Unicode.unicode_mapping.each do |name, char|
      @entity2char[name] = char
    end
  end

  # Invert the table. When two names share a character, the later name wins.
  @char2entity = {}
  @entity2char.each do |name, char|
    @char2entity[char] = "&#{name};"
  end

  # Characters 0..255 without a named entity get a decimal reference.
  (0..255).each do |code|
    byte = code.chr
    next if @char2entity.has_key?(byte)

    @char2entity[byte] = "&##{code};"
  end
end
Instance Method Details
#encode(string, *args) ⇒ Object
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/HTML/Encoder.rb', line 135

# Replaces characters of +string+ with HTML entity references.
#
# Characters found in @char2entity are replaced by the stored reference;
# any other character that has to be encoded is passed to #num_entity.
#
# @param string [#to_s] the text to encode
# @param args [Array] args[0] is an optional set of characters to encode,
#   given as a string (converted with #to_s). When it is nil or empty the
#   default set is used: everything except "\n", "\r", "\t", space and the
#   printable ASCII characters other than '<', '&', '>', "'" and '"'.
# @return [String] the encoded copy of +string+
def encode(string, *args)
  unsafe = args[0].to_s # nil.to_s is "", so nil and "" both select the default set
  text = string.to_s

  if unsafe.empty?
    # Encode control chars, high bit chars and '<', '&', '>', ''' and '"'
    return text.gsub(/([^\n\r\t !\#\$%\(-;=?-~])/) { |c| @char2entity[c] || num_entity(c) }
  end

  # Resolve each requested character once, then substitute in a single pass.
  lookup = {}
  unsafe.each_char { |c| lookup[c] = @char2entity[c] || num_entity(c) }

  # /m makes "." match "\n" as well; without it a newline listed in the
  # unsafe set would never be visited and would be left unencoded.
  text.gsub(/./m) { |c| lookup.fetch(c, c) }
end
#encode_hex(*args) ⇒ Object
154 155 156 157 158 159 160 |
# File 'lib/HTML/Encoder.rb', line 154

# Same as #encode, but runs with an empty @char2entity table so that no
# named entity can be found and every encoded character goes through
# #encode's #num_entity fallback instead.
#
# @param args [Array] forwarded unchanged to #encode
# @return [String] the encoded string
def encode_hex(*args)
  saved = @char2entity
  @char2entity = {}
  encode(*args)
ensure
  # Always put the real table back, even when #encode raises; otherwise a
  # single failed call would leave this encoder without named entities.
  @char2entity = saved
end
#num_entity(char) ⇒ Object
162 163 164 |
# File 'lib/HTML/Encoder.rb', line 162

# Builds a hexadecimal numeric character reference ("&#xHH;") for the first
# character of +char+.
#
# For a validly encoded string the character's code point is used, so a
# multibyte character yields a single reference for the whole character
# rather than one for its first byte. Strings with an invalid encoding, and
# Rubies without String#valid_encoding?, fall back to the first byte.
#
# @param char [String] the character to encode (must not be empty)
# @return [String] the reference, with upper-case hex digits
def num_entity(char)
  code = if char.respond_to?(:valid_encoding?) && char.valid_encoding?
           char.ord
         else
           char.unpack('C')[0]
         end
  sprintf('&#x%X;', code)
end