Module: MDUrl::Decode
- Defined in:
- lib/mdurl-rb/decode.rb
Constant Summary collapse
- DEFTAULT_CHARS =
';/?:@&=+$,#'
- COMPONENT_CHARS =
''
- @@decodeCache =
{}
Class Method Summary collapse
-
.decode(string, exclude = nil) ⇒ Object
Decode percent-encoded string.
-
.getDecodeCache(exclude) ⇒ Object
Build (and memoize per `exclude` string) the 128-entry ASCII lookup table used by `.decode`.
Class Method Details
.decode(string, exclude = nil) ⇒ Object
Decode percent-encoded string.
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/mdurl-rb/decode.rb', line 33

# Decode percent-encoded string.
#
# Every run of consecutive `%XX` escapes is interpreted as a UTF-8 byte
# stream. Well-formed sequences become the character they encode; malformed,
# overlong, surrogate or out-of-range sequences become U+FFFD replacement
# characters (one per consumed byte). ASCII bytes are mapped through the
# lookup table returned by getDecodeCache(exclude).
#
# @param string [String] text that may contain percent-escapes
# @param exclude [String, nil] set of characters passed to getDecodeCache;
#   any non-String value selects DEFTAULT_CHARS
# @return [String] the decoded text
def self.decode(string, exclude = nil)
  exclude = DEFTAULT_CHARS unless exclude.is_a?(String)
  cache = getDecodeCache(exclude)

  string.gsub(/(%[a-f0-9]{2})+/i) do |seq|
    result = +''
    i = 0
    l = seq.length

    # Each escape is three characters wide ("%XX"), so byte k of a
    # multi-byte sequence starting at i has its hex digits at i + 3k + 1.
    while i < l
      b1 = seq.slice((i + 1)...(i + 3)).to_i(16)

      # Single ASCII byte: the table holds its replacement text.
      if b1 < 0x80
        result << cache[b1]
        i += 3
        next
      end

      if (b1 & 0xE0) == 0xC0 && (i + 3 < l)
        # 110xxxxx 10xxxxxx
        b2 = seq.slice((i + 4)...(i + 6)).to_i(16)

        if (b2 & 0xC0) == 0x80
          char = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)

          # Values below 0x80 are overlong encodings.
          result << (char < 0x80 ? "\ufffd\ufffd" : char.chr(Encoding::UTF_8))

          i += 6
          next
        end
      end

      if (b1 & 0xF0) == 0xE0 && (i + 6 < l)
        # 1110xxxx 10xxxxxx 10xxxxxx
        b2 = seq.slice((i + 4)...(i + 6)).to_i(16)
        b3 = seq.slice((i + 7)...(i + 9)).to_i(16)

        if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80
          char = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)

          # Reject overlong encodings and UTF-16 surrogate code points.
          if char < 0x800 || (char >= 0xD800 && char <= 0xDFFF)
            result << "\ufffd\ufffd\ufffd"
          else
            result << char.chr(Encoding::UTF_8)
          end

          i += 9
          next
        end
      end

      if (b1 & 0xF8) == 0xF0 && (i + 9 < l)
        # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        b2 = seq.slice((i + 4)...(i + 6)).to_i(16)
        b3 = seq.slice((i + 7)...(i + 9)).to_i(16)
        b4 = seq.slice((i + 10)...(i + 12)).to_i(16)

        if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80 && (b4 & 0xC0) == 0x80
          char = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)

          if char < 0x10000 || char > 0x10FFFF
            result << "\ufffd\ufffd\ufffd\ufffd"
          else
            # Ruby strings hold full Unicode scalar values, so the code point
            # is emitted directly. Splitting it into a UTF-16 surrogate pair
            # would raise RangeError: surrogates are not encodable in UTF-8.
            result << char.chr(Encoding::UTF_8)
          end

          i += 12
          next
        end
      end

      # Lead byte with no valid continuation: replace this byte only and
      # resynchronise on the next escape.
      result << "\ufffd"
      i += 3
    end

    result
  end
end
.getDecodeCache(exclude) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/mdurl-rb/decode.rb', line 11

# Return the decode lookup table for +exclude+, building and memoizing it in
# @@decodeCache on first use.
#
# The table has one entry per ASCII code (0..127). An entry is the character
# itself, unless that character occurs in +exclude+, in which case the entry
# is its uppercase two-digit "%XX" escape.
#
# @param exclude [String] characters whose entries hold an escape
# @return [Array<String>] table indexed by character code
def self.getDecodeCache(exclude)
  memoized = @@decodeCache[exclude]
  return memoized if memoized

  # Start from the identity mapping: code -> one-character string.
  table = Array.new(128) { |code| code.chr }
  @@decodeCache[exclude] = table

  # Overwrite the entries of excluded characters with their escape.
  exclude.length.times do |index|
    code = exclude[index].ord
    hex = '0' + code.to_s(16).upcase
    table[code] = '%' + hex.slice(-2, 2)
  end

  table
end