Module: MDUrl::Decode

Defined in:
lib/mdurl-rb/decode.rb

Constant Summary collapse

# Characters kept percent-encoded by default: decode falls back to this set
# whenever its `exclude` argument is not a String.
DEFTAULT_CHARS =
';/?:@&=+$,#'
# Empty exclusion set. Not referenced by the methods shown on this page;
# presumably meant to be passed as `exclude` so that every escape is decoded
# (TODO confirm against callers).
COMPONENT_CHARS =
''
# Memo table for getDecodeCache: maps an `exclude` string to the lookup array
# built for it.
@@decodeCache =
{}

Class Method Summary collapse

Class Method Details

.decode(string, exclude = nil) ⇒ Object

Decode a percent-encoded string.




33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/mdurl-rb/decode.rb', line 33

# Decode a percent-encoded string.
#
# Every run of consecutive `%XX` escapes is interpreted as a UTF-8 byte
# sequence and replaced with the characters it encodes. ASCII characters
# listed in `exclude` are left percent-encoded (normalised to an upper-case
# escape by the lookup table from getDecodeCache).
#
# Malformed input never raises: a byte that cannot start or continue a valid
# sequence becomes one U+FFFD, and an overlong, surrogate, or out-of-range
# sequence becomes one U+FFFD per byte consumed.
#
# @param string [String] text that may contain `%XX` escapes
# @param exclude [String, nil] characters to keep encoded; any non-String
#   value selects DEFTAULT_CHARS
# @return [String] a new string with the escapes decoded
def self.decode(string, exclude = nil)
  exclude = DEFTAULT_CHARS unless exclude.is_a?(String)
  cache = getDecodeCache(exclude)

  string.gsub(/(%[a-f0-9]{2})+/i) do |seq|
    # Unfrozen local buffer so pieces can be appended in place.
    result = ''.dup

    # `i` always points at the '%' of the escape being examined; each escape
    # is exactly three characters, so `l` is a multiple of three.
    i = 0
    l = seq.length

    while i < l
      b1 = seq[i + 1, 2].to_i(16)

      if b1 < 0x80
        # Single byte: either the literal character or its preserved escape.
        result << cache[b1]
        i += 3
        next
      end

      if (b1 & 0xE0) == 0xC0 && i + 3 < l
        # 110xxxxx 10xxxxxx
        b2 = seq[i + 4, 2].to_i(16)

        if (b2 & 0xC0) == 0x80
          char = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)

          # Values below 0x80 are overlong encodings of ASCII.
          result << (char < 0x80 ? "\ufffd\ufffd" : char.chr(Encoding::UTF_8))

          i += 6
          next
        end
      end

      if (b1 & 0xF0) == 0xE0 && i + 6 < l
        # 1110xxxx 10xxxxxx 10xxxxxx
        b2 = seq[i + 4, 2].to_i(16)
        b3 = seq[i + 7, 2].to_i(16)

        if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80
          char = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)

          # Reject overlong encodings and UTF-16 surrogate code points.
          if char < 0x800 || (char >= 0xD800 && char <= 0xDFFF)
            result << "\ufffd\ufffd\ufffd"
          else
            result << char.chr(Encoding::UTF_8)
          end

          i += 9
          next
        end
      end

      if (b1 & 0xF8) == 0xF0 && i + 9 < l
        # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        b2 = seq[i + 4, 2].to_i(16)
        b3 = seq[i + 7, 2].to_i(16)
        b4 = seq[i + 10, 2].to_i(16)

        if (b2 & 0xC0) == 0x80 && (b3 & 0xC0) == 0x80 && (b4 & 0xC0) == 0x80
          char = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)

          if char < 0x10000 || char > 0x10FFFF
            result << "\ufffd\ufffd\ufffd\ufffd"
          else
            # Ruby strings hold supplementary-plane code points directly.
            # Splitting into UTF-16 surrogate halves would make Integer#chr
            # raise RangeError, because surrogates are not valid in UTF-8.
            result << char.chr(Encoding::UTF_8)
          end

          i += 12
          next
        end
      end

      # Stray continuation byte, invalid lead byte, or truncated sequence.
      result << "\ufffd"
      i += 3
    end

    result
  end
end

.getDecodeCache(exclude) ⇒ Object




11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/mdurl-rb/decode.rb', line 11

# Return the lookup table decode uses for single-byte (`< 0x80`) escapes,
# building and memoizing it in @@decodeCache on first use for a given
# `exclude` value.
#
# Slot `n` (0..127) holds the text that the escape for byte `n` decodes to:
# the character itself, or, for a character listed in `exclude`, its
# upper-case two-digit `%XX` escape so that it stays encoded.
#
# @param exclude [String] characters that must remain percent-encoded
# @return [Array<String>] 128-entry table shared by all calls with the same
#   `exclude`
def self.getDecodeCache(exclude)
  cached = @@decodeCache[exclude]
  return cached if cached

  table = (0...128).map(&:chr)

  exclude.each_char do |char|
    code = char.ord
    # Only slots 0..127 are ever consulted. Writing a non-ASCII slot would
    # pad the array with nils up to that code point and store a truncated,
    # meaningless escape, so skip it.
    next if code >= 128

    table[code] = format('%%%02X', code)
  end

  # Memoize only after the table is complete, so a failure part-way through
  # cannot leave a half-built table behind for later calls.
  @@decodeCache[exclude] = table
end