Module: Addressable::IDNA

Extended by:
Gem::Deprecate
Defined in:
lib/addressable/idna/pure.rb,
lib/addressable/idna/native.rb

Defined Under Namespace

Classes: PunycodeBadInput, PunycodeBigOutput, PunycodeOverflow

Constant Summary collapse

UNICODE_TABLE =
File.expand_path(
  File.join(File.dirname(__FILE__), '../../..', 'data/unicode.data')
)
ACE_PREFIX =
"xn--"
UTF8_REGEX =
/\A(?:
[\x09\x0A\x0D\x20-\x7E]               # ASCII
| [\xC2-\xDF][\x80-\xBF]              # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF]          # excluding overlongs
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}   # straight 3-byte
| \xED[\x80-\x9F][\x80-\xBF]          # excluding surrogates
| \xF0[\x90-\xBF][\x80-\xBF]{2}       # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3}           # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2}       # plane 16
)*\z/mnx
UTF8_REGEX_MULTIBYTE =
/(?:
[\xC2-\xDF][\x80-\xBF]                # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF]          # excluding overlongs
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}   # straight 3-byte
| \xED[\x80-\x9F][\x80-\xBF]          # excluding surrogates
| \xF0[\x90-\xBF][\x80-\xBF]{2}       # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3}           # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2}       # plane 16
)/mnx
# UNICODE_DATA_* are small consecutive integers (0..6).
# NOTE(review): presumably positional indexes into per-code-point records
# loaded from the Unicode data table; they are not referenced by the code
# visible on this page -- confirm against the loader.
UNICODE_DATA_COMBINING_CLASS =
0
UNICODE_DATA_EXCLUSION =
1
UNICODE_DATA_CANONICAL =
2
UNICODE_DATA_COMPATIBILITY =
3
UNICODE_DATA_UPPERCASE =
4
UNICODE_DATA_LOWERCASE =
5
UNICODE_DATA_TITLECASE =
6
# Starts out as an empty Hash.
# NOTE(review): presumably filled in elsewhere; not used by the code shown here.
COMPOSITION_TABLE =
{}
# Upper bound on the number of code points punycode_decode will emit.
UNICODE_MAX_LENGTH =
256
# Upper bound on the number of output units punycode_encode will emit;
# punycode_decode also rejects input longer than twice this value.
ACE_MAX_LENGTH =
256
# Bootstring parameters for Punycode. The values below (base 36, tmin 1,
# tmax 26, skew 38, damp 700, initial bias 72, initial n 0x80) are the ones
# specified for Punycode in RFC 3492, section 5.
PUNYCODE_BASE =
36
PUNYCODE_TMIN =
1
PUNYCODE_TMAX =
26
PUNYCODE_SKEW =
38
PUNYCODE_DAMP =
700
PUNYCODE_INITIAL_BIAS =
72
# First non-basic code point (128); everything below it is ASCII.
PUNYCODE_INITIAL_N =
0x80
# Code point of "-" (0x2D); punycode_encode writes it after the basic code points.
PUNYCODE_DELIMITER =
0x2D
# Bound used by the explicit overflow checks in punycode_encode and
# punycode_decode; punycode_encode also uses it as the initial "larger than
# any code point" value when searching for the next code point to handle.
PUNYCODE_MAXINT =
1 << 64
# 128-character lookup string indexed by ASCII code: the eight 16-character
# rows cover codes 0x00-0x7F. Printable characters (0x20-0x7E) map to
# themselves; the control codes and DEL (0x7F) are represented by "\n".
PUNYCODE_PRINT_ASCII =
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" +
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" +
" !\"\#$%&'()*+,-./" +
"0123456789:;<=>?" +
"@ABCDEFGHIJKLMNO" +
"PQRSTUVWXYZ[\\]^_" +
"`abcdefghijklmno" +
"pqrstuvwxyz{|}~\n"

Class Method Summary collapse

Class Method Details

.punycode_decode(value) ⇒ Object



334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# File 'lib/addressable/idna/pure.rb', line 334

# Decodes a Punycode string into a UTF-8 string of the code points it encodes.
#
# The leftover C-style comments (e.g. the memmove note) suggest this is a
# port of a C reference decoder; the helpers punycode_delimiter?,
# punycode_basic?, punycode_decode_digit and punycode_adapt are defined
# outside this listing.
#
# @param punycode [String] the encoded text; to_unicode passes the label
#   with ACE_PREFIX already stripped
# @return [String] the decoded code points packed with "U*" (UTF-8)
# @raise [PunycodeBigOutput] if the input is longer than ACE_MAX_LENGTH * 2
#   bytes, or decoding would produce more than UNICODE_MAX_LENGTH code points
# @raise [PunycodeBadInput] if the input contains a byte above 127, a
#   non-basic code point before the last delimiter, an invalid digit, or
#   ends in the middle of a variable-length integer
# @raise [PunycodeOverflow] if an intermediate value would exceed
#   PUNYCODE_MAXINT
def self.punycode_decode(punycode)
  input = []
  output = []

  # Reject oversized input up front, before doing any per-byte work.
  if ACE_MAX_LENGTH * 2 < punycode.size
    raise PunycodeBigOutput, "Output would exceed the space provided."
  end
  # Collect the input as byte values; only 7-bit bytes are accepted.
  punycode.each_byte do |c|
    unless c >= 0 && c <= 127
      raise PunycodeBadInput, "Input is invalid."
    end
    input.push(c)
  end

  input_length = input.length
  # One-element array that is only ever written in this method (see the end
  # of the method); it looks like a stand-in for a C out-parameter.
  output_length = [UNICODE_MAX_LENGTH]

  # Initialize the state
  n = PUNYCODE_INITIAL_N

  out = i = 0
  max_out = output_length[0]
  bias = PUNYCODE_INITIAL_BIAS

  # Handle the basic code points:  Let b be the number of input code
  # points before the last delimiter, or 0 if there is none, then
  # copy the first b code points to the output.

  b = 0
  input_length.times do |j|
    # Keeps overwriting b, so it ends up as the index of the LAST delimiter.
    b = j if punycode_delimiter?(input[j])
  end
  if b > max_out
    raise PunycodeBigOutput, "Output would exceed the space provided."
  end

  b.times do |j|
    unless punycode_basic?(input[j])
      raise PunycodeBadInput, "Input is invalid."
    end
    output[out] = input[j]
    out+=1
  end

  # Main decoding loop:  Start just after the last delimiter if any
  # basic code points were copied; start at the beginning otherwise.
  # (A delimiter at index 0 leaves b == 0, so it is then read as a digit.)

  in_ = b > 0 ? b + 1 : 0
  while in_ < input_length

    # in_ is the index of the next character to be consumed, and
    # out is the number of code points in the output array.

    # Decode a generalized variable-length integer into delta,
    # which gets added to i.  The overflow checking is easier
    # if we increase i as we go, then subtract off its starting
    # value at the end to obtain delta.

    oldi = i; w = 1; k = PUNYCODE_BASE
    while true
      # Running out of input mid-integer means the encoding is truncated.
      if in_ >= input_length
        raise PunycodeBadInput, "Input is invalid."
      end
      digit = punycode_decode_digit(input[in_])
      in_+=1
      # A value of PUNYCODE_BASE or more is treated as "not a valid digit".
      if digit >= PUNYCODE_BASE
        raise PunycodeBadInput, "Input is invalid."
      end
      if digit > (PUNYCODE_MAXINT - i) / w
        raise PunycodeOverflow, "Input needs wider integers to process."
      end
      i += digit * w
      # t is the threshold for this digit position: k - bias clamped to
      # the range PUNYCODE_TMIN..PUNYCODE_TMAX.
      t = (
        if k <= bias
          PUNYCODE_TMIN
        elsif k >= bias + PUNYCODE_TMAX
          PUNYCODE_TMAX
        else
          k - bias
        end
      )
      # A digit below the threshold terminates the integer.
      break if digit < t
      if w > PUNYCODE_MAXINT / (PUNYCODE_BASE - t)
        raise PunycodeOverflow, "Input needs wider integers to process."
      end
      w *= PUNYCODE_BASE - t
      k += PUNYCODE_BASE
    end

    # i - oldi is the delta just decoded; the last argument is true only
    # while i was still 0 before this integer was read.
    bias = punycode_adapt(i - oldi, out + 1, oldi == 0)

    # i was supposed to wrap around from out + 1 to 0,
    # incrementing n each time, so we'll fix that now:

    if i / (out + 1) > PUNYCODE_MAXINT - n
      raise PunycodeOverflow, "Input needs wider integers to process."
    end
    n += i / (out + 1)
    i %= out + 1

    # Insert n at position i of the output:

    # not needed for Punycode:
    # raise PUNYCODE_INVALID_INPUT if decode_digit(n) <= base
    if out >= max_out
      raise PunycodeBigOutput, "Output would exceed the space provided."
    end

    # Shift the tail one slot to the right to make room at index i.
    #memmove(output + i + 1, output + i, (out - i) * sizeof *output)
    output[i + 1, out - i] = output[i, out - i]
    output[i] = n
    i += 1

    out += 1
  end

  # Written but never read again within this method.
  output_length[0] = out

  # Pack the collected code points into a UTF-8 string.
  output.pack("U*")
end

.punycode_encode(value) ⇒ Object



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# File 'lib/addressable/idna/pure.rb', line 213

# Encodes a string's code points as Punycode.
#
# The result does not include ACE_PREFIX; to_ascii prepends it. The helpers
# punycode_basic?, punycode_encode_digit and punycode_adapt are defined
# outside this listing.
#
# @param unicode [String, #to_s] text to encode; non-Strings are converted
#   with to_s, and the text is unpacked with "U*" (i.e. read as UTF-8)
# @return [String] the Punycode form, built from single-byte characters
# @raise [PunycodeBigOutput] if the encoding would need more than
#   ACE_MAX_LENGTH output units
# @raise [PunycodeOverflow] if an intermediate delta would exceed
#   PUNYCODE_MAXINT
# @raise [StandardError] if a produced output unit is outside 0..127
def self.punycode_encode(unicode)
  unicode = unicode.to_s unless unicode.is_a?(String)
  input = unicode.unpack("U*")
  # Pre-filled with zeros, one slot longer than the maximum output; the
  # final slice below relies on the unused slots still being 0.
  output = [0] * (ACE_MAX_LENGTH + 1)
  input_length = input.size
  # One-element array that is only ever written in this method; it looks
  # like a stand-in for a C out-parameter.
  output_length = [ACE_MAX_LENGTH]

  # Initialize the state
  n = PUNYCODE_INITIAL_N
  delta = out = 0
  max_out = output_length[0]
  bias = PUNYCODE_INITIAL_BIAS

  # Handle the basic code points:
  input_length.times do |j|
    if punycode_basic?(input[j])
      # Needs room for this code point plus at least one more unit.
      if max_out - out < 2
        raise PunycodeBigOutput,
          "Output would exceed the space provided."
      end
      output[out] = input[j]
      out += 1
    end
  end

  h = b = out

  # h is the number of code points that have been handled, b is the
  # number of basic code points, and out is the number of characters
  # that have been output.

  # The delimiter is only written when at least one basic code point exists.
  if b > 0
    output[out] = PUNYCODE_DELIMITER
    out += 1
  end

  # Main encoding loop:

  while h < input_length
    # All non-basic code points < n have been
    # handled already.  Find the next larger one:

    # m starts at PUNYCODE_MAXINT and shrinks to the smallest code point
    # that is >= n.
    m = PUNYCODE_MAXINT
    input_length.times do |j|
      m = input[j] if (n...m) === input[j]
    end

    # Increase delta enough to advance the decoder's
    # <n,i> state to <m,0>, but guard against overflow:

    if m - n > (PUNYCODE_MAXINT - delta) / (h + 1)
      raise PunycodeOverflow, "Input needs wider integers to process."
    end
    delta += (m - n) * (h + 1)
    n = m

    input_length.times do |j|
      # Punycode does not need to check whether input[j] is basic:
      if input[j] < n
        delta += 1
        # NOTE(review): delta only grows here, so this wrap-around check
        # looks unreachable with Ruby's arbitrary-precision Integers.
        if delta == 0
          raise PunycodeOverflow,
            "Input needs wider integers to process."
        end
      end

      if input[j] == n
        # Represent delta as a generalized variable-length integer:

        q = delta; k = PUNYCODE_BASE
        while true
          if out >= max_out
            raise PunycodeBigOutput,
              "Output would exceed the space provided."
          end
          # t is the threshold for this digit position: k - bias clamped
          # to the range PUNYCODE_TMIN..PUNYCODE_TMAX.
          t = (
            if k <= bias
              PUNYCODE_TMIN
            elsif k >= bias + PUNYCODE_TMAX
              PUNYCODE_TMAX
            else
              k - bias
            end
          )
          # What remains fits in a single final digit, written after the loop.
          break if q < t
          output[out] =
            punycode_encode_digit(t + (q - t) % (PUNYCODE_BASE - t))
          out += 1
          q = (q - t) / (PUNYCODE_BASE - t)
          k += PUNYCODE_BASE
        end

        output[out] = punycode_encode_digit(q)
        out += 1
        # h == b is true only for the first non-basic code point handled.
        bias = punycode_adapt(delta, h + 1, h == b)
        delta = 0
        h += 1
      end
    end

    delta += 1
    n += 1
  end

  # Written but never read again within this method.
  output_length[0] = out

  # Sanity-check every unit that was produced.
  outlen = out
  outlen.times do |j|
    c = output[j]
    unless c >= 0 && c <= 127
      raise StandardError, "Invalid output char."
    end
    # String#[] with an Integer index returns nil only when the index is
    # out of range; PUNYCODE_PRINT_ASCII has an entry for every code in
    # 0..127, so after the range check above this guard is not expected
    # to fire.
    unless PUNYCODE_PRINT_ASCII[c]
      raise PunycodeBadInput, "Input is invalid."
    end
  end

  # The inclusive range also picks up output[outlen], one slot past the
  # data; that slot still holds the 0 filler, and the trailing sub strips
  # the resulting NUL character(s).
  output[0..outlen].map { |x| x.chr }.join("").sub(/\0+\z/, "")
end

.to_ascii(value) ⇒ Object

Converts from a Unicode internationalized domain name to an ASCII domain name as described in RFC 3490.



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/addressable/idna/pure.rb', line 67

# Converts a Unicode internationalized domain name to its ASCII form.
#
# The input is NFKC-normalized and then handled as raw bytes. When the bytes
# are not well-formed UTF-8, or contain no multibyte sequence at all, the
# normalized binary string is returned as-is. Otherwise the name is
# downcased via unicode_downcase, split on ".", and every label that
# contains a multibyte sequence is replaced by ACE_PREFIX followed by its
# Punycode encoding.
#
# @param input [String, #to_s] the domain name to convert
# @return [String] the converted domain name
def self.to_ascii(input)
  text = input.is_a?(String) ? input : input.to_s
  normalized = text.dup.force_encoding(Encoding::UTF_8).unicode_normalize(:nfkc)
  normalized.force_encoding(Encoding::ASCII_8BIT) if normalized.respond_to?(:force_encoding)

  # Nothing to encode unless the whole name is valid UTF-8 and has at least
  # one multibyte sequence.
  return normalized unless normalized =~ UTF8_REGEX && normalized =~ UTF8_REGEX_MULTIBYTE

  labels = unicode_downcase(normalized).split('.').map do |label|
    # The regexes below are byte-oriented, so match against binary labels.
    label.force_encoding(Encoding::ASCII_8BIT) if label.respond_to?(:force_encoding)
    if label =~ UTF8_REGEX && label =~ UTF8_REGEX_MULTIBYTE
      ACE_PREFIX + punycode_encode(label)
    else
      label
    end
  end
  labels.join('.')
end

.to_unicode(value) ⇒ Object

Converts from an ASCII domain name to a Unicode internationalized domain name as described in RFC 3490.



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/addressable/idna/pure.rb', line 93

# Converts an ASCII domain name to a Unicode internationalized domain name.
#
# The name is split on "."; every label that consists of ACE_PREFIX followed
# by at least one character is passed (without the prefix) to
# punycode_decode. Labels that do not carry the prefix, and labels whose
# payload cannot be decoded, are kept exactly as they were given: the
# conversion is meant to never fail, so every Punycode error class is
# swallowed here, not just PunycodeBadInput.
#
# @param input [String, #to_s] the domain name to convert
# @return [String] the converted name, tagged as UTF-8
def self.to_unicode(input)
  input = input.to_s unless input.is_a?(String)
  parts = input.split('.')
  parts.map! do |part|
    # Anchor to the whole label (\A...\z, with /m so "." spans newlines).
    # A line anchor (^) would also match a prefix that appears after an
    # embedded newline and silently drop the text in front of it.
    encoded = part[/\A#{ACE_PREFIX}(.+)\z/m, 1]
    if encoded
      begin
        punycode_decode(encoded)
      rescue Addressable::IDNA::PunycodeBadInput,
             Addressable::IDNA::PunycodeBigOutput,
             Addressable::IDNA::PunycodeOverflow
        # toUnicode is explicitly defined as never-fails by the spec, so an
        # undecodable label is returned untouched.
        part
      end
    else
      part
    end
  end
  output = parts.join('.')
  output.force_encoding(Encoding::UTF_8) if output.respond_to?(:force_encoding)
  output
end

.unicode_normalize_kc(value) ⇒ Object

Deprecated.

Use String#unicode_normalize(:nfkc) instead



117
118
119
# File 'lib/addressable/idna/pure.rb', line 117

# Returns the NFKC-normalized string form of +value+.
#
# @deprecated Use String#unicode_normalize(:nfkc) instead.
# @param value [#to_s] the object to normalize; it is converted with to_s
# @return [String] the result of String#unicode_normalize(:nfkc)
def unicode_normalize_kc(value)
  text = value.to_s
  text.unicode_normalize(:nfkc)
end