Module: GSMEncoder

Defined in:: lib/gsm_encoder.rb

Overview

This module encodes and decodes Ruby Strings to and from the SMS default alphabet. It also supports the default extension table. The default alphabet and its extension table are defined in GSM 03.38.

Constant Summary collapse

# Replacement used by encode when no (encodable) replace_char is supplied.
DEFAULT_REPLACE_CHAR =

"?"

# GSM code that announces that the next code comes from EXT_CHAR_TABLE.
EXTENDED_ESCAPE =

0x1b

# Line feed; occupies a slot in CHAR_TABLE.
NL =

10.chr

# Carriage return; occupies a slot in CHAR_TABLE.
CR =

13.chr

# Backslash; occupies a slot in EXT_CHAR_TABLE.
BS =

92.chr

# Base character table, joined into one String so that the character at
# position N is the character for GSM code N (8 codes per row below).
# The slot at 0x1B (EXTENDED_ESCAPE) holds a placeholder space: decode treats
# that code as an escape before any table lookup, and encode searches with
# rindex, so a real space always maps to the later slot at 0x20.
CHAR_TABLE =

[
  '@', '£', '$', '¥', 'è', 'é', 'ù', 'ì',
  'ò', 'Ç',  NL, 'Ø', 'ø', CR , 'Å', 'å',
  'Δ', '_', 'Φ', 'Γ', 'Λ', 'Ω', 'Π', 'Ψ',
  'Σ', 'Θ', 'Ξ', " ", 'Æ', 'æ', 'ß', 'É', # 4th entry (0x1B) is the escape slot; the space is only a placeholder
  " ", '!', '"', '#', '¤', '%', '&', "'",
  '(', ')', '*', '+', ',', '-', '.', '/',
  '0', '1', '2', '3', '4', '5', '6', '7',
  '8', '9', ':', ';', '<', '=', '>', '?',
  '¡', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
  'X', 'Y', 'Z', 'Ä', 'Ö', 'Ñ', 'Ü', '§',
  '¿', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  'x', 'y', 'z', 'ä', 'ö', 'ñ', 'ü', 'à',
].join

EXT_CHAR_TABLE = Extended character table. Characters in this table are accessed by the ‘escape’ character in the base table. It is important that none of the ‘inactive’ characters ever be matchable with a valid base-table character as this breaks the encoding loop.

# Array indexed by GSM code (16 codes per row). An Integer 0 marks a slot with
# no extension character; being an Integer, it can never equal a character
# String when encode searches this table with Array#index.
[
  0,   0,   0, 0, 0,   0,   0, 0, 0,   'ç', 0, 0, 0,   0,   0,   0,
  0,   0,   0, 0, '^', 0,   0, 0, 0,   0,   0, 0, 0,   0,   0,   0,
  0,   0,   0, 0, 0,   0,   0, 0, '{', '}', 0, 0, 0,   0,   0,   BS,
  0,   0,   0, 0, 0,   0,   0, 0, 0,   0,   0, 0, '[', '~', ']', 0,
  '|', 'Á', 0, 0, 0,   0,   0, 0, 0,   'Í', 0, 0, 0,   0,   0,   'Ó',
  0,   0,   0, 0, 0,   'Ú', 0, 0, 0,   0,   0, 0, 0,   0,   0,   0,
  0,   'á', 0, 0, 0,   '€', 0, 0, 0,   'í', 0, 0, 0,   0,   0,   'ó',
  0,   0,   0, 0, 0,   'ú', 0, 0, 0,   0,   0, 0, 0,   0,   0,   0,
]

# Matches a String made up solely of encodable characters: the ASCII ranges
# ' '..'_' and 'a'..'~', plus every character of CHAR_TABLE and every
# non-placeholder entry of EXT_CHAR_TABLE (escaped for use in a character class).
# The empty String matches. \Z also tolerates one trailing newline, which is
# itself a CHAR_TABLE character.
REGEX =

/\A[ -_a-~#{Regexp.escape(CHAR_TABLE + EXT_CHAR_TABLE.select {|c| c != 0}.join)}]*\Z/

Class Method Summary collapse

.can_encode?(str) ⇒ Boolean

Verifies that this charset can represent every character in the Ruby String.
.decode(bstring) ⇒ Object
.encode(str, replace_char = nil) ⇒ Object

Class Method Details

.can_encode?(str) ⇒ `Boolean`

Verifies that this charset can represent every character in the Ruby String.

Parameters:

str —

The String to verify

Returns:

(Boolean) —

True if the charset can represent every character in the Ruby String, otherwise false.



58
59
60

# File 'lib/gsm_encoder.rb', line 58

# Reports whether every character of +str+ is matched by REGEX, i.e. can be
# represented in this charset.
#
# @param str [String, nil] the text to check
# @return [Boolean] true when +str+ is nil/false or matches REGEX in full,
#   false otherwise
def can_encode?(str)
  # Nothing to encode counts as encodable.
  return true unless str

  match_position = REGEX =~ str
  !match_position.nil?
end

.decode(bstring) ⇒ `Object`

# File 'lib/gsm_encoder.rb', line 86

# Decodes a GSM 03.38 encoded byte string into a UTF-8 Ruby String.
#
# A byte equal to EXTENDED_ESCAPE produces no output; it selects
# EXT_CHAR_TABLE for the next non-escape byte. If the extension table has no
# character at that code, the base-table character is emitted instead, as
# GSM 03.38 prescribes for unknown extension codes (previously the Integer
# placeholder 0 was appended, yielding a NUL character). Codes outside the
# table (>= 128) decode to '?'. A trailing escape byte is dropped.
#
# @param bstring [String, nil] the GSM-encoded bytes
# @return [String, nil] the decoded UTF-8 text, or nil when +bstring+ is nil/false
def decode(bstring)
  return nil unless bstring

  buffer = String.new(encoding: Encoding::UTF_8)
  escaped = false

  bstring.each_byte do |code|
    if code == EXTENDED_ESCAPE
      # Take the next character from the extension table.
      escaped = true
      next
    end

    extended = escaped ? EXT_CHAR_TABLE[code] : nil
    # The escape only applies to a single code; go back to the default table.
    escaped = false

    char = if code >= CHAR_TABLE.length
             '?'
           elsif extended.is_a?(String)
             extended
           else
             # Plain code, or an escape followed by a code with no extension entry.
             CHAR_TABLE[code]
           end
    buffer << char
  end

  buffer
end

.encode(str, replace_char = nil) ⇒ `Object`

# File 'lib/gsm_encoder.rb', line 62

# Encodes a Ruby String into GSM 03.38 bytes.
#
# Characters found in CHAR_TABLE become a single byte; characters found only
# in EXT_CHAR_TABLE become EXTENDED_ESCAPE followed by their code. Any other
# character is substituted with +replace_char+, which is itself GSM-encoded
# (previously its raw bytes were appended, so e.g. "_" produced 0x5F rather
# than its GSM code 0x11).
#
# The input is transcoded to UTF-8 first so that it can be compared with the
# UTF-8 tables; bytes that cannot be transcoded are treated as unencodable
# characters. This replaces a blanket +rescue+ that silently returned a
# truncated result when such a comparison raised.
#
# @param str [#to_s, nil] the text to encode
# @param replace_char [String, nil] substitute for unencodable characters;
#   DEFAULT_REPLACE_CHAR is used when it is nil or not itself encodable. An
#   empty String drops unencodable characters.
# @return [String, nil] binary (ASCII-8BIT) String of GSM codes, or nil when
#   +str+ is nil/false
def encode(str, replace_char = nil)
  return nil unless str

  replace_char = DEFAULT_REPLACE_CHAR if !replace_char || !can_encode?(replace_char)

  # Maps one character to its GSM code sequence, or nil when it has none.
  # rindex (not index) so that ' ' resolves to 0x20 rather than the
  # placeholder space stored in the escape slot of CHAR_TABLE.
  gsm_codes = lambda do |char|
    if (index = CHAR_TABLE.rindex(char))
      [index]
    elsif (index = EXT_CHAR_TABLE.index(char))
      [EXTENDED_ESCAPE, index]
    end
  end

  replacement = replace_char.each_char.flat_map { |char| gsm_codes.call(char) || [] }

  text = str.to_s.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
  codes = text.each_char.flat_map { |char| gsm_codes.call(char) || replacement }

  # 'C*' packs each code as one unsigned byte and yields a binary String.
  codes.pack('C*')
end