Class: ByteCharacteristics

Inherits:
Characteristics show all
Defined in:
lib/characteristics/byte.rb

Constant Summary collapse

# Pattern tested against @encoding_name by #encoding_has_c1?.
# Matches encoding names that begin with "ISO-8859-"; for those encodings
# #c1? reports the bytes 0x80..0x9F as C1 control bytes.
HAS_C1 =
/^(ISO-8859-)/
# Lookup table of unassigned byte values.
#
# Key:   a byte value (Integer).
# Value: a pattern of encoding names in which that byte has no assignment.
#
# #assigned? looks the byte up here and reports it as unassigned when the
# pattern matches @encoding_name (unless the byte is a control byte).
# Bytes without an entry are treated as assigned in every encoding.
UNASSIGNED =
{
  # 0x80..0x9F: gaps in the Windows-125x code pages
  0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
  0x83 => /^(Windows-(1250|1257))/,
  0x88 => /^(Windows-(1250|1253|1257))/,
  0x8A => /^(Windows-(1253|1255|1257|1258))/,
  0x8C => /^(Windows-(1253|1255|1257))/,
  0x8D => /^(Windows-(1252|1253|1254|1255|1258))/,
  0x8E => /^(Windows-(1253|1254|1255|1258))/,
  0x8F => /^(Windows-(1252|1253|1254|1255|1258))/,

  0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258))/,
  0x98 => /^(Windows-(1250|1251|1253|1257))/,
  0x9A => /^(Windows-(1253|1255|1257|1258))/,
  0x9C => /^(Windows-(1253|1255|1257))/,
  0x9D => /^(Windows-(1252|1253|1254|1255|1258))/,
  0x9E => /^(Windows-(1253|1254|1255|1258))/,
  0x9F => /^(Windows-(1253|1255|1257))/,

  # 0xA0..0xFF: gaps in the ISO-8859-x parts and the Windows-125x code pages
  0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/,
  0xA2 => /^(ISO-8859-(6))/,
  0xA3 => /^(ISO-8859-(6))/,
  0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/,
  0xA6 => /^(ISO-8859-(6))/,
  0xA7 => /^(ISO-8859-(6))/,
  0xA8 => /^(ISO-8859-(6))/,
  0xA9 => /^(ISO-8859-(6))/,
  0xAA => /^(ISO-8859-(6)|Windows-(1253))/,
  0xAB => /^(ISO-8859-(6))/,
  0xAE => /^(ISO-8859-(3|6|7))/,
  0xAF => /^(ISO-8859-(6))/,

  0xB0 => /^(ISO-8859-(6))/,
  0xB1 => /^(ISO-8859-(6))/,
  0xB2 => /^(ISO-8859-(6))/,
  0xB3 => /^(ISO-8859-(6))/,
  0xB4 => /^(ISO-8859-(6))/,
  0xB5 => /^(ISO-8859-(6))/,
  0xB6 => /^(ISO-8859-(6))/,
  0xB7 => /^(ISO-8859-(6))/,
  0xB8 => /^(ISO-8859-(6))/,
  0xB9 => /^(ISO-8859-(6))/,
  0xBA => /^(ISO-8859-(6))/,
  0xBC => /^(ISO-8859-(6))/,
  0xBD => /^(ISO-8859-(6))/,
  0xBE => /^(ISO-8859-(3|6))/,
  0xBF => /^(ISO-8859-(8))/,

  0xC0 => /^(ISO-8859-(6|8))/,
  0xC1 => /^(ISO-8859-(8))/,
  0xC2 => /^(ISO-8859-(8))/,
  0xC3 => /^(ISO-8859-(3|8))/,
  0xC4 => /^(ISO-8859-(8))/,
  0xC5 => /^(ISO-8859-(8))/,
  0xC6 => /^(ISO-8859-(8))/,
  0xC7 => /^(ISO-8859-(8))/,
  0xC8 => /^(ISO-8859-(8))/,
  0xC9 => /^(ISO-8859-(8))/,
  0xCA => /^(ISO-8859-(8))/,
  0xCB => /^(ISO-8859-(8))/,
  0xCC => /^(ISO-8859-(8))/,
  0xCD => /^(ISO-8859-(8))/,
  0xCE => /^(ISO-8859-(8))/,
  0xCF => /^(ISO-8859-(8))/,

  0xD0 => /^(ISO-8859-(3|8))/,
  0xD1 => /^(ISO-8859-(8))/,
  0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/,
  0xD3 => /^(ISO-8859-(8))/,
  0xD4 => /^(ISO-8859-(8))/,
  0xD5 => /^(ISO-8859-(8))/,
  0xD6 => /^(ISO-8859-(8))/,
  0xD7 => /^(ISO-8859-(8))/,
  0xD8 => /^(ISO-8859-(8))/,
  0xD9 => /^(ISO-8859-(8)|Windows-(1255))/,
  0xDA => /^(ISO-8859-(8)|Windows-(1255))/,
  0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
  0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
  0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
  0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
  0xDF => /^(ISO-8859-(6)|Windows-(1255))/,

  0xE3 => /^(ISO-8859-(3))/,

  0xF0 => /^(ISO-8859-(3))/,
  0xF3 => /^(ISO-8859-(6))/,
  0xF4 => /^(ISO-8859-(6))/,
  0xF5 => /^(ISO-8859-(6))/,
  0xF6 => /^(ISO-8859-(6))/,
  0xF7 => /^(ISO-8859-(6))/,
  0xF8 => /^(ISO-8859-(6))/,
  0xF9 => /^(ISO-8859-(6))/,
  0xFA => /^(ISO-8859-(6))/,
  0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/,
  0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255))/,
  0xFD => /^(ISO-8859-(6|11))/,
  0xFE => /^(ISO-8859-(6|11))/,
  0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255))/,
}.freeze
# Byte values that #blank? treats as blank regardless of the encoding name.
BLANKS =
[
  0x9,  # horizontal tab
  0x20, # space
].freeze
# Line/page separator byte values. #blank? also counts these as blank,
# regardless of the encoding name.
SEPARATORS =
[
  0xA, # line feed
  0xB, # vertical tab
  0xC, # form feed
  0xD, # carriage return
].freeze
# Encoding-specific blank bytes.
#
# Key:   a byte value (Integer).
# Value: a pattern of encoding names in which #blank? treats that byte as
#        blank, in addition to the bytes in BLANKS and SEPARATORS.
EXTRA_BLANKS =
{
  # Applies to every "ISO-8859-*" and "Windows-125*" encoding name.
  0xA0 => /^(ISO-8859-|Windows-125)/,
  # NOTE(review): presumably the zero-width joiner controls of Windows-1256;
  # confirm the byte values (0x9D / 0x9F) against the Windows-1256 code chart.
  0x9D => /^Windows-(1256)/,
  0x9F => /^Windows-(1256)/,
}.freeze

Constants inherited from Characteristics

Characteristics::VERSION

Instance Attribute Summary

Attributes inherited from Characteristics

#encoding

Instance Method Summary collapse

Methods inherited from Characteristics

create, create_for_type, type_from_encoding_name, #unicode?, #valid?

Constructor Details

#initialize(char) ⇒ ByteCharacteristics

Returns a new instance of ByteCharacteristics.



121
122
123
124
# File 'lib/characteristics/byte.rb', line 121

# Creates the characteristics object for one byte.
#
# The character is handed on to the inherited constructor, and its ordinal
# is cached in @ord, which every predicate of this class reads.
#
# @param char [#ord] the single-byte character to inspect
def initialize(char)
  super(char)
  @ord = char.ord
end

Instance Method Details

#assigned? ⇒ Boolean

Returns:

  • (Boolean)


140
141
142
# File 'lib/characteristics/byte.rb', line 140

# Whether this byte has a meaning in the current encoding.
#
# Control bytes always count as assigned. Any other byte is assigned unless
# UNASSIGNED holds a pattern for it that matches @encoding_name.
#
# @return [Boolean]
def assigned?
  return true if control?

  # No table entry yields nil, and nil never matches, so the byte is assigned.
  unassigned_in = UNASSIGNED[@ord]
  !(unassigned_in =~ @encoding_name)
end

#blank? ⇒ Boolean

Returns:

  • (Boolean)


160
161
162
163
164
# File 'lib/characteristics/byte.rb', line 160

# Whether this byte counts as blank in the current encoding.
#
# A byte is blank when it is listed in BLANKS or SEPARATORS, or when
# EXTRA_BLANKS holds a pattern for it that matches @encoding_name.
#
# @return [Boolean] strictly true or false
def blank?
  # `=~` yields a match offset or nil (nil as well when the byte has no
  # EXTRA_BLANKS entry), so the result is coerced with `!!` to keep this
  # predicate a real boolean, like #encoding_has_c1?.
  BLANKS.include?(@ord) ||
    SEPARATORS.include?(@ord) ||
    !!(EXTRA_BLANKS[@ord] =~ @encoding_name)
end

#c0? ⇒ Boolean

Returns:

  • (Boolean)


148
149
150
# File 'lib/characteristics/byte.rb', line 148

# Whether this byte is a C0 control byte (below 0x20) in an encoding that
# has the C0 range.
#
# @return [Boolean]
def c0?
  return false unless @ord < 0x20

  encoding_has_c0?
end

#c1? ⇒ Boolean

Returns:

  • (Boolean)


152
153
154
# File 'lib/characteristics/byte.rb', line 152

# Whether this byte is a C1 control byte (0x80 up to, but excluding, 0xA0)
# in an encoding that has the C1 range.
#
# @return [Boolean]
def c1?
  return false unless @ord >= 0x80
  return false unless @ord < 0xA0

  encoding_has_c1?
end

#control? ⇒ Boolean

Returns:

  • (Boolean)


144
145
146
# File 'lib/characteristics/byte.rb', line 144

# Whether this byte is any kind of control byte: C0, C1 or delete.
#
# The checks run in that order and stop at the first one that succeeds.
#
# @return [Boolean]
def control?
  verdict = c0?
  verdict ||= c1?
  verdict || delete?
end

#delete? ⇒ Boolean

Returns:

  • (Boolean)


156
157
158
# File 'lib/characteristics/byte.rb', line 156

# Whether this byte is the delete byte (0x7F) in an encoding that has it.
#
# @return [Boolean]
def delete?
  return false unless @ord == 0x7F

  encoding_has_delete?
end

#encoding_has_c0? ⇒ Boolean

Returns:

  • (Boolean)


126
127
128
129
# File 'lib/characteristics/byte.rb', line 126

# Whether the current encoding has the C0 control range that #c0? checks.
#
# Currently hard-wired: every encoding is treated as having C0. The
# commented-out line sketches a per-encoding lookup against a HAS_C0
# pattern, mirroring what #encoding_has_c1? does with HAS_C1.
#
# @return [Boolean] always true
def encoding_has_c0?
  # !!(HAS_C0 =~ @encoding_name)
  true
end

#encoding_has_c1? ⇒ Boolean

Returns:

  • (Boolean)


136
137
138
# File 'lib/characteristics/byte.rb', line 136

# Whether the current encoding has the C1 control range that #c1? checks,
# i.e. whether @encoding_name matches HAS_C1.
#
# @return [Boolean]
def encoding_has_c1?
  # `=~` gives a match offset or nil; only nil means "no match".
  !(HAS_C1 =~ @encoding_name).nil?
end

#encoding_has_delete? ⇒ Boolean

Returns:

  • (Boolean)


131
132
133
134
# File 'lib/characteristics/byte.rb', line 131

# Whether the current encoding has the delete byte that #delete? checks.
#
# Currently hard-wired: every encoding is treated as having delete.
# NOTE(review): the commented-out line refers to HAS_C0, the same pattern
# named in #encoding_has_c0? — presumably delete was meant to follow the C0
# rule; confirm before enabling it.
#
# @return [Boolean] always true
def encoding_has_delete?
  # !!(HAS_C0 =~ @encoding_name)
  true
end