Class: UnicodeCharacteristics

Inherits:
Characteristics show all
Defined in:
lib/characteristics/unicode.rb

Constant Summary collapse

BLANKS =

Note that this list is maintained by hand and might not cover the latest Unicode updates. Please open an issue or pull request if you find another character that is rendered invisible:

[
  0x0009,
  0x0020,
  0x00AD,
  0x034F,
  0x061C,
  0x070F,
  *0x115F..0x1160,
  0x1680,
  *0x17B4..0x17B5,
  0x180E,
  *0x2000..0x200F,
  0x202F,
  0x205F,
  *0x2060..0x2064,
  *0x206A..0x206F,
  # Element order is kept exactly as in the hand-maintained list,
  # so 0x3000 deliberately stays ahead of 0x2800.
  0x3000,
  0x2800,
  0x3164,
  0xFEFF,
  0xFFA0,
  0x110B1,
  *0x1BCA0..0x1BCA3,
  0x1D159,
  *0x1D173..0x1D17A,
].freeze
# Code points tested by #separator? (and, together with BLANKS, by #blank?).
SEPARATORS =
[
  *0x000A..0x000D,
  0x0085,
  *0x2028..0x2029,
].freeze
# Code points tested by #bidi_control?.
BIDI_CONTROL =
[
  0x061C,
  *0x200E..0x200F,
  *0x202A..0x202E,
  *0x2066..0x2069,
].freeze
# Code points tested by #variation_selector?, built from three contiguous
# ranges that are expanded into one flat, frozen array.
VARIATION_SELECTORS =
[
  0x180B..0x180D,
  0xFE00..0xFE0F,
  0xE0100..0xE01EF,
].flat_map(&:to_a).freeze
# Code points tested by #tag?: the single value 0xE0001 followed by the
# contiguous range 0xE0020..0xE007F.
TAGS =
  ([0xE0001] + (0xE0020..0xE007F).to_a).freeze
# Code points tested by #noncharacter?: the contiguous range 0xFDD0..0xFDEF,
# followed by the last two code points (xFFFE, xFFFF) of every plane from
# 0x00 through 0x10, in ascending plane order.
NONCHARACTERS =
[
  *0xFDD0..0xFDEF,
  *(0x00..0x10).flat_map { |plane| [(plane << 16) | 0xFFFE, (plane << 16) | 0xFFFF] },
].freeze
# Code points tested by #ignorable?. Entries are single code points or
# inclusive ranges; ranges are expanded so the constant is one flat array.
# NOTE(review): presumably tracks Unicode's Default_Ignorable_Code_Point
# property - confirm against the targeted Unicode version.
IGNORABLE =
[
  0x00AD,
  0x034F,
  0x061C,
  0x115F..0x1160,
  0x17B4..0x17B5,
  0x180B..0x180E,
  0x200B..0x200F,
  0x202A..0x202E,
  0x2060..0x206F,
  0x3164,
  0xFE00..0xFE0F,
  0xFEFF,
  0xFFA0,
  0xFFF0..0xFFF8,
  0x1BCA0..0x1BCA3,
  0x1D173..0x1D17A,
  0xE0000..0xE0FFF,
].flat_map { |entry| Array(entry) }.freeze
# Code points tested by #kddi? (two contiguous ranges, in ascending order).
KDDI =
  ((0xE468..0xE5DF).to_a + (0xEA80..0xEB8E).to_a).freeze
# Code points tested by #softbank?: six contiguous ranges expanded into one
# flat, frozen array.
SOFTBANK =
[
  0xE001..0xE05A,
  0xE101..0xE15A,
  0xE201..0xE25A,
  0xE301..0xE34D,
  0xE401..0xE44C,
  0xE501..0xE53E,
].flat_map(&:to_a).freeze
# Code points tested by #docomo? (one contiguous range).
DOCOMO =
  (0xE63E..0xE757).to_a.freeze

Constants inherited from Characteristics

Characteristics::UNICODE_VERSION, Characteristics::VERSION

Instance Attribute Summary collapse

Attributes inherited from Characteristics

#encoding

Instance Method Summary collapse

Methods inherited from Characteristics

create, create_for_type, type_from_encoding_name, #valid?

Constructor Details

#initialize(char) ⇒ UnicodeCharacteristics

Returns a new instance of UnicodeCharacteristics.



169
170
171
172
173
174
175
176
# File 'lib/characteristics/unicode.rb', line 169

# Builds the characteristics for a single character.
#
# Runs the superclass initializer first (bare `super`, so it receives the
# same argument). Only when @is_valid is truthy afterwards does it record the
# character's category and code point.
# NOTE(review): @is_valid is presumably set by the superclass initializer,
# which is not visible here - confirm.
#
# @param char [String] the character to inspect; must respond to #ord
def initialize(char)
  super
  return unless @is_valid

  @category = Unicode::Categories.category(char)
  @ord = char.ord
end

Instance Attribute Details

#category ⇒ Object (readonly)

Returns the value of attribute category.



167
168
169
# File 'lib/characteristics/unicode.rb', line 167

# Reader for the @category instance variable.
#
# The constructor assigns @category from Unicode::Categories.category(char),
# and only when @is_valid is truthy; if it was never assigned, this reads
# as nil.
#
# @return [Object, nil] the stored category value
def category
  @category
end

Instance Method Details

#assigned? ⇒ Boolean

Returns:

  • (Boolean)


182
183
184
# File 'lib/characteristics/unicode.rb', line 182

# Whether the character is valid and its category is anything other than "Cn".
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise true/false from the category comparison
def assigned?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  @category != "Cn"
end

#bidi_control? ⇒ Boolean

Returns:

  • (Boolean)


214
215
216
# File 'lib/characteristics/unicode.rb', line 214

# Whether the character is valid and its code point is listed in BIDI_CONTROL.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the membership check
def bidi_control?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  BIDI_CONTROL.include?(@ord)
end

#blank? ⇒ Boolean

Returns:

  • (Boolean)


202
203
204
# File 'lib/characteristics/unicode.rb', line 202

# Whether the character is valid and its code point appears in BLANKS or in
# SEPARATORS (BLANKS is consulted first).
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise true/false
def blank?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  [BLANKS, SEPARATORS].any? { |code_points| code_points.include?(@ord) }
end

#c0? ⇒ Boolean

Returns:

  • (Boolean)


190
191
192
# File 'lib/characteristics/unicode.rb', line 190

# Whether the character is valid and its code point is below 0x20.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the comparison
def c0?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  @ord < 0x20
end

#c1? ⇒ Boolean

Returns:

  • (Boolean)


198
199
200
# File 'lib/characteristics/unicode.rb', line 198

# Whether the character is valid and its code point lies in 0x80 up to, but
# not including, 0xA0.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the range check
def c1?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  # Exclusive range: 0xA0 itself is outside.
  (0x80...0xA0).cover?(@ord)
end

#control? ⇒ Boolean

Returns:

  • (Boolean)


186
187
188
# File 'lib/characteristics/unicode.rb', line 186

# Whether the character is valid and its category equals "Cc".
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the category comparison
def control?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  @category == "Cc"
end

#delete? ⇒ Boolean

Returns:

  • (Boolean)


194
195
196
# File 'lib/characteristics/unicode.rb', line 194

# Whether the character is valid and its code point is exactly 0x7F.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the comparison
def delete?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  @ord == 0x7F
end

#docomo? ⇒ Boolean

Returns:

  • (Boolean)


250
251
252
253
254
# File 'lib/characteristics/unicode.rb', line 250

# Whether the character is valid, the encoding passes encoding_has_docomo?,
# and the code point is listed in DOCOMO. The checks run in that order and
# stop at the first falsy result.
#
# @return [Boolean, nil] the first falsy intermediate value, otherwise the
#   result of the membership check
def docomo?
  applicable = @is_valid && encoding_has_docomo?
  applicable && DOCOMO.include?(@ord)
end

#format? ⇒ Boolean

Returns:

  • (Boolean)


210
211
212
# File 'lib/characteristics/unicode.rb', line 210

# Whether the character is valid and its category equals "Cf".
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the category comparison
def format?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  @category == "Cf"
end

#ignorable? ⇒ Boolean

Returns:

  • (Boolean)


232
233
234
# File 'lib/characteristics/unicode.rb', line 232

# Whether the character is valid and its code point is listed in IGNORABLE.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the membership check
def ignorable?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  IGNORABLE.include?(@ord)
end

#kddi? ⇒ Boolean

emoji

Returns:

  • (Boolean)


238
239
240
241
242
# File 'lib/characteristics/unicode.rb', line 238

# Whether the character is valid, the encoding passes encoding_has_kddi?,
# and the code point is listed in KDDI. The checks run in that order and
# stop at the first falsy result.
#
# @return [Boolean, nil] the first falsy intermediate value, otherwise the
#   result of the membership check
def kddi?
  applicable = @is_valid && encoding_has_kddi?
  applicable && KDDI.include?(@ord)
end

#noncharacter? ⇒ Boolean

Returns:

  • (Boolean)


228
229
230
# File 'lib/characteristics/unicode.rb', line 228

# Whether the character is valid and its code point is listed in NONCHARACTERS.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the membership check
def noncharacter?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  NONCHARACTERS.include?(@ord)
end

#separator? ⇒ Boolean

Returns:

  • (Boolean)


206
207
208
# File 'lib/characteristics/unicode.rb', line 206

# Whether the character is valid and its code point is listed in SEPARATORS.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the membership check
def separator?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  SEPARATORS.include?(@ord)
end

#softbank? ⇒ Boolean

Returns:

  • (Boolean)


244
245
246
247
248
# File 'lib/characteristics/unicode.rb', line 244

# Whether the character is valid, the encoding passes encoding_has_softbank?,
# and the code point is listed in SOFTBANK. The checks run in that order and
# stop at the first falsy result.
#
# @return [Boolean, nil] the first falsy intermediate value, otherwise the
#   result of the membership check
def softbank?
  applicable = @is_valid && encoding_has_softbank?
  applicable && SOFTBANK.include?(@ord)
end

#tag? ⇒ Boolean

Returns:

  • (Boolean)


224
225
226
# File 'lib/characteristics/unicode.rb', line 224

# Whether the character is valid and its code point is listed in TAGS.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the membership check
def tag?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  TAGS.include?(@ord)
end

#unicode? ⇒ Boolean

Returns:

  • (Boolean)


178
179
180
# File 'lib/characteristics/unicode.rb', line 178

# Always answers true for this class; no instance state is consulted.
#
# @return [true]
def unicode?
  true
end

#variation_selector? ⇒ Boolean

unicode specific

Returns:

  • (Boolean)


220
221
222
# File 'lib/characteristics/unicode.rb', line 220

# Whether the character is valid and its code point is listed in
# VARIATION_SELECTORS.
#
# @return [Boolean, nil] the falsy @is_valid itself when the character is not
#   valid, otherwise the result of the membership check
def variation_selector?
  # Hand back the falsy validity flag unchanged.
  return @is_valid unless @is_valid

  VARIATION_SELECTORS.include?(@ord)
end