Class: Unisec::Properties

Inherits:
Object
  • Object
show all
Defined in:
lib/unisec/properties.rb

Overview

Manipulate Unicode properties

Class Method Summary collapse

Class Method Details

.char(chr) ⇒ Hash

Returns all properties of a given unicode character (code point)

Examples:

Unisec::Properties.char('é')
# =>
# {:age=>"1.1",
# … }


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/unisec/properties.rb', line 66

# Returns all properties of a given unicode character (code point)
# @param chr [String] Unicode character; only its first code point is looked up
#   for the name and the Unicode properties
# @return [Hash] age, block, category, subcategory, codepoint, name, script,
#   case mappings, normalization forms and the remaining properties
def self.char(chr)
  first_cp = TwitterCldr::Utils::CodePoints.from_string(chr).first
  code_point = TwitterCldr::Shared::CodePoint.get(first_cp)
  properties = code_point.properties
  # Properties exposed under dedicated keys are removed from a copy of the raw
  # hash; whatever is left is returned as :other_properties.
  leftovers = properties.properties_hash.dup
  %w[Age Block General_Category Script].each { |promoted| leftovers.delete(promoted) }
  # Long aliases of the general category values: the first one is reported as
  # the sub-category, the second one as the category.
  subcategory, category = properties.general_category.map do |short_alias|
    TwitterCldr::Shared::PropertyValueAliases.long_alias_for('gc', short_alias)
  end
  localized = chr.localize
  {
    age: properties.age.join,
    block: properties.block.join,
    category: category,
    subcategory: subcategory,
    codepoint: Properties.char2codepoint(chr),
    name: code_point.name,
    script: properties.script.join,
    case: {
      ruby: {
        lowercase: chr.downcase,
        uppercase: chr.upcase
      },
      twitter: {
        lowercase: localized.downcase.to_s,
        uppercase: localized.upcase.to_s,
        titlecase: localized.titlecase.to_s,
        casefold: localized.casefold.to_s
      }
    },
    normalization: {
      ruby: {
        nfkd: chr.unicode_normalize(:nfkd),
        nfkc: chr.unicode_normalize(:nfkc),
        nfd: chr.unicode_normalize(:nfd),
        nfc: chr.unicode_normalize(:nfc)
      },
      twitter: {
        nfkd: localized.normalize(using: :NFKD).to_s,
        nfkc: localized.normalize(using: :NFKC).to_s,
        nfd: localized.normalize(using: :NFD).to_s,
        nfc: localized.normalize(using: :NFC).to_s
      }
    },
    other_properties: leftovers
  }
end

.char2codepoint(chr) ⇒ String

Display the code point in Unicode format for a given character (code point as string)

Examples:

Unisec::Properties.char2codepoint('💎') # => "U+1F48E"


160
161
162
# File 'lib/unisec/properties.rb', line 160

# Display the code point in Unicode format for a given character (code point as string)
# @param chr [String] Unicode character; only the first code point of the string is used
# @return [String] whatever {deccp2stdhexcp} returns for that code point
def self.char2codepoint(chr)
  leading_cp = chr.codepoints[0]
  Properties.deccp2stdhexcp(leading_cp)
end

.char_display(chr, extended: false) ⇒ Object

Display a CLI-friendly output listing all properties corresponding to a character (code point)



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/unisec/properties.rb', line 117

# Display a CLI-friendly output listing all properties corresponding to a character (code point)
# @param chr [String] Unicode character (code point as string)
# @param extended [Boolean] when true, the entries of +:other_properties+ are printed as well
# @return [nil]
def self.char_display(chr, extended: false)
  data = Properties.char(chr)
  display = ->(key, value) { puts Paint[key, :red, :bold].ljust(30) + " #{value}" }
  # Case mappings, like normalization forms, can expand to several code points
  # (e.g. "ß" upcases to "SS"), so every code point of the result is listed
  # rather than only the first one.
  with_codepoints = ->(str) { "#{str} (#{Properties.chars2codepoints(str)})" }
  display.call('Name:', data[:name])
  display.call('Code Point:', data[:codepoint])
  puts
  display.call('Block:', data[:block])
  display.call('Category:', data[:category])
  display.call('Sub-Category:', data[:subcategory])
  display.call('Script:', data[:script])
  display.call('Since (age):', "Version #{data[:age]}")
  puts
  %i[uppercase lowercase titlecase casefold].each do |mapping|
    display.call("#{mapping.to_s.capitalize}:", with_codepoints.call(data.dig(:case, :twitter, mapping)))
  end
  puts
  %i[nfkd nfkc nfd nfc].each do |form|
    label = "Normalization #{form.to_s.upcase}:"
    display.call(label, with_codepoints.call(data.dig(:normalization, :twitter, form)))
  end
  if extended
    puts
    data[:other_properties].each do |k, v|
      # Values may be nil, hence the safe navigation before joining
      display.call(k, v&.join)
    end
  end
  nil
end

.chars2codepoints(chrs) ⇒ String

Display the code points in Unicode format for the given characters (code points as string)

Examples:

Unisec::Properties.chars2codepoints("ỳ́") # => "U+0079 U+0300 U+0301"
Unisec::Properties.chars2codepoints("🧑‍🌾") # => "U+1F9D1 U+200D U+1F33E"


170
171
172
173
174
175
176
# File 'lib/unisec/properties.rb', line 170

# Display the code points in Unicode format for the given characters (code points as string)
# @param chrs [String] Unicode characters
# @return [String] one {char2codepoint} result per character, separated by single spaces
def self.chars2codepoints(chrs)
  chrs.each_char.map { |single| Properties.char2codepoint(single) }.join(' ')
end

.codepoints(prop) ⇒ Array<Hash>

List all code points for a given property

Examples:

Unisec::Properties.codepoints('Quotation_Mark')
# =>
# [{:char=>"\"", :codepoint=>34, :name=>"QUOTATION MARK"},
#  {:char=>"'", :codepoint=>39, :name=>"APOSTROPHE"},
#  … ]


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/unisec/properties.rb', line 26

# List all code points for a given property
# @param prop [String] property name, e.g. +'Quotation_Mark'+
# @return [Array<Hash>] one hash per code point with the keys +:char+,
#   +:codepoint+ and +:name+
def self.codepoints(prop)
  registry = TwitterCldr::Shared::CodePoint
  matching_ranges = registry.properties.code_points_for_property(prop).ranges
  # Every range is expanded and the per-range results are concatenated into one flat list
  matching_ranges.flat_map do |range|
    range.map do |number|
      entry = registry.get(number)
      {
        char: TwitterCldr::Utils::CodePoints.to_string([entry.code_point]),
        codepoint: entry.code_point,
        name: entry.name
      }
    end
  end
end

.codepoints_display(prop) ⇒ Object

Display a CLI-friendly output listing all code points corresponding to a property.

Examples:

Unisec::Properties.codepoints_display('Quotation_Mark')
# =>
# U+0022      "    QUOTATION MARK
# U+0027      '    APOSTROPHE
# …


50
51
52
53
54
55
56
# File 'lib/unisec/properties.rb', line 50

# Display a CLI-friendly output listing all code points corresponding to a property.
# @param prop [String] property name, e.g. +'Quotation_Mark'+
# @return [nil]
def self.codepoints_display(prop)
  Properties.codepoints(prop).each do |entry|
    # Columns: padded hexadecimal code point, padded character, name
    hex_column = Properties.deccp2stdhexcp(entry[:codepoint]).ljust(7)
    char_column = entry[:char].ljust(4)
    puts "#{hex_column} #{char_column} #{entry[:name]}"
  end
  nil
end

.deccp2stdhexcp(int_cp) ⇒ String

Convert from decimal code point to standardized format hexadecimal code point

Examples:

Unisec::Properties.deccp2stdhexcp(128640) # => "U+1F680"


183
184
185
# File 'lib/unisec/properties.rb', line 183

# Convert from decimal code point to standardized format hexadecimal code point
# @param int_cp [Integer] code point in decimal
# @return [String] +U++ followed by the code point in uppercase hexadecimal,
#   zero-padded to at least four digits
def self.deccp2stdhexcp(int_cp)
  format('U+%.4X', int_cp)
end

.list ⇒ Array<String>

List Unicode property names

Examples:

Unisec::Properties.list # => ["ASCII_Hex_Digit", "Age", "Alphabetic", … ]


13
14
15
# File 'lib/unisec/properties.rb', line 13

# List Unicode property names
# @return [Array<String>] property names as reported by TwitterCldr
def self.list
  unicode_properties = TwitterCldr::Shared::CodePoint.properties
  unicode_properties.property_names
end