Module: Uniscribe

Defined in:
lib/uniscribe.rb,
lib/uniscribe/version.rb

Constant Summary collapse

# Evaluates to false on any engine other than MRI ("ruby"); on MRI it holds
# whatever Unicode::Version.unicode_version returns.
# NOTE(review): presumably the Unicode version behind the interpreter's own
# grapheme (\X) matching -- confirm against the unicode-version gem.
UNICODE_VERSION_GLYPH_DETECTION =
RUBY_ENGINE == "ruby" &&
Unicode::Version.unicode_version
# Names of the encodings known to this Ruby that uniscribe accepts: UTF-8,
# names starting with "UTF8-", names shaped like "UTF-...E" (for example the
# UTF-16/UTF-32 forms with explicit endianness), US-ASCII and ISO-8859-1.
# The list is sorted and frozen.
SUPPORTED_ENCODINGS =
Encoding.name_list.select{ |encoding_name|
  [
    /^UTF-8$/,
    /^UTF8-/,
    /^UTF-...E$/,
    /^US-ASCII$/,
    /^ISO-8859-1$/,
  ].any?{ |pattern| pattern.match?(encoding_name) }
}.sort.freeze
# Fixed display colors, keyed by the kind of codepoint they highlight.
# Frozen so the shared table cannot be modified by accident at runtime.
COLORS =
{
  control: "#0000FF",
  blank: "#33AADD",
  format: "#FF00FF",
  mark: "#228822",
  unassigned: "#FF5500",
  ignorable: "#FFAA00",
}.freeze
# Version string of this library.
VERSION =
"1.11.0"
# NOTE(review): presumably the Unicode standard version the character data
# targets -- confirm.
UNICODE_VERSION =
"16.0.0"
# NOTE(review): presumably the Unicode Emoji version the emoji data targets
# -- confirm.
EMOJI_VERSION =
"16.0"

Class Method Summary collapse

Class Method Details

.convert_to_encoding_or_raise(string, encoding) ⇒ Object

Raises:

  • (ArgumentError)


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/uniscribe.rb', line 44

# Validates the input and returns it in an encoding uniscribe can describe.
#
# @param string [String, nil] the data to describe
# @param encoding [String, Encoding, nil] when given, the bytes are
#   re-tagged (not transcoded) with this encoding
# @return [String] +string+ itself when no encoding is given, otherwise a
#   copy of it tagged with +encoding+
# @raise [ArgumentError] if the data is nil or empty, is not valid in its
#   encoding, or its encoding is not listed in SUPPORTED_ENCODINGS
def self.convert_to_encoding_or_raise(string, encoding)
  raise ArgumentError, "no data given to uniscribe" if !string || string.empty?

  # Re-tag a copy: the caller's string keeps its encoding, and frozen
  # strings can be passed in without raising FrozenError.
  string = string.dup.force_encoding(encoding) if encoding

  encoding_name = string.encoding.name

  case encoding_name
  when *SUPPORTED_ENCODINGS
    unless string.valid_encoding?
      raise ArgumentError, "uniscribe can only describe strings with a valid encoding"
    end

    string
  when 'UTF-16', 'UTF-32'
    # Only the variants with an explicit byte order are in SUPPORTED_ENCODINGS.
    raise ArgumentError, "uniscribe only supports #{encoding_name} with specified endianness, please use #{encoding_name}LE or #{encoding_name}BE"
  else
    raise ArgumentError, "uniscribe can only describe Unicode strings (or US-ASCII or ISO-8859-1)"
  end
end

.determine_codepoint_color(char_info) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/uniscribe.rb', line 133

# Picks the display color for a single codepoint.
#
# Checks run in a fixed order (unassigned, blank, control, format, mark); the
# first match selects an entry from COLORS, anything else gets a random color.
#
# @param char_info [#assigned?, #ignorable?, #blank?, #control?, #format?, #unicode?, #category]
# @return [String] a COLORS value or the result of random_color
def self.determine_codepoint_color(char_info)
  unless char_info.assigned?
    return char_info.ignorable? ? COLORS[:ignorable] : COLORS[:unassigned]
  end

  return COLORS[:blank]   if char_info.blank?
  return COLORS[:control] if char_info.control?
  return COLORS[:format]  if char_info.format?

  # Marks are recognised by a general category starting with "M".
  return COLORS[:mark] if char_info.unicode? && char_info.category[0] == "M"

  random_color
end

.determine_codepoint_name(char) ⇒ Object



157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/uniscribe.rb', line 157

# Returns the name to display for a single character.
#
# Uses Unicode::Name.correct when it yields a name. Otherwise falls back to
# Unicode::Name.label, followed by the first alias found when looking through
# the :control, :figment, :alternate and :abbreviation alias lists in that
# order.
#
# @param char [String]
# @return [String]
def self.determine_codepoint_name(char)
  proper_name = Unicode::Name.correct(char)
  return proper_name if proper_name

  label = Unicode::Name.label(char)
  aliases = Unicode::Name.aliases(char)
  return label unless aliases

  chosen_alias = nil
  [:control, :figment, :alternate, :abbreviation].each do |kind|
    candidates = aliases[kind]
    chosen_alias = candidates && candidates[0]
    break if chosen_alias
  end

  chosen_alias ? label + " " + chosen_alias : label
end

.determine_padding(char, composed, wide_ambiguous) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/uniscribe.rb', line 174

# Chooses the tab padding printed after a symbol so the following column
# lines up.
#
# The width comes from Unicode::DisplayWidth.of (ambiguous characters count as
# 2 when +wide_ambiguous+ is set, otherwise 1), plus 1 when +composed+ is
# set, and never below 0.
#
# @param char [String] the symbol that was printed
# @param composed [Boolean] adds 1 to the required width
# @param wide_ambiguous [Boolean]
# @return [String] two tabs below width 5, one tab below width 10, else ""
def self.determine_padding(char, composed, wide_ambiguous)
  ambiguous_width = wide_ambiguous ? 2 : 1
  width = Unicode::DisplayWidth.of(char, ambiguous_width, {}, emoji: true)
  width += 1 if composed
  width = [width, 0].max

  if width < 5
    "\t\t"
  elsif width < 10
    "\t"
  else
    ""
  end
end

.of(string, encoding: nil, wide_ambiguous: false) ⇒ Object



37
38
39
40
41
42
# File 'lib/uniscribe.rb', line 37

# Prints a description of every glyph in +string+.
#
# The input is validated by convert_to_encoding_or_raise, transcoded to
# UTF-8, split with the \X regexp (one match per glyph) and handed to
# visualize.
#
# @param string [String] the data to describe
# @param encoding [String, Encoding, nil] forwarded to convert_to_encoding_or_raise
# @param wide_ambiguous [Boolean] forwarded to visualize
# @return [Object] whatever visualize returns
def self.of(string, encoding: nil, wide_ambiguous: false)
  checked = convert_to_encoding_or_raise(string, encoding)
  utf8_text = checked.encode("UTF-8")

  visualize(utf8_text.scan(/\X/), wide_ambiguous: wide_ambiguous)
end

.puts_codepoint(cp, composed = false, last = false, wide_ambiguous = false) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/uniscribe.rb', line 107

# Prints the line describing one codepoint: its hex value, a tree branch, its
# symbol, padding, the branch again and its name. Hex value, symbol and name
# are painted in the color chosen by determine_codepoint_color.
#
# @param cp [Integer] the codepoint
# @param composed [Boolean] selects the nested branch style
# @param last [Boolean] with +composed+, selects the closing branch
# @param wide_ambiguous [Boolean] forwarded to determine_padding
# @return [nil]
def self.puts_codepoint(cp, composed = false, last = false, wide_ambiguous = false)
  char = [cp].pack("U*")
  info = UnicodeCharacteristics.new(char)
  color = determine_codepoint_color(info)

  # At least four hex digits, right-aligned in a six-character column.
  hex = cp.to_s(16).rjust(4, "0").rjust(6).upcase
  symbol = Symbolify.unicode(char, info)

  branch =
    if !composed
      "├─"
    elsif last
      "│└─"
    else
      "│├─"
    end

  name = determine_codepoint_name(char)
  padding = determine_padding(symbol, composed, wide_ambiguous)

  line = format(
    " %s %s %s%s%s %s",
    Paint[hex, color],
    branch,
    Paint[symbol, color],
    padding,
    branch,
    Paint[name, color]
  )
  puts line
end

.puts_composition(cps, wide_ambiguous = false) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/uniscribe.rb', line 83

# Prints a glyph made of several codepoints: one header line for the whole
# composition, then one puts_codepoint line per codepoint. The final codepoint
# is printed with the "last" flag set.
#
# The header reads "Composition: <name>" when Unicode::SequenceName.of knows
# the sequence, plain "Composition" otherwise.
#
# @param cps [Array<Integer>] the codepoints of the glyph
# @param wide_ambiguous [Boolean] forwarded to determine_padding and puts_codepoint
# @return [Object] whatever the final puts_codepoint call returns
def self.puts_composition(cps, wide_ambiguous = false)
  glyph = cps.pack("U*")
  sequence_name = Unicode::SequenceName.of(glyph)
  title = sequence_name ? "Composition: #{sequence_name}" : "Composition"

  color = random_color
  symbol = symbolify_composition(glyph)
  padding = determine_padding(symbol, false, wide_ambiguous)

  header = format(
    "   %s ├┬ %s%s├┬ %s",
    Paint["----", color],
    Paint[symbol, color],
    padding,
    Paint[title, color]
  )
  puts header

  *leading, final = cps
  leading.each do |cp|
    puts_codepoint(cp, true, false, wide_ambiguous)
  end
  puts_codepoint(final, true, true, wide_ambiguous)
end

.random_color ⇒ Object



153
154
155
# File 'lib/uniscribe.rb', line 153

# Generates a random color as six lowercase hex digits (no leading "#").
# Each of the three channels is drawn from 60..149.
#
# @return [String]
def self.random_color
  channels = Array.new(3) { rand(90) + 60 }
  format("%.2x%.2x%.2x", *channels)
end

.symbolify_composition(char) ⇒ Object



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/uniscribe.rb', line 189

# Builds the symbol shown for a glyph that consists of several codepoints.
#
# Rules, first match wins:
# * any unassigned codepoint      -> "n/a"
# * only separators               -> ""
# * only Mn/Me category marks     -> the glyph, prefixed with "" when at least
#                                    one mark is Mn, with " " otherwise
# * only blanks                   -> the glyph wrapped in "]" and "["
# * anything else                 -> the glyph unchanged
#
# NOTE(review): the two empty string literals below look like they may have
# lost a placeholder symbol somewhere on the way here -- confirm against the
# upstream file. They are reproduced exactly as found.
#
# @param char [String] the whole glyph
# @return [String]
def self.symbolify_composition(char)
  infos = char.chars.map { |codepoint| UnicodeCharacteristics.new(codepoint) }

  return "n/a" unless infos.all?(&:assigned?)
  return "" if infos.all?(&:separator?)

  if infos.all? { |info| %w[Mn Me].include?(info.category) }
    prefix = infos.any? { |info| info.category == "Mn" } ? "" : " "
    return prefix + char
  end

  return "]" + char + "[" if infos.all?(&:blank?)

  char
end

.visualize(glyphs, wide_ambiguous: false) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/uniscribe.rb', line 63

# Prints the description of each glyph, framed by a blank line before and
# after.
#
# Glyphs with more than one codepoint go to puts_composition, all others to
# puts_codepoint. The final glyph is passed to puts_codepoint with the "last"
# flag set. An empty list prints only the two blank lines instead of failing
# on a missing last element.
#
# @param glyphs [Array<String>] one string per glyph
# @param wide_ambiguous [Boolean] forwarded to the printing helpers
# @return [nil]
def self.visualize(glyphs, wide_ambiguous: false)
  puts

  final_index = glyphs.size - 1
  glyphs.each_with_index do |glyph, index|
    cps = glyph.codepoints
    if cps.size > 1
      puts_composition(cps, wide_ambiguous)
    else
      puts_codepoint(cps[0], false, index == final_index, wide_ambiguous)
    end
  end

  puts
end