Module: Uniscribe
- Defined in:
- lib/uniscribe.rb,
lib/uniscribe/version.rb
Constant Summary collapse
- UNICODE_VERSION_GLYPH_DETECTION =
RUBY_ENGINE == "ruby" && Unicode::Version.unicode_version
- SUPPORTED_ENCODINGS =
Encoding.name_list.grep( Regexp.union( /^UTF-8$/, /^UTF8-/, /^UTF-...E$/, /^US-ASCII$/, /^ISO-8859-1$/, ) ).sort.freeze
- COLORS =
{ control: "#0000FF", blank: "#33AADD", format: "#FF00FF", mark: "#228822", unassigned: "#FF5500", ignorable: "#FFAA00", }
- VERSION =
"1.11.0"
- UNICODE_VERSION =
"16.0.0"
- EMOJI_VERSION =
"16.0"
Class Method Summary collapse
- .convert_to_encoding_or_raise(string, encoding) ⇒ Object
- .determine_codepoint_color(char_info) ⇒ Object
- .determine_codepoint_name(char) ⇒ Object
- .determine_padding(char, composed, wide_ambiguous) ⇒ Object
- .of(string, encoding: nil, wide_ambiguous: false) ⇒ Object
- .puts_codepoint(cp, composed = false, last = false, wide_ambiguous = false) ⇒ Object
- .puts_composition(cps, wide_ambiguous = false) ⇒ Object
- .random_color ⇒ Object
- .symbolify_composition(char) ⇒ Object
- .visualize(glyphs, wide_ambiguous: false) ⇒ Object
Class Method Details
.convert_to_encoding_or_raise(string, encoding) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/uniscribe.rb', line 44

# Checks that +string+ is non-empty, tagged with one of SUPPORTED_ENCODINGS,
# and validly encoded, optionally reinterpreting its bytes as +encoding+ first.
#
# The caller's object is never modified: when +encoding+ is given, the
# reinterpretation is done on a copy, so frozen strings are accepted as well.
#
# @param string [String, nil] the data to check
# @param encoding [String, Encoding, nil] encoding to force onto the bytes;
#   when nil the string's current encoding is used
# @return [String] the checked string (a re-tagged copy when +encoding+ is given)
# @raise [ArgumentError] if no data is given, the encoding is not supported,
#   or the bytes are not valid in the encoding
def self.convert_to_encoding_or_raise(string, encoding)
  raise ArgumentError, "no data given to uniscribe" if !string || string.empty?

  # force_encoding mutates its receiver, so operate on a duplicate to leave
  # the caller's string untouched (and to allow frozen input).
  string = string.dup.force_encoding(encoding) if encoding
  encoding_name = string.encoding.name

  case encoding_name
  when *SUPPORTED_ENCODINGS
    unless string.valid_encoding?
      raise ArgumentError, "uniscribe can only describe strings with a valid encoding"
    end

    string
  when 'UTF-16', 'UTF-32'
    # These encoding names carry no byte order, so an explicit LE/BE variant is required.
    raise ArgumentError, "uniscribe only supports #{encoding_name} with specified endianness, please use #{encoding_name}LE or #{encoding_name}BE"
  else
    raise ArgumentError, "uniscribe can only describe Unicode strings (or US-ASCII or ISO-8859-1)"
  end
end
.determine_codepoint_color(char_info) ⇒ Object
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/uniscribe.rb', line 133

# Picks the display color for a codepoint from its characteristics.
#
# Checks run in a fixed order and the first match wins: unassigned
# (ignorable or not), blank, control, format, then Unicode marks (category
# starting with "M"). Anything else receives a random color.
#
# @param char_info [#assigned?, #ignorable?, #blank?, #control?, #format?, #unicode?, #category]
#   characteristics object for a single character
# @return [String] an entry of COLORS or the result of random_color
def self.determine_codepoint_color(char_info)
  unless char_info.assigned?
    return COLORS[char_info.ignorable? ? :ignorable : :unassigned]
  end

  return COLORS[:blank] if char_info.blank?
  return COLORS[:control] if char_info.control?
  return COLORS[:format] if char_info.format?
  return COLORS[:mark] if char_info.unicode? && char_info.category[0] == "M"

  random_color
end
.determine_codepoint_name(char) ⇒ Object
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/uniscribe.rb', line 157

# Builds the name shown for a single character.
#
# Uses Unicode::Name.correct when it yields a name. Otherwise falls back to
# Unicode::Name.label and, when aliases exist, appends the first alias found
# in this order of kinds: control, figment, alternate, abbreviation.
#
# @param char [String] the character to name
# @return [String] the name, or the label optionally followed by an alias
def self.determine_codepoint_name(char)
  correct_name = Unicode::Name.correct(char)
  return correct_name if correct_name

  label = Unicode::Name.label(char)
  aliases = Unicode::Name.aliases(char)
  return label unless aliases

  # Walk the alias kinds by priority and stop at the first usable entry.
  chosen_alias = nil
  [:control, :figment, :alternate, :abbreviation].each do |kind|
    entries = aliases[kind]
    chosen_alias = entries && entries[0]
    break if chosen_alias
  end
  return label unless chosen_alias

  label + " " + chosen_alias
end
.determine_padding(char, composed, wide_ambiguous) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/uniscribe.rb', line 174

# Chooses the tab padding that follows a displayed symbol.
#
# The display width comes from Unicode::DisplayWidth (ambiguous characters
# count as 2 when +wide_ambiguous+ is set, otherwise 1), plus one extra
# column for composed entries; negative results are treated as 0.
#
# @param char [String] the (symbolified) text whose width is measured
# @param composed [Boolean] whether one extra column is added to the width
# @param wide_ambiguous [Boolean] treat ambiguous-width characters as wide
# @return [String] two tabs for widths below 5, one tab below 10, else ""
def self.determine_padding(char, composed, wide_ambiguous)
  ambiguous_width = wide_ambiguous ? 2 : 1
  width = Unicode::DisplayWidth.of(char, ambiguous_width, {}, emoji: true)
  width += 1 if composed
  width = [width, 0].max

  if width < 5
    "\t\t"
  elsif width < 10
    "\t"
  else
    ""
  end
end
.of(string, encoding: nil, wide_ambiguous: false) ⇒ Object
37 38 39 40 41 42 |
# File 'lib/uniscribe.rb', line 37

# Entry point: validates the input, splits it into extended grapheme
# clusters (regex \X on the UTF-8 form) and hands them to visualize.
#
# @param string [String] the data to describe
# @param encoding [String, nil] passed on to convert_to_encoding_or_raise
# @param wide_ambiguous [Boolean] passed on to visualize
# @return [Object] whatever visualize returns
def self.of(string, encoding: nil, wide_ambiguous: false)
  checked = convert_to_encoding_or_raise(string, encoding)
  utf8 = checked.encode("UTF-8")
  grapheme_clusters = utf8.scan(/\X/)

  visualize(grapheme_clusters, wide_ambiguous: wide_ambiguous)
end
.puts_codepoint(cp, composed = false, last = false, wide_ambiguous = false) ⇒ Object
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/uniscribe.rb', line 107

# Prints one output row for a single codepoint: hex value, tree branch,
# symbolified character, padding, branch again, and the name. The hex value,
# symbol and name are painted in the codepoint's color.
#
# @param cp [Integer] the codepoint
# @param composed [Boolean] whether the row belongs to a composition
#   (selects the nested "│├─" / "│└─" branch instead of "├─")
# @param last [Boolean] whether this is the last row of its composition;
#   only affects the branch when +composed+ is set
# @param wide_ambiguous [Boolean] passed on to determine_padding
# @return [nil] the result of puts
def self.puts_codepoint(cp, composed = false, last = false, wide_ambiguous = false)
  char = [cp].pack("U*")
  char_info = UnicodeCharacteristics.new(char)
  color = determine_codepoint_color(char_info)

  # At least four hex digits, right-aligned in a six-column field.
  hex = cp.to_s(16).rjust(4, "0")
  hex = hex.rjust(6).upcase

  symbol = Symbolify.unicode(char, char_info)

  branch =
    if !composed
      "├─"
    elsif last
      "│└─"
    else
      "│├─"
    end

  name = determine_codepoint_name(char)
  padding = determine_padding(symbol, composed, wide_ambiguous)

  puts format(
    " %s %s %s%s%s %s",
    Paint[hex, color],
    branch,
    Paint[symbol, color],
    padding,
    branch,
    Paint[name, color]
  )
end
.puts_composition(cps, wide_ambiguous = false) ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/uniscribe.rb', line 83

# Prints a header row for a multi-codepoint glyph, followed by one nested
# row per codepoint (the final one marked as last).
#
# The header shows "----" in place of a hex value and is labelled
# "Composition", extended with the sequence name when
# Unicode::SequenceName knows one.
#
# @param cps [Array<Integer>] codepoints forming the glyph
# @param wide_ambiguous [Boolean] passed on to determine_padding and puts_codepoint
# @return [Object] the result of the final puts_codepoint call
def self.puts_composition(cps, wide_ambiguous = false)
  char = cps.pack("U*")
  sequence_name = Unicode::SequenceName.of(char)
  name = sequence_name ? "Composition: #{sequence_name}" : "Composition"

  color = random_color
  symbol = symbolify_composition(char)
  padding = determine_padding(symbol, false, wide_ambiguous)

  puts format(
    " %s ├┬ %s%s├┬ %s",
    Paint["----", color],
    Paint[symbol, color],
    padding,
    Paint[name, color]
  )

  # Every codepoint except the final one gets a "middle" branch.
  *leading, final = cps
  leading.each { |cp| puts_codepoint(cp, true, false, wide_ambiguous) }
  puts_codepoint(final, true, true, wide_ambiguous)
end
.random_color ⇒ Object
153 154 155 |
# File 'lib/uniscribe.rb', line 153

# Produces a random color as six lowercase hex digits (no leading "#").
# Each of the three channels is drawn independently from 60..149.
#
# @return [String] e.g. "5a8c3d"
def self.random_color
  channels = Array.new(3) { rand(90) + 60 }
  format("%.2x%.2x%.2x", *channels)
end
.symbolify_composition(char) ⇒ Object
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/uniscribe.rb', line 189

# Returns a printable representation of a multi-codepoint glyph.
#
# Rules, first match wins:
# * any unassigned codepoint -> "n/a"
# * only separators -> "⏎"
# * only Mn/Me marks -> the glyph prefixed with "◌" when an Mn mark is
#   present, otherwise prefixed with a space
# * only blanks -> the glyph wrapped as "]…["
# * otherwise -> the glyph unchanged
#
# @param char [String] the glyph (one or more codepoints)
# @return [String] the symbolified glyph
def self.symbolify_composition(char)
  infos = char.chars.map { |c| UnicodeCharacteristics.new(c) }

  return "n/a" if infos.any? { |info| !info.assigned? }
  return "⏎" if infos.all?(&:separator?)

  if infos.all? { |info| %w[Mn Me].include?(info.category) }
    prefix = infos.any? { |info| info.category == "Mn" } ? "◌" : " "
    return prefix + char
  end

  infos.all?(&:blank?) ? "]" + char + "[" : char
end
.visualize(glyphs, wide_ambiguous: false) ⇒ Object
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/uniscribe.rb', line 63

# Prints the description of every glyph, framed by a blank line before and
# after. Glyphs with more than one codepoint are printed as compositions;
# single-codepoint glyphs are printed directly, with the final glyph flagged
# as last.
#
# An empty glyph list prints nothing and returns nil instead of raising.
#
# @param glyphs [Array<String>] grapheme clusters to describe
# @param wide_ambiguous [Boolean] passed on to the row printers
# @return [nil]
def self.visualize(glyphs, wide_ambiguous: false)
  # Nothing to describe; previously this crashed on glyphs[-1] being nil.
  return if glyphs.empty?

  puts
  final_index = glyphs.size - 1
  glyphs.each_with_index do |glyph, index|
    cps = glyph.codepoints
    if cps.size > 1
      puts_composition(cps, wide_ambiguous)
    else
      puts_codepoint(cps[0], false, index == final_index, wide_ambiguous)
    end
  end
  puts
end