Module: UnicodeNamecode

Defined in:
lib/unicode_namecode.rb,
lib/unicode_namecode/trie.rb,
lib/unicode_namecode/emoji.rb,
lib/unicode_namecode/fuzzy.rb,
lib/unicode_namecode/aliases.rb,
lib/unicode_namecode/data_loader.rb

Defined Under Namespace

Modules: Aliases, DataLoader, Emoji, Fuzzy
Classes: Trie, TrieNode

Constant Summary collapse

VERSION =
"0.1.0"

Class Method Summary collapse

Class Method Details

.aliases_for_codepoint(codepoint) ⇒ Object



87
88
89
# File 'lib/unicode_namecode.rb', line 87

# Module-level convenience wrapper: forwards +codepoint+ unchanged to
# Aliases.aliases_for_codepoint and returns that call's result.
#
# @param codepoint [Object] passed through as-is (presumably an Integer
#   codepoint -- confirm against Aliases)
# @return [Object] whatever Aliases.aliases_for_codepoint returns
def self.aliases_for_codepoint(codepoint)
  Aliases.aliases_for_codepoint(codepoint)
end

.calculate_similarity(str1, str2) ⇒ Object

Utility methods: computes a similarity measure for two strings by delegating to Fuzzy.calculate_similarity.



97
98
99
# File 'lib/unicode_namecode.rb', line 97

# Module-level convenience wrapper: forwards both arguments, in order, to
# Fuzzy.calculate_similarity and returns that call's result.
#
# @param str1 [Object] first value to compare (presumably a String -- confirm)
# @param str2 [Object] second value to compare (presumably a String -- confirm)
# @return [Object] whatever Fuzzy.calculate_similarity returns
def self.calculate_similarity(str1, str2)
  Fuzzy.calculate_similarity(str1, str2)
end

.codepoint(name) ⇒ Object

Look up a Unicode character’s codepoint by its official name or alias



13
14
15
16
17
18
19
20
21
22
# File 'lib/unicode_namecode.rb', line 13

# Resolves a character name to the value stored for it, trying the name trie
# first and the alias table second.
#
# The trie is populated on demand: DataLoader.load_data is invoked only when
# DataLoader.trie is not yet set.
#
# @param name [String] name to look up; surrounding whitespace is stripped,
#   and the trie is queried with the upper-cased form
# @return [Object, nil] the trie hit when there is one, otherwise whatever
#   Aliases.codepoint_for_alias returns for the stripped (not upper-cased) name
def self.codepoint(name)
  DataLoader.trie || DataLoader.load_data

  trimmed = name.strip
  # Exact trie match wins; the alias table is consulted only on a miss.
  DataLoader.trie.find(trimmed.upcase) || Aliases.codepoint_for_alias(trimmed)
end

.codepoint_for_alias(alias_name) ⇒ Object

Alias API methods: returns the codepoint for an alias name by delegating to Aliases.codepoint_for_alias.



83
84
85
# File 'lib/unicode_namecode.rb', line 83

# Module-level convenience wrapper: forwards +alias_name+ unchanged to
# Aliases.codepoint_for_alias and returns that call's result.
#
# @param alias_name [Object] passed through as-is (presumably a String -- confirm)
# @return [Object] whatever Aliases.codepoint_for_alias returns
def self.codepoint_for_alias(alias_name)
  Aliases.codepoint_for_alias(alias_name)
end

.codepoint_for_emoji(emoji) ⇒ Object

Emoji API methods: returns the codepoint for an emoji by delegating to Emoji.codepoint_for_emoji.



62
63
64
# File 'lib/unicode_namecode.rb', line 62

# Module-level convenience wrapper: forwards +emoji+ unchanged to
# Emoji.codepoint_for_emoji and returns that call's result.
#
# @param emoji [Object] passed through as-is (presumably an emoji String -- confirm)
# @return [Object] whatever Emoji.codepoint_for_emoji returns
def self.codepoint_for_emoji(emoji)
  Emoji.codepoint_for_emoji(emoji)
end

.codepoint_of(character) ⇒ Object

Get the codepoint of a character



38
39
40
# File 'lib/unicode_namecode.rb', line 38

# Returns the result of calling +ord+ on +character+.
#
# NOTE(review): assuming +character+ is a String, String#ord yields the
# Integer codepoint of its first character only and raises ArgumentError
# for an empty string.
#
# @param character [#ord] value whose +ord+ is returned
# @return [Object] result of +character.ord+
def self.codepoint_of(character)
  character.ord
end

.emoji_for_codepoint(codepoint_or_array) ⇒ Object



70
71
72
# File 'lib/unicode_namecode.rb', line 70

# Module-level convenience wrapper: forwards +codepoint_or_array+ unchanged to
# Emoji.emoji_for_codepoint and returns that call's result.
#
# @param codepoint_or_array [Object] passed through as-is (the name suggests a
#   single codepoint or an array of them -- confirm against Emoji)
# @return [Object] whatever Emoji.emoji_for_codepoint returns
def self.emoji_for_codepoint(codepoint_or_array)
  Emoji.emoji_for_codepoint(codepoint_or_array)
end

.fuzzy_search(name, limit = 5, similarity_threshold = 0.3) ⇒ Object

Find Unicode names similar to the given name (for typos/partial matches)



55
56
57
58
# File 'lib/unicode_namecode.rb', line 55

# Runs an approximate-name search by handing the loaded fuzzy data and all
# three arguments to Fuzzy.fuzzy_search.
#
# The fuzzy data is populated on demand: DataLoader.load_data is invoked only
# when DataLoader.fuzzy is not yet set.
#
# @param name [Object] search term, forwarded unchanged
# @param limit [Object] forwarded unchanged; defaults to 5
# @param similarity_threshold [Object] forwarded unchanged; defaults to 0.3
# @return [Object] whatever Fuzzy.fuzzy_search returns
def self.fuzzy_search(name, limit = 5, similarity_threshold = 0.3)
  DataLoader.fuzzy || DataLoader.load_data

  index = DataLoader.fuzzy
  Fuzzy.fuzzy_search(index, name, limit, similarity_threshold)
end

.is_alias?(name) ⇒ Boolean

Returns:

  • (Boolean)


91
92
93
# File 'lib/unicode_namecode.rb', line 91

# Module-level convenience wrapper: forwards +name+ unchanged to
# Aliases.is_alias? and returns that call's result.
#
# @param name [Object] passed through as-is (presumably a String -- confirm)
# @return [Object] whatever Aliases.is_alias? returns (documented as Boolean)
def self.is_alias?(name)
  Aliases.is_alias?(name)
end

.levenshtein_distance(str1, str2) ⇒ Object



101
102
103
# File 'lib/unicode_namecode.rb', line 101

# Module-level convenience wrapper: forwards both arguments, in order, to
# Fuzzy.levenshtein_distance and returns that call's result.
#
# @param str1 [Object] first value (presumably a String -- confirm)
# @param str2 [Object] second value (presumably a String -- confirm)
# @return [Object] whatever Fuzzy.levenshtein_distance returns
def self.levenshtein_distance(str1, str2)
  Fuzzy.levenshtein_distance(str1, str2)
end

.lookup(name) ⇒ Object

Looks up a character by name or alias and returns its codepoint in Unicode notation (U+XXXX), or nil when the name is not found.



25
26
27
28
29
# File 'lib/unicode_namecode.rb', line 25

# Resolves +name+ via .codepoint and renders the result in "U+XXXX" notation.
#
# @param name [Object] forwarded unchanged to .codepoint
# @return [String, nil] "U+" followed by the upper-case hexadecimal value,
#   left-padded with zeros to at least four digits; nil when .codepoint
#   returns a falsy value
def self.lookup(name)
  value = codepoint(name)
  return unless value

  hex = value.to_s(16).upcase
  "U+#{hex.rjust(4, '0')}"
end

.name_for_codepoint(codepoint) ⇒ Object

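Reverse lookup methods: returns the entry stored for a codepoint in DataLoader.codepoint_to_name, loading the data first if it has not been loaded yet.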



76
77
78
79
# File 'lib/unicode_namecode.rb', line 76

# Reverse lookup: fetches the entry stored under +codepoint+ in
# DataLoader.codepoint_to_name.
#
# The table is populated on demand: DataLoader.load_data is invoked only when
# DataLoader.codepoint_to_name is not yet set.
#
# @param codepoint [Object] key used to index the table
# @return [Object] result of indexing the table with +codepoint+
def self.name_for_codepoint(codepoint)
  DataLoader.codepoint_to_name || DataLoader.load_data

  names_by_codepoint = DataLoader.codepoint_to_name
  names_by_codepoint[codepoint]
end

.name_for_emoji(emoji) ⇒ Object



66
67
68
# File 'lib/unicode_namecode.rb', line 66

# Module-level convenience wrapper: forwards +emoji+ unchanged to
# Emoji.name_for_emoji and returns that call's result.
#
# @param emoji [Object] passed through as-is (presumably an emoji String -- confirm)
# @return [Object] whatever Emoji.name_for_emoji returns
def self.name_for_emoji(emoji)
  Emoji.name_for_emoji(emoji)
end

.of(character) ⇒ Object

Get the Unicode name of a character



32
33
34
35
# File 'lib/unicode_namecode.rb', line 32

# Looks up the name of a character by passing +character.ord+ to
# .name_for_codepoint.
#
# @param character [#ord] value whose +ord+ is used as the lookup key
# @return [Object] whatever .name_for_codepoint returns for that key
def self.of(character)
  name_for_codepoint(character.ord)
end

.prefix_search(prefix, limit = 100) ⇒ Object

Find all Unicode names that start with the given prefix



49
50
51
52
# File 'lib/unicode_namecode.rb', line 49

# Queries the name trie for entries matching the upper-cased +prefix+.
#
# The trie is populated on demand: DataLoader.load_data is invoked only when
# DataLoader.trie is not yet set.
#
# @param prefix [#upcase] prefix to search for; upper-cased before the query
# @param limit [Object] forwarded unchanged to the trie; defaults to 100
# @return [Object] whatever the trie's +prefix_search+ returns
def self.prefix_search(prefix, limit = 100)
  DataLoader.trie || DataLoader.load_data

  name_trie = DataLoader.trie
  name_trie.prefix_search(prefix.upcase, limit)
end

.unicode_of(character) ⇒ Object

Get the Unicode format of a character



43
44
45
46
# File 'lib/unicode_namecode.rb', line 43

# Renders the codepoint of +character+ (as given by +character.ord+) in
# "U+XXXX" notation: upper-case hexadecimal, zero-padded to at least four
# digits.
#
# @param character [#ord] value whose +ord+ is formatted
# @return [String] e.g. "U+0041" for "A"
def self.unicode_of(character)
  format('U+%04X', character.ord)
end