Module: UnicodeNamecode

Defined in: lib/unicode_namecode.rb,
lib/unicode_namecode/trie.rb,
lib/unicode_namecode/emoji.rb,
lib/unicode_namecode/fuzzy.rb,
lib/unicode_namecode/aliases.rb,
lib/unicode_namecode/data_loader.rb

Defined Under Namespace

Modules: Aliases, DataLoader, Emoji, Fuzzy. Classes: Trie, TrieNode.

Constant Summary collapse

VERSION =

"0.1.0"

Class Method Summary collapse

.aliases_for_codepoint(codepoint) ⇒ Object
.calculate_similarity(str1, str2) ⇒ Object

Utility method: returns the similarity of two strings as computed by Fuzzy.calculate_similarity.
.codepoint(name) ⇒ Object

Look up a Unicode character’s codepoint by its official name or alias.
.codepoint_for_alias(alias_name) ⇒ Object

Alias API: looks up the codepoint for an alias name via Aliases.codepoint_for_alias.
.codepoint_for_emoji(emoji) ⇒ Object

Emoji API: looks up the codepoint for an emoji via Emoji.codepoint_for_emoji.
.codepoint_of(character) ⇒ Object

Get the codepoint of a character.
.emoji_for_codepoint(codepoint_or_array) ⇒ Object
.fuzzy_search(name, limit = 5, similarity_threshold = 0.3) ⇒ Object

Find Unicode names similar to the given name (for typos/partial matches).
.is_alias?(name) ⇒ Boolean
.levenshtein_distance(str1, str2) ⇒ Object
.lookup(name) ⇒ Object

Looks up a character by name and returns its codepoint in Unicode notation (U+XXXX), or nil when the name is not found.
.name_for_codepoint(codepoint) ⇒ Object

Reverse lookup: returns the name stored for a codepoint in DataLoader.codepoint_to_name.
.name_for_emoji(emoji) ⇒ Object
.of(character) ⇒ Object

Get the Unicode name of a character.
.prefix_search(prefix, limit = 100) ⇒ Object

Find all Unicode names that start with the given prefix.
.unicode_of(character) ⇒ Object

Get the Unicode format of a character.

Class Method Details

.aliases_for_codepoint(codepoint) ⇒ `Object`



87
88
89

# File 'lib/unicode_namecode.rb', line 87

# Thin facade over the Aliases module: forwards the call and returns
# whatever Aliases.aliases_for_codepoint returns, unchanged.
#
# @param codepoint [Object] handed to Aliases as-is (presumably an Integer
#   codepoint — confirm against Aliases)
# @return [Object] the result of Aliases.aliases_for_codepoint
def self.aliases_for_codepoint(codepoint)
  Aliases.aliases_for_codepoint(codepoint)
end

.calculate_similarity(str1, str2) ⇒ `Object`

Utility method: returns the similarity of two strings as computed by Fuzzy.calculate_similarity.



97
98
99

# File 'lib/unicode_namecode.rb', line 97

# Module-level shortcut for Fuzzy.calculate_similarity; the two arguments
# are forwarded in order and the result is returned unchanged.
#
# @param str1 [Object] first value to compare (presumably a String — confirm
#   against Fuzzy)
# @param str2 [Object] second value to compare
# @return [Object] the result of Fuzzy.calculate_similarity
def self.calculate_similarity(str1, str2)
  Fuzzy.calculate_similarity(str1, str2)
end

.codepoint(name) ⇒ `Object`

Look up a Unicode character’s codepoint by its official name or alias

# File 'lib/unicode_namecode.rb', line 13

# Resolves a character name to the value stored for it.
#
# The name is stripped of surrounding whitespace. The trie is queried with
# the upper-cased form first; if that yields nothing, the stripped name
# (original case) is handed to Aliases.codepoint_for_alias.
#
# @param name [String] official name or alias; must respond to #strip
# @return [Object, nil] the trie hit when truthy, otherwise the result of
#   the alias lookup
def self.codepoint(name)
  # Load the data set lazily, only when no trie is available yet.
  DataLoader.trie || DataLoader.load_data

  trimmed = name.strip

  # An exact (case-insensitive) name match wins; aliases are the fallback.
  DataLoader.trie.find(trimmed.upcase) || Aliases.codepoint_for_alias(trimmed)
end

.codepoint_for_alias(alias_name) ⇒ `Object`

Alias API: looks up the codepoint for an alias name via Aliases.codepoint_for_alias.



83
84
85

# File 'lib/unicode_namecode.rb', line 83

# Module-level shortcut for Aliases.codepoint_for_alias; the argument is
# forwarded untouched (no stripping or case folding happens here).
#
# @param alias_name [Object] alias to resolve (presumably a String — confirm
#   against Aliases)
# @return [Object] the result of Aliases.codepoint_for_alias
def self.codepoint_for_alias(alias_name)
  Aliases.codepoint_for_alias(alias_name)
end

.codepoint_for_emoji(emoji) ⇒ `Object`

Emoji API: looks up the codepoint for an emoji via Emoji.codepoint_for_emoji.



62
63
64

# File 'lib/unicode_namecode.rb', line 62

# Module-level shortcut for Emoji.codepoint_for_emoji; the argument is
# forwarded as-is and the result is returned unchanged.
#
# @param emoji [Object] value to resolve (presumably an emoji String —
#   confirm against Emoji)
# @return [Object] the result of Emoji.codepoint_for_emoji
def self.codepoint_for_emoji(emoji)
  Emoji.codepoint_for_emoji(emoji)
end

.codepoint_of(character) ⇒ `Object`

Get the codepoint of a character



38
39
40

# File 'lib/unicode_namecode.rb', line 38

# Returns the result of calling #ord on +character+.
#
# NOTE: for a String argument, String#ord considers only the first
# character, so any further characters in a longer string are ignored.
#
# @param character [#ord] typically a one-character String
# @return [Integer] the value returned by #ord
def self.codepoint_of(character)
  character.ord
end

.emoji_for_codepoint(codepoint_or_array) ⇒ `Object`



70
71
72

# File 'lib/unicode_namecode.rb', line 70

# Module-level shortcut for Emoji.emoji_for_codepoint; the argument is
# forwarded as-is and the result is returned unchanged.
#
# @param codepoint_or_array [Object] going by the parameter name, either a
#   single codepoint or an array of codepoints — confirm against Emoji
# @return [Object] the result of Emoji.emoji_for_codepoint
def self.emoji_for_codepoint(codepoint_or_array)
  Emoji.emoji_for_codepoint(codepoint_or_array)
end

.fuzzy_search(name, limit = 5, similarity_threshold = 0.3) ⇒ `Object`

Find Unicode names similar to the given name (for typos/partial matches)

# File 'lib/unicode_namecode.rb', line 55

# Runs a fuzzy name search against the loaded fuzzy index.
#
# The data set is loaded on demand when DataLoader.fuzzy is not yet set;
# the actual matching is done by Fuzzy.fuzzy_search.
#
# @param name [Object] search term, forwarded unchanged
# @param limit [Integer] forwarded to Fuzzy.fuzzy_search (default 5)
# @param similarity_threshold [Float] forwarded to Fuzzy.fuzzy_search
#   (default 0.3)
# @return [Object] the result of Fuzzy.fuzzy_search
def self.fuzzy_search(name, limit = 5, similarity_threshold = 0.3)
  DataLoader.fuzzy || DataLoader.load_data

  index = DataLoader.fuzzy
  Fuzzy.fuzzy_search(index, name, limit, similarity_threshold)
end

.is_alias?(name) ⇒ `Boolean`

Returns:

(Boolean)



91
92
93

# File 'lib/unicode_namecode.rb', line 91

# Module-level shortcut for Aliases.is_alias?; the argument is forwarded
# as-is and the result is returned unchanged.
#
# @param name [Object] candidate alias (presumably a String — confirm
#   against Aliases)
# @return [Object] the result of Aliases.is_alias? (expected to be
#   true/false given the predicate name)
def self.is_alias?(name)
  Aliases.is_alias?(name)
end

.levenshtein_distance(str1, str2) ⇒ `Object`



101
102
103

# File 'lib/unicode_namecode.rb', line 101

# Module-level shortcut for Fuzzy.levenshtein_distance; both arguments are
# forwarded in order and the result is returned unchanged.
#
# @param str1 [Object] first value (presumably a String — confirm against
#   Fuzzy)
# @param str2 [Object] second value
# @return [Object] the result of Fuzzy.levenshtein_distance
def self.levenshtein_distance(str1, str2)
  Fuzzy.levenshtein_distance(str1, str2)
end

.lookup(name) ⇒ `Object`

Looks up a character by name and returns its codepoint in Unicode notation (U+XXXX), or nil when the name is not found.

# File 'lib/unicode_namecode.rb', line 25

# Resolves +name+ through the codepoint lookup and renders the hit in
# "U+XXXX" notation: upper-case hexadecimal, left-padded with zeros to at
# least four digits.
#
# @param name [Object] forwarded unchanged to the codepoint lookup
# @return [String, nil] the formatted codepoint, or nil when the lookup
#   returns nothing
def self.lookup(name)
  # Named differently from the method so the local does not shadow it.
  resolved = codepoint(name)
  return unless resolved

  hex_digits = resolved.to_s(16).upcase
  "U+#{hex_digits.rjust(4, '0')}"
end

.name_for_codepoint(codepoint) ⇒ `Object`

Reverse lookup: returns the name stored for a codepoint in DataLoader.codepoint_to_name.

# File 'lib/unicode_namecode.rb', line 76

# Reverse lookup: fetches the entry stored under +codepoint+ in
# DataLoader.codepoint_to_name, loading the data set first when that table
# is not yet available.
#
# @param codepoint [Object] key used to index the table (presumably an
#   Integer codepoint — confirm against DataLoader)
# @return [Object] whatever the table returns for that key
def self.name_for_codepoint(codepoint)
  DataLoader.codepoint_to_name || DataLoader.load_data

  names_by_codepoint = DataLoader.codepoint_to_name
  names_by_codepoint[codepoint]
end

.name_for_emoji(emoji) ⇒ `Object`



66
67
68

# File 'lib/unicode_namecode.rb', line 66

# Module-level shortcut for Emoji.name_for_emoji; the argument is forwarded
# as-is and the result is returned unchanged.
#
# @param emoji [Object] value to resolve (presumably an emoji String —
#   confirm against Emoji)
# @return [Object] the result of Emoji.name_for_emoji
def self.name_for_emoji(emoji)
  Emoji.name_for_emoji(emoji)
end

.of(character) ⇒ `Object`

Get the Unicode name of a character

# File 'lib/unicode_namecode.rb', line 32

# Returns the name registered for +character+ by taking its #ord value and
# passing it to name_for_codepoint.
#
# @param character [#ord] typically a one-character String; for longer
#   strings String#ord only looks at the first character
# @return [Object] the result of name_for_codepoint
def self.of(character)
  name_for_codepoint(character.ord)
end

.prefix_search(prefix, limit = 100) ⇒ `Object`

Find all Unicode names that start with the given prefix

# File 'lib/unicode_namecode.rb', line 49

# Queries the trie for entries matching an upper-cased +prefix+.
#
# The data set is loaded on demand when no trie is available yet. The
# prefix is upper-cased but not stripped, so surrounding whitespace is
# passed on to the trie.
#
# @param prefix [String] leading text to search for; must respond to #upcase
# @param limit [Integer] forwarded to the trie's prefix_search (default 100)
# @return [Object] the result of the trie's prefix_search
def self.prefix_search(prefix, limit = 100)
  DataLoader.trie || DataLoader.load_data

  needle = prefix.upcase
  DataLoader.trie.prefix_search(needle, limit)
end

.unicode_of(character) ⇒ `Object`

Get the Unicode format of a character

# File 'lib/unicode_namecode.rb', line 43

# Renders the #ord value of +character+ in "U+XXXX" notation: upper-case
# hexadecimal, zero-padded to at least four digits.
#
# @param character [#ord] typically a one-character String; for longer
#   strings String#ord only looks at the first character
# @return [String] e.g. "U+0041" for "A"
def self.unicode_of(character)
  format('U+%04X', character.ord)
end

Module: UnicodeNamecode

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

UTILITY METHODS ===.

ALIAS API METHODS ===.

EMOJI API METHODS ===.

REVERSE LOOKUP METHODS ===.

Class Method Details

.aliases_for_codepoint(codepoint) ⇒ Object

.calculate_similarity(str1, str2) ⇒ Object

UTILITY METHODS ===

.codepoint(name) ⇒ Object

.codepoint_for_alias(alias_name) ⇒ Object

ALIAS API METHODS ===

.codepoint_for_emoji(emoji) ⇒ Object

EMOJI API METHODS ===

.codepoint_of(character) ⇒ Object

.emoji_for_codepoint(codepoint_or_array) ⇒ Object

.fuzzy_search(name, limit = 5, similarity_threshold = 0.3) ⇒ Object

.is_alias?(name) ⇒ Boolean

.levenshtein_distance(str1, str2) ⇒ Object

.lookup(name) ⇒ Object

.name_for_codepoint(codepoint) ⇒ Object