Module: UnicodeNamecode::Fuzzy

Defined in:
lib/unicode_namecode/fuzzy.rb

Overview

Handles fuzzy matching for typo-tolerant Unicode name searches

Class Method Summary

Class Method Details

.calculate_similarity(str1, str2) ⇒ Object

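Calculate a similarity score between two strings from their Levenshtein distance: 1.0 for equal strings, 0.0 when either string is empty, otherwise 1.0 minus the distance divided by the length of the longer string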



23
24
25
26
27
28
29
30
31
# File 'lib/unicode_namecode/fuzzy.rb', line 23

# Scores how alike two strings are, based on their Levenshtein distance.
#
# @param str1 [String] first string to compare
# @param str2 [String] second string to compare
# @return [Float] 1.0 when the strings are equal, 0.0 when either one is
#   empty, otherwise 1.0 minus the edit distance divided by the length of
#   the longer string
def self.calculate_similarity(str1, str2)
  if str1 == str2
    1.0
  elsif str1.empty? || str2.empty?
    0.0
  else
    edits = levenshtein_distance(str1, str2)
    longest = [str1, str2].map(&:length).max

    # Normalise by the longer string so the ratio is relative to the
    # largest possible number of edits.
    1.0 - (edits.to_f / longest)
  end
end

.fuzzy_search(fuzzy_matcher, name, limit = 5, similarity_threshold = 0.3) ⇒ Object

Find Unicode names similar to the given name using fuzzy matching



9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/unicode_namecode/fuzzy.rb', line 9

# Finds Unicode names that resemble +name+ by delegating to a fuzzy matcher.
#
# @param fuzzy_matcher [#find_all_with_score, nil] matcher (from the
#   fuzzy_match gem) queried for candidates; when nil or false no search
#   is attempted
# @param name [String] name to look up; it is upcased before matching
# @param limit [Integer] maximum number of results returned
# @param similarity_threshold [Numeric] minimum score a candidate needs
#   to be kept
# @return [Array<Hash>] up to +limit+ hashes of the form
#   +{ name: ..., similarity: ... }+, in the order the matcher returned them
def self.fuzzy_search(fuzzy_matcher, name, limit = 5, similarity_threshold = 0.3)
  return [] unless fuzzy_matcher

  # Request twice as many candidates as needed, since some may be dropped
  # by the threshold filter below.
  candidates = fuzzy_matcher.find_all_with_score(name.upcase, limit: limit * 2)

  close_enough = candidates.select do |_candidate, score|
    score >= similarity_threshold
  end

  close_enough.take(limit).map do |candidate, score|
    { name: candidate, similarity: score }
  end
end

.levenshtein_distance(str1, str2) ⇒ Object

Calculate the Levenshtein distance between two strings



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/unicode_namecode/fuzzy.rb', line 34

# Computes the Levenshtein (edit) distance between two strings: the minimum
# number of single-character insertions, deletions and substitutions needed
# to turn +str1+ into +str2+.
#
# Uses dynamic programming, keeping only the previous and the current row
# of the distance table instead of the whole table, because each cell
# depends solely on its left, upper and upper-left neighbours.
#
# @param str1 [String] source string
# @param str2 [String] target string
# @return [Integer] the edit distance; equals the other string's length
#   when one of the strings is empty
def self.levenshtein_distance(str1, str2)
  # Row 0: turning an empty prefix of str1 into the first j characters of
  # str2 takes j insertions.
  previous_row = (0..str2.length).to_a

  (1..str1.length).each do |i|
    source_char = str1[i - 1]

    # Column 0: turning the first i characters of str1 into an empty
    # string takes i deletions.
    current_row = [i]

    (1..str2.length).each do |j|
      current_row << if source_char == str2[j - 1]
                       previous_row[j - 1]
                     else
                       [
                         previous_row[j],     # deletion
                         current_row[j - 1],  # insertion
                         previous_row[j - 1]  # substitution
                       ].min + 1
                     end
    end

    previous_row = current_row
  end

  previous_row.last
end