Class: BBLib::FuzzyMatcher
- Inherits:
-
Object
- Object
- BBLib::FuzzyMatcher
- Defined in:
- lib/string/fuzzy_matcher.rb
Instance Attribute Summary collapse
-
#a ⇒ Object
Returns the value of attribute a.
-
#algorithms ⇒ Object
readonly
Returns the value of attribute algorithms.
-
#b ⇒ Object
Returns the value of attribute b.
-
#case_sensitive ⇒ Object
Returns the value of attribute case_sensitive.
-
#convert_roman ⇒ Object
Returns the value of attribute convert_roman.
-
#move_articles ⇒ Object
Returns the value of attribute move_articles.
-
#remove_symbols ⇒ Object
Returns the value of attribute remove_symbols.
-
#threshold ⇒ Object
Returns the value of attribute threshold.
Instance Method Summary collapse
-
#best_match(a, b) ⇒ Object
Returns the best match from array b to string a based on percent.
-
#initialize(threshold: 75, case_sensitive: true, remove_symbols: false, move_articles: false, convert_roman: true) ⇒ FuzzyMatcher
constructor
A new instance of FuzzyMatcher.
-
#match?(a, b) ⇒ Boolean
Checks whether the match percentage between Strings a and b is equal to or greater than the threshold.
- #set_weight(algorithm, weight) ⇒ Object
-
#similarities(a, b, sort: false) ⇒ Object
Returns a hash mapping each element of array ‘b’ to its percentage match with a.
-
#similarity(a, b) ⇒ Object
Calculates a percentage match between string a and string b.
Constructor Details
#initialize(threshold: 75, case_sensitive: true, remove_symbols: false, move_articles: false, convert_roman: true) ⇒ FuzzyMatcher
Returns a new instance of FuzzyMatcher.
8 9 10 11 12 |
# File 'lib/string/fuzzy_matcher.rb', line 8
# Builds a new FuzzyMatcher.
#
# The threshold is assigned through the threshold= writer, the algorithm
# table is prepared by setup_algorithms, and the remaining options are
# stored verbatim in instance variables of the same name.
#
# @param threshold [Numeric] minimum percentage used by #match? (default 75)
# @param case_sensitive [Boolean] stored in @case_sensitive
# @param remove_symbols [Boolean] stored in @remove_symbols
# @param move_articles [Boolean] stored in @move_articles
# @param convert_roman [Boolean] stored in @convert_roman
def initialize(threshold: 75, case_sensitive: true, remove_symbols: false, move_articles: false, convert_roman: true)
  self.threshold = threshold
  setup_algorithms
  @case_sensitive = case_sensitive
  @remove_symbols = remove_symbols
  @move_articles = move_articles
  @convert_roman = convert_roman
end
Instance Attribute Details
#a ⇒ Object
Returns the value of attribute a.
6 7 8 |
# File 'lib/string/fuzzy_matcher.rb', line 6 def a @a end |
#algorithms ⇒ Object (readonly)
Returns the value of attribute algorithms.
5 6 7 |
# File 'lib/string/fuzzy_matcher.rb', line 5 def algorithms @algorithms end |
#b ⇒ Object
Returns the value of attribute b.
6 7 8 |
# File 'lib/string/fuzzy_matcher.rb', line 6 def b @b end |
#case_sensitive ⇒ Object
Returns the value of attribute case_sensitive.
6 7 8 |
# File 'lib/string/fuzzy_matcher.rb', line 6 def case_sensitive @case_sensitive end |
#convert_roman ⇒ Object
Returns the value of attribute convert_roman.
6 7 8 |
# File 'lib/string/fuzzy_matcher.rb', line 6 def convert_roman @convert_roman end |
#move_articles ⇒ Object
Returns the value of attribute move_articles.
6 7 8 |
# File 'lib/string/fuzzy_matcher.rb', line 6 def move_articles @move_articles end |
#remove_symbols ⇒ Object
Returns the value of attribute remove_symbols.
6 7 8 |
# File 'lib/string/fuzzy_matcher.rb', line 6 def remove_symbols @remove_symbols end |
#threshold ⇒ Object
Returns the value of attribute threshold.
5 6 7 |
# File 'lib/string/fuzzy_matcher.rb', line 5 def threshold @threshold end |
Instance Method Details
#best_match(a, b) ⇒ Object
Returns the best match from array b to string a based on percent.
32 33 34 |
# File 'lib/string/fuzzy_matcher.rb', line 32
# Returns the element of b that has the highest match percentage against a.
#
# @param a [String] the string to compare against
# @param b [Array<String>, String] candidate(s), scored via #similarities
# @return [Object, nil] the best-scoring candidate, or nil when there are
#   no candidates to choose from
def best_match(a, b)
  # max_by yields nil for an empty hash; destructuring nil gives nil rather
  # than raising NoMethodError the way nil[0] would.
  best, _percent = similarities(a, b).max_by { |_candidate, percent| percent }
  best
end
#match?(a, b) ⇒ Boolean
Checks whether the match percentage between Strings a and b is equal to or greater than the threshold.
27 28 29 |
# File 'lib/string/fuzzy_matcher.rb', line 27
# Tells whether a and b are similar enough to count as a match.
#
# @param a [String] first string
# @param b [String] second string
# @return [Boolean] true when #similarity(a, b) is greater than or equal to
#   the configured threshold (converted with to_f)
def match?(a, b)
  percent = similarity(a, b)
  percent >= @threshold.to_f
end
#set_weight(algorithm, weight) ⇒ Object
47 48 49 50 |
# File 'lib/string/fuzzy_matcher.rb', line 47
# Changes the weight of a registered algorithm.
#
# @param algorithm [Object] key identifying the algorithm in @algorithms
# @param weight [Numeric] requested weight; passed through
#   BBLib.keep_between(weight, 0, nil) before being stored
#   (presumably clamps to a minimum of 0 with no upper bound -- confirm)
# @return [Object, nil] the stored weight, or nil when the algorithm is unknown
def set_weight(algorithm, weight)
  if @algorithms.include?(algorithm)
    @algorithms[algorithm][:weight] = BBLib.keep_between(weight, 0, nil)
  end
end
#similarities(a, b, sort: false) ⇒ Object
Returns a hash mapping each element of array ‘b’ to its percentage match with a. If sort is true, the hash is sorted in descending order by match percent.
37 38 39 40 41 |
# File 'lib/string/fuzzy_matcher.rb', line 37
# Scores every candidate in b against a.
#
# @param a [String] the string to compare against
# @param b [Array, Object] candidate(s); wrapped in an array and flattened,
#   so a single value or nested arrays are accepted
# @param sort [Boolean] when true, order the result from highest to lowest
#   percentage
# @return [Hash] candidate => percentage as returned by #similarity
def similarities(a, b, sort: false)
  scores = [b].flatten.each_with_object({}) do |candidate, acc|
    acc[candidate] = similarity(a, candidate)
  end
  return scores unless sort

  scores.sort_by { |_candidate, percent| percent }.reverse.to_h
end
#similarity(a, b) ⇒ Object
Calculates a percentage match between string a and string b.
15 16 17 18 19 20 21 22 23 24 |
# File 'lib/string/fuzzy_matcher.rb', line 15
# Calculates a weighted percentage match between string a and string b.
#
# Both arguments are handed to prep_strings first; the comparison itself is
# done on @a and @b (presumably the prepared forms that prep_strings stores
# -- confirm against its definition). Every algorithm whose weight is
# positive is invoked on @a through its :signature method with @b as the
# argument, and the weighted results are averaged over the sum of all weights.
#
# @param a [String] first string
# @param b [String] second string
# @return [Float] 100.0 when @a == @b after preparation; 0.0 when the total
#   weight is not positive (no algorithm enabled); otherwise the weighted
#   average of the algorithm results
def similarity(a, b)
  prep_strings(a, b)
  return 100.0 if @a == @b

  # Seed with 0 so an empty algorithm table sums to 0 instead of nil.
  total_weight = @algorithms.inject(0) { |sum, (_name, settings)| sum + settings[:weight] }
  # With nothing enabled there is nothing to average; avoid dividing by zero.
  return 0.0 unless total_weight > 0

  score = 0
  @algorithms.each do |_name, settings|
    weight = settings[:weight]
    next unless weight > 0

    score += @a.send(settings[:signature], @b) * weight
  end

  # Float division keeps integer scores/weights from being truncated.
  score / total_weight.to_f
end