Class: BBLib::FuzzyMatcher

Inherits:
Object
  • Object
show all
Defined in:
lib/string/fuzzy_matcher.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(threshold: 75, case_sensitive: true, remove_symbols: false, move_articles: false, convert_roman: true) ⇒ FuzzyMatcher

Returns a new instance of FuzzyMatcher.



8
9
10
11
# File 'lib/string/fuzzy_matcher.rb', line 8

# Builds a matcher and records its options.
#
# threshold goes through the threshold= writer (defined outside this
# listing) rather than being stored directly; the remaining options are
# stored as plain instance variables.
def initialize(threshold: 75, case_sensitive: true, remove_symbols: false, move_articles: false, convert_roman: true)
  self.threshold = threshold
  @case_sensitive = case_sensitive
  @remove_symbols = remove_symbols
  @move_articles  = move_articles
  @convert_roman  = convert_roman
end

Instance Attribute Details

#case_sensitive ⇒ Object

Returns the value of attribute case_sensitive.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the case_sensitive option: returns whatever is currently
# stored in @case_sensitive (the constructor stores its case_sensitive:
# keyword there, defaulting to true).
def case_sensitive
  @case_sensitive
end

#convert_roman ⇒ Object

Returns the value of attribute convert_roman.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the convert_roman option: returns whatever is currently
# stored in @convert_roman (the constructor stores its convert_roman:
# keyword there, defaulting to true).
def convert_roman
  @convert_roman
end

#move_articles ⇒ Object

Returns the value of attribute move_articles.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the move_articles option: returns whatever is currently
# stored in @move_articles (the constructor stores its move_articles:
# keyword there, defaulting to false).
def move_articles
  @move_articles
end

#remove_symbols ⇒ Object

Returns the value of attribute remove_symbols.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the remove_symbols option: returns whatever is currently
# stored in @remove_symbols (the constructor stores its remove_symbols:
# keyword there, defaulting to false).
def remove_symbols
  @remove_symbols
end

#threshold ⇒ Object

Returns the value of attribute threshold.



5
6
7
# File 'lib/string/fuzzy_matcher.rb', line 5

# Reader for the match threshold: returns whatever is currently stored in
# @threshold. match? compares a similarity score against this value
# (converted with to_f).
# NOTE(review): the value is presumably assigned by the threshold= writer
# that the constructor calls; that writer is not part of this listing.
def threshold
  @threshold
end

Instance Method Details

#algorithms ⇒ Object



51
52
53
# File 'lib/string/fuzzy_matcher.rb', line 51

# Lists the names (keys) of every entry in the ALGORITHMS table, in the
# table's own order.
def algorithms
  ALGORITHMS.each_key.to_a
end

#best_match(a, b) ⇒ Object

Returns the element of array b that best matches string a, based on match percentage.



31
32
33
# File 'lib/string/fuzzy_matcher.rb', line 31

# Returns the candidate from b that scores the highest similarity to a.
#
# a - the string to compare against.
# b - a candidate or (possibly nested) array of candidates; it is handed
#     straight to #similarities.
#
# Returns the best-scoring candidate, or nil when there are no candidates
# (previously an empty list raised NoMethodError on nil).
def best_match(a, b)
  top = similarities(a, b).max_by { |_candidate, percent| percent }
  # max_by yields nil for an empty hash; otherwise a [candidate, percent] pair.
  top && top.first
end

#match?(a, b) ⇒ Boolean

Checks whether the match percentage between Strings a and b is equal to or greater than the threshold.

Returns:

  • (Boolean)


26
27
28
# File 'lib/string/fuzzy_matcher.rb', line 26

# True when the similarity score of a and b reaches the configured
# threshold (the threshold is compared as a Float).
def match?(a, b)
  percent = similarity(a, b)
  percent >= @threshold.to_f
end

#set_weight(algorithm, weight) ⇒ Object



46
47
48
49
# File 'lib/string/fuzzy_matcher.rb', line 46

# Changes the weight given to one scoring algorithm.
#
# algorithm - a key of the ALGORITHMS table.
# weight    - the new weight; passed through BBLib.keep_between(weight, 0, nil)
#             before being stored.
#
# Only the :weight key of the entry is updated. The entry itself is a hash
# that #similarity also reads :signature from, so overwriting the whole
# entry with a number (as this method used to) broke every later
# #similarity call.
#
# NOTE: ALGORITHMS is a constant, so the change is visible to everything
# that reads that table, not just this instance.
#
# Returns the stored weight, or nil when algorithm is not in the table.
def set_weight(algorithm, weight)
  return nil unless ALGORITHMS.include?(algorithm)
  ALGORITHMS[algorithm][:weight] = BBLib.keep_between(weight, 0, nil)
end

#similarities(a, b, sort: false) ⇒ Object

Returns a hash mapping each element of array ‘b’ to its percentage match against a. If sort is true, the hash is sorted in descending order of match percentage.



36
37
38
39
40
# File 'lib/string/fuzzy_matcher.rb', line 36

# Scores every candidate in b against a.
#
# b may be a single candidate or an arbitrarily nested array of them; it
# is wrapped and flattened before scoring.
#
# Returns a hash of candidate => similarity. With sort: true the pairs are
# ordered from highest to lowest similarity; otherwise they keep the
# order in which the candidates were given.
def similarities(a, b, sort: false)
  scores = {}
  [b].flatten.each do |candidate|
    scores[candidate] = similarity(a, candidate)
  end
  return scores unless sort

  ascending = scores.sort_by { |_candidate, percent| percent }
  ascending.reverse.to_h
end

#similarity(a, b) ⇒ Object

Calculates a percentage match between string a and string b.



14
15
16
17
18
19
20
21
22
23
# File 'lib/string/fuzzy_matcher.rb', line 14

# Calculates a percentage match between string a and string b.
#
# Equal inputs short-circuit to 100.0. Otherwise every ALGORITHMS entry
# with a positive :weight is invoked (its :signature is sent to @a with @b
# as the argument), and the results are combined into a weighted average.
#
# NOTE(review): prep_strings is defined outside this listing; it is
# presumably what populates @a and @b from a and b - confirm.
#
# Returns a Float. When the weights add up to zero (for example after
# every algorithm has been given weight 0, or the table is empty) the
# result is 0.0 rather than a ZeroDivisionError.
def similarity(a, b)
  return 100.0 if a == b
  prep_strings(a, b)

  total_weight = ALGORITHMS.values.map { |vals| vals[:weight] }.reduce(0, :+)
  # Nothing to average over: avoid dividing by zero.
  return 0.0 if total_weight.zero?

  # Accumulate as a Float so the result type matches the 100.0 early return.
  score = 0.0
  ALGORITHMS.each_value do |vals|
    next unless vals[:weight] > 0
    score += @a.send(vals[:signature], @b) * vals[:weight]
  end
  score / total_weight
end