Class: BBLib::FuzzyMatcher

Inherits:
Object
  • Object
show all
Defined in:
lib/string/fuzzy_matcher.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(threshold: 75, case_sensitive: true, remove_symbols: false, move_articles: false, convert_roman: true) ⇒ FuzzyMatcher

Returns a new instance of FuzzyMatcher.



8
9
10
11
12
# File 'lib/string/fuzzy_matcher.rb', line 8

# Builds a matcher and records its configuration.
#
# The threshold goes through the threshold= writer, then setup_algorithms is
# invoked, and finally each preprocessing flag is stored in its own instance
# variable. What the flags do is implemented outside this method.
#
# @param threshold [Object] value handed to the threshold= writer (default 75)
# @param case_sensitive [Boolean] stored in @case_sensitive (default true)
# @param remove_symbols [Boolean] stored in @remove_symbols (default false)
# @param move_articles [Boolean] stored in @move_articles (default false)
# @param convert_roman [Boolean] stored in @convert_roman (default true)
def initialize(threshold: 75, case_sensitive: true, remove_symbols: false, move_articles: false, convert_roman: true)
  self.threshold = threshold
  setup_algorithms
  @case_sensitive = case_sensitive
  @remove_symbols = remove_symbols
  @move_articles  = move_articles
  @convert_roman  = convert_roman
end

Instance Attribute Details

#a ⇒ Object

Returns the value of attribute a.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the @a instance variable.
#
# NOTE(review): #similarity calls prep_strings(a, b) and then compares @a with
# @b, so this presumably holds the prepared form of the first string of the
# most recent comparison — confirm against prep_strings.
#
# @return [Object, nil] the current value of @a (nil if it was never assigned)
def a
  @a
end

#algorithms ⇒ Object (readonly)

Returns the value of attribute algorithms.



5
6
7
# File 'lib/string/fuzzy_matcher.rb', line 5

# Reader for the @algorithms table.
#
# Elsewhere in this class the table is indexed by an algorithm key, and each
# entry is read for :weight and :signature (see #set_weight and #similarity).
#
# @return [Object, nil] the current value of @algorithms
def algorithms
  @algorithms
end

#b ⇒ Object

Returns the value of attribute b.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the @b instance variable.
#
# NOTE(review): #similarity calls prep_strings(a, b) and then compares @a with
# @b, so this presumably holds the prepared form of the second string of the
# most recent comparison — confirm against prep_strings.
#
# @return [Object, nil] the current value of @b (nil if it was never assigned)
def b
  @b
end

#case_sensitive ⇒ Object

Returns the value of attribute case_sensitive.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the @case_sensitive flag, which the constructor sets from its
# case_sensitive: keyword (default true).
#
# NOTE(review): presumably controls whether letter case is preserved before
# comparing — the code that consumes the flag is not visible here; confirm.
#
# @return [Object, nil] the current value of @case_sensitive
def case_sensitive
  @case_sensitive
end

#convert_roman ⇒ Object

Returns the value of attribute convert_roman.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the @convert_roman flag, which the constructor sets from its
# convert_roman: keyword (default true).
#
# NOTE(review): presumably enables Roman-numeral conversion before comparing —
# the code that consumes the flag is not visible here; confirm.
#
# @return [Object, nil] the current value of @convert_roman
def convert_roman
  @convert_roman
end

#move_articles ⇒ Object

Returns the value of attribute move_articles.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the @move_articles flag, which the constructor sets from its
# move_articles: keyword (default false).
#
# NOTE(review): presumably repositions articles (e.g. "The") before comparing —
# the code that consumes the flag is not visible here; confirm.
#
# @return [Object, nil] the current value of @move_articles
def move_articles
  @move_articles
end

#remove_symbols ⇒ Object

Returns the value of attribute remove_symbols.



6
7
8
# File 'lib/string/fuzzy_matcher.rb', line 6

# Reader for the @remove_symbols flag, which the constructor sets from its
# remove_symbols: keyword (default false).
#
# NOTE(review): presumably strips symbol characters before comparing — the
# code that consumes the flag is not visible here; confirm.
#
# @return [Object, nil] the current value of @remove_symbols
def remove_symbols
  @remove_symbols
end

#threshold ⇒ Object

Returns the value of attribute threshold.



5
6
7
# File 'lib/string/fuzzy_matcher.rb', line 5

# Reader for @threshold, the cut-off that #match? converts with to_f and
# compares a similarity score against (score >= threshold).
#
# @return [Object, nil] the current value of @threshold
def threshold
  @threshold
end

Instance Method Details

#best_match(a, b) ⇒ Object

Returns the element of array b that has the highest match percentage against string a.



32
33
34
# File 'lib/string/fuzzy_matcher.rb', line 32

# Picks the candidate from b that scores highest against a.
#
# Scores come from #similarities, which maps every candidate to its match
# percentage; the key of the highest-valued pair is returned.
#
# @param a [Object] the value every candidate is compared against
# @param b [Object] a candidate or a collection of candidates, forwarded to #similarities
# @return [Object, nil] the best-scoring candidate, or nil when there are no
#   candidates (max_by yields nil for an empty hash, which previously raised
#   NoMethodError on the [0] lookup)
def best_match(a, b)
  top_pair = similarities(a, b).max_by { |_candidate, score| score }
  top_pair && top_pair.first
end

#match?(a, b) ⇒ Boolean

Checks whether the match percentage between strings a and b is equal to or greater than the threshold.

Returns:

  • (Boolean)


27
28
29
# File 'lib/string/fuzzy_matcher.rb', line 27

# Tells whether a and b are similar enough to count as a match.
#
# @param a [Object] first value, forwarded to #similarity
# @param b [Object] second value, forwarded to #similarity
# @return [Boolean] true when the similarity score is at least @threshold
#   (converted with to_f)
def match?(a, b)
  score = similarity(a, b)
  score >= @threshold.to_f
end

#set_weight(algorithm, weight) ⇒ Object



47
48
49
50
# File 'lib/string/fuzzy_matcher.rb', line 47

# Changes the weight recorded for one of the registered algorithms.
#
# The requested weight is passed through BBLib.keep_between(weight, 0, nil)
# before being stored (presumably a clamp to a minimum of 0 with no upper
# bound — confirm in BBLib.keep_between).
#
# @param algorithm [Object] key looked up in @algorithms
# @param weight [Numeric] requested weight
# @return [Object, nil] the value stored as the new weight, or nil when
#   algorithm is not a key of @algorithms
def set_weight(algorithm, weight)
  if @algorithms.include?(algorithm)
    @algorithms[algorithm][:weight] = BBLib.keep_between(weight, 0, nil)
  end
end

#similarities(a, b, sort: false) ⇒ Object

Returns a hash that maps each element of array ‘b’ to its match percentage against a. If sort is true, the hash is ordered from highest to lowest match percentage.



37
38
39
40
41
# File 'lib/string/fuzzy_matcher.rb', line 37

# Scores every candidate in b against a.
#
# b may be a single candidate or an arbitrarily nested array of candidates;
# it is wrapped in an array and flattened before scoring.
#
# @param a [Object] the value every candidate is compared against
# @param b [Object, Array] candidate or candidates
# @param sort [Boolean] when true, order the result from highest to lowest score
# @return [Hash] candidate => score as returned by #similarity
def similarities(a, b, sort: false)
  scores = [b].flatten.each_with_object({}) do |candidate, acc|
    acc[candidate] = similarity(a, candidate)
  end
  return scores unless sort

  scores.sort_by { |_candidate, pct| pct }.reverse.to_h
end

#similarity(a, b) ⇒ Object

Calculates a percentage match between string a and string b.



15
16
17
18
19
20
21
22
23
24
# File 'lib/string/fuzzy_matcher.rb', line 15

# Calculates a weighted percentage match between a and b.
#
# prep_strings (defined elsewhere) is called first; the comparison then works
# on @a and @b, which prep_strings is presumed to populate. Each algorithm in
# @algorithms with a positive :weight is invoked as @a.send(signature, @b),
# and its result is multiplied by the weight. The weighted sum is divided by
# the total of all weights.
#
# @param a [Object] first value to compare
# @param b [Object] second value to compare
# @return [Float] 100.0 when the prepared values are equal; otherwise the
#   weighted average of the algorithm scores; 0.0 when no algorithm carries a
#   positive total weight
def similarity(a, b)
  prep_strings(a, b)
  return 100.0 if @a == @b

  # Seed with 0 so an empty algorithm table sums to 0 instead of nil.
  total_weight = @algorithms.values.inject(0) { |sum, vals| sum + vals[:weight] }
  # Nothing to average: avoids ZeroDivisionError / NaN when every weight is 0.
  return 0.0 unless total_weight > 0

  # Accumulate as Float so integer scores and weights are not truncated by
  # integer division.
  weighted_score = @algorithms.each_value.inject(0.0) do |sum, vals|
    next sum unless vals[:weight] > 0

    sum + @a.send(vals[:signature], @b) * vals[:weight]
  end
  weighted_score / total_weight
end