Class: PerseusMatch
- Inherits:
-
Object
- Object
- PerseusMatch
- Defined in:
- lib/perseus_match/list.rb,
lib/perseus_match.rb,
lib/perseus_match/cluster.rb,
lib/perseus_match/version.rb,
lib/perseus_match/token_set.rb
Overview
–
#
A component of perseus_match, the fuzzy string matcher #
#
Copyright © 2008 Cologne University of Applied Sciences #
Claudiusstr. 1 #
50678 Cologne, Germany #
#
Authors: #
Jens Wille <jens.wille@uni-koeln.de> #
#
perseus_match is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. #
#
perseus_match is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. #
#
You should have received a copy of the GNU General Public License along with perseus_match. If not, see <www.gnu.org/licenses/>. #
#
++
Defined Under Namespace
Modules: Version Classes: CheckFailedError, Cluster, List, TokenSet
Constant Summary collapse
- Infinity =
1.0 / 0
- DEFAULT_COEFF =
20
- DISTANCE_SPEC =
{
[{}, 1], # {} => 1,
[{ :excl => %w[a t] }, 2], # { :excl => %w[a t] } => 1,
[{ :incl => 's' }, 3], # { :incl => 's' } => 2,
[{ :incl => 'y' }, 4], # { :incl => 'y' } => 4,
[{ :sort => true }, 4], # { :sort => true } => 4,
[{ :soundex => true }, 4]
- VERSION =
Version.to_s
Instance Attribute Summary collapse
-
#default_coeff ⇒ Object
readonly
Returns the value of attribute default_coeff.
-
#distance_spec ⇒ Object
readonly
Returns the value of attribute distance_spec.
-
#phrase ⇒ Object
readonly
Returns the value of attribute phrase.
-
#target ⇒ Object
readonly
Returns the value of attribute target.
-
#verbose ⇒ Object
readonly
Returns the value of attribute verbose.
Class Method Summary collapse
- .check(*args) ⇒ Object
- .check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity) ⇒ Object
- .cluster(phrases, options = {}, pm_options = {}) ⇒ Object
- .distance(*args) ⇒ Object
- .match(phrases, pm_options = {}) ⇒ Object
Instance Method Summary collapse
-
#distance ⇒ Object
0 <= distance <= Infinity.
-
#initialize(phrase, target, options = {}) ⇒ PerseusMatch
constructor
A new instance of PerseusMatch.
- #phrase_tokens ⇒ Object
-
#similarity(coeff = nil) ⇒ Object
1 >= similarity >= 0.
- #target_tokens ⇒ Object
Constructor Details
#initialize(phrase, target, options = {}) ⇒ PerseusMatch
Returns a new instance of PerseusMatch.
79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/perseus_match.rb', line 79 def initialize(phrase, target, options = {}) @phrase = phrase.to_s @target = target.to_s @default_coeff = options[:default_coeff] || DEFAULT_COEFF @distance_spec = options[:distance_spec] || DISTANCE_SPEC @verbose = options[:verbose] @similarity = {} end |
Instance Attribute Details
#default_coeff ⇒ Object (readonly)
Returns the value of attribute default_coeff.
77 78 79 |
# File 'lib/perseus_match.rb', line 77 def default_coeff @default_coeff end |
#distance_spec ⇒ Object (readonly)
Returns the value of attribute distance_spec.
77 78 79 |
# File 'lib/perseus_match.rb', line 77 def distance_spec @distance_spec end |
#phrase ⇒ Object (readonly)
Returns the value of attribute phrase.
77 78 79 |
# File 'lib/perseus_match.rb', line 77 def phrase @phrase end |
#target ⇒ Object (readonly)
Returns the value of attribute target.
77 78 79 |
# File 'lib/perseus_match.rb', line 77 def target @target end |
#verbose ⇒ Object (readonly)
Returns the value of attribute verbose.
77 78 79 |
# File 'lib/perseus_match.rb', line 77 def verbose @verbose end |
Class Method Details
.check(*args) ⇒ Object
64 65 66 67 68 |
# File 'lib/perseus_match.rb', line 64 def check(*args) check!(*args) rescue CheckFailedError false end |
.check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity) ⇒ Object
70 71 72 73 |
# File 'lib/perseus_match.rb', line 70 def check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity) value = new(phrase, target, pm_options).send(attribute) value.send(operator, threshold) or raise CheckFailedError.new(value, threshold, operator) end |
.cluster(phrases, options = {}, pm_options = {}) ⇒ Object
60 61 62 |
# File 'lib/perseus_match.rb', line 60 def cluster(phrases, options = {}, pm_options = {}) Cluster.new(phrases, pm_options).rank(options) end |
.distance(*args) ⇒ Object
52 53 54 |
# File 'lib/perseus_match.rb', line 52 def distance(*args) new(*args).distance end |
.match(phrases, pm_options = {}) ⇒ Object
56 57 58 |
# File 'lib/perseus_match.rb', line 56 def match(phrases, pm_options = {}) List.new(phrases, pm_options) end |
Instance Method Details
#distance ⇒ Object
0 <= distance <= Infinity
100 101 102 |
# File 'lib/perseus_match.rb', line 100 def distance @distance ||= calculate_distance end |
#phrase_tokens ⇒ Object
91 92 93 |
# File 'lib/perseus_match.rb', line 91 def phrase_tokens @phrase_tokens ||= tokenize(phrase) end |
#similarity(coeff = nil) ⇒ Object
1 >= similarity >= 0
105 106 107 108 |
# File 'lib/perseus_match.rb', line 105 def similarity(coeff = nil) coeff ||= default_coeff # passed arg may be nil @similarity[coeff] ||= 1 / Math.exp(distance / (coeff * total_weight)) end |
#target_tokens ⇒ Object
95 96 97 |
# File 'lib/perseus_match.rb', line 95 def target_tokens @target_tokens ||= tokenize(target) end |