Class: PerseusMatch

Inherits:
Object
  • Object
show all
Defined in:
lib/perseus_match/list.rb,
lib/perseus_match.rb,
lib/perseus_match/cluster.rb,
lib/perseus_match/version.rb,
lib/perseus_match/token_set.rb

Overview

A component of perseus_match, the fuzzy string matcher.

Copyright © 2008 Cologne University of Applied Sciences,
Claudiusstr. 1,
50678 Cologne, Germany

Authors:

Jens Wille <jens.wille@uni-koeln.de>

perseus_match is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

perseus_match is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with perseus_match. If not, see <www.gnu.org/licenses/>.

Defined Under Namespace

Modules: Version Classes: CheckFailedError, Cluster, List, TokenSet

Constant Summary collapse

# Positive floating-point infinity, produced by float division by zero.
Infinity = 1.0 / 0

# Coefficient used when the caller does not supply :default_coeff.
DEFAULT_COEFF = 20
# Default distance specification: an ordered list of two-element entries,
# each pairing an options hash with an integer.
# NOTE(review): the integer is presumably the weight of that entry -- confirm
# against the distance calculation. The trailing comments show an alternative
# hash notation for the same entries and are kept verbatim.
DISTANCE_SPEC = [
  [{},                      1],  #   {}                      => 1,
  [{ :excl    => %w[a t] }, 2],  #   { :excl    => %w[a t] } => 1,
  [{ :incl    => 's'     }, 3],  #   { :incl    => 's'     } => 2,
  [{ :incl    => 'y'     }, 4],  #   { :incl    => 'y'     } => 4,
  [{ :sort    => true    }, 4],  #   { :sort    => true    } => 4,
  [{ :soundex => true    }, 4]
]
# String form of the Version module, as returned by Version.to_s.
VERSION = Version.to_s

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(phrase, target, options = {}) ⇒ PerseusMatch

Returns a new instance of PerseusMatch.



79
80
81
82
83
84
85
86
87
88
89
# File 'lib/perseus_match.rb', line 79

# Sets up a matcher for one phrase/target pair.
#
# +phrase+ and +target+ are both coerced with +to_s+. +options+ is read for:
# <tt>:default_coeff</tt>:: falls back to DEFAULT_COEFF when missing or falsy
# <tt>:distance_spec</tt>:: falls back to DISTANCE_SPEC when missing or falsy
# <tt>:verbose</tt>::       stored as given
def initialize(phrase, target, options = {})
  @phrase, @target = phrase.to_s, target.to_s

  coeff = options[:default_coeff]
  spec  = options[:distance_spec]

  @default_coeff = coeff || DEFAULT_COEFF
  @distance_spec = spec  || DISTANCE_SPEC
  @verbose       = options[:verbose]

  # Starts empty; #similarity stores its results here, keyed by coefficient.
  @similarity = {}
end

Instance Attribute Details

#default_coeffObject (readonly)

Returns the value of attribute default_coeff.



77
78
79
# File 'lib/perseus_match.rb', line 77

# Reader for @default_coeff, which the constructor sets from
# options[:default_coeff] or, failing that, DEFAULT_COEFF.
def default_coeff
  @default_coeff
end

#distance_specObject (readonly)

Returns the value of attribute distance_spec.



77
78
79
# File 'lib/perseus_match.rb', line 77

# Reader for @distance_spec, which the constructor sets from
# options[:distance_spec] or, failing that, DISTANCE_SPEC.
def distance_spec
  @distance_spec
end

#phraseObject (readonly)

Returns the value of attribute phrase.



77
78
79
# File 'lib/perseus_match.rb', line 77

# Reader for @phrase: the constructor's +phrase+ argument after +to_s+.
def phrase
  @phrase
end

#targetObject (readonly)

Returns the value of attribute target.



77
78
79
# File 'lib/perseus_match.rb', line 77

# Reader for @target: the constructor's +target+ argument after +to_s+.
def target
  @target
end

#verboseObject (readonly)

Returns the value of attribute verbose.



77
78
79
# File 'lib/perseus_match.rb', line 77

# Reader for @verbose, stored unchanged from options[:verbose] by the
# constructor (nil when the option was not given).
def verbose
  @verbose
end

Class Method Details

.check(*args) ⇒ Object



64
65
66
67
68
# File 'lib/perseus_match.rb', line 64

# Non-raising counterpart of check!.
#
# Forwards every argument to check! and returns its result; if check! raises
# CheckFailedError, returns +false+ instead. Other exceptions propagate.
def check(*args)
  begin
    check!(*args)
  rescue CheckFailedError
    false
  end
end

.check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity) ⇒ Object



70
71
72
73
# File 'lib/perseus_match.rb', line 70

# Builds a matcher for +phrase+ and +target+ (passing +pm_options+ to +new+),
# reads +attribute+ from it via +send+, and compares that value against
# +threshold+ by sending it +operator+.
#
# Returns the comparison result when it is truthy; otherwise raises
# CheckFailedError constructed with the value, the threshold and the operator.
def check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity)
  measured = new(phrase, target, pm_options).send(attribute)
  outcome  = measured.send(operator, threshold)

  raise CheckFailedError.new(measured, threshold, operator) unless outcome

  outcome
end

.cluster(phrases, options = {}, pm_options = {}) ⇒ Object



60
61
62
# File 'lib/perseus_match.rb', line 60

# Builds a Cluster from +phrases+ and +pm_options+, then returns the result
# of calling +rank+ on it with +options+.
def cluster(phrases, options = {}, pm_options = {})
  clustering = Cluster.new(phrases, pm_options)
  clustering.rank(options)
end

.distance(*args) ⇒ Object



52
53
54
# File 'lib/perseus_match.rb', line 52

# Convenience shortcut: instantiates a matcher with +args+ and returns its
# +distance+.
def distance(*args)
  matcher = new(*args)
  matcher.distance
end

.match(phrases, pm_options = {}) ⇒ Object



56
57
58
# File 'lib/perseus_match.rb', line 56

# Returns a new List built from +phrases+, passing +pm_options+ through
# to List.new unchanged.
def match(phrases, pm_options = {})
  List.new(phrases, pm_options)
end

Instance Method Details

#distanceObject

0 <= distance <= Infinity



100
101
102
# File 'lib/perseus_match.rb', line 100

# 0 <= distance <= Infinity
#
# Memoized in @distance: calculate_distance is invoked only while no truthy
# value has been stored yet.
def distance
  return @distance if @distance

  @distance = calculate_distance
end

#phrase_tokensObject



91
92
93
# File 'lib/perseus_match.rb', line 91

# Result of tokenize(phrase), memoized in @phrase_tokens so tokenize is
# invoked only while no truthy value has been stored yet.
def phrase_tokens
  return @phrase_tokens if @phrase_tokens

  @phrase_tokens = tokenize(phrase)
end

#similarity(coeff = nil) ⇒ Object

1 >= similarity >= 0



105
106
107
108
# File 'lib/perseus_match.rb', line 105

# 1 >= similarity >= 0
#
# Computes 1 / exp(distance / (coeff * total_weight)). A nil (or false)
# +coeff+ is replaced by default_coeff. Results are cached in @similarity,
# keyed by the coefficient actually used.
def similarity(coeff = nil)
  coeff = default_coeff unless coeff  # passed arg may be nil

  cached = @similarity[coeff]
  return cached if cached

  @similarity[coeff] = 1 / Math.exp(distance / (coeff * total_weight))
end

#target_tokensObject



95
96
97
# File 'lib/perseus_match.rb', line 95

# Result of tokenize(target), memoized in @target_tokens so tokenize is
# invoked only while no truthy value has been stored yet.
def target_tokens
  return @target_tokens if @target_tokens

  @target_tokens = tokenize(target)
end