Class: Licensee::Matchers::Dice

Inherits:
Object
  • Object
show all
Defined in:
lib/licensee/matchers/dice_matcher.rb

Instance Method Summary collapse

Constructor Details

#initialize(file) ⇒ Dice

Returns a new instance of Dice.



4
5
6
# File 'lib/licensee/matchers/dice_matcher.rb', line 4

# Build a matcher around the given file.
#
# file - the object to match against known licenses; stored as @file.
#        Other methods of this class call `wordset` on it.
def initialize(file)
  @file = file
end

Instance Method Details

#confidence ⇒ Object

Confidence that the matched license is a match



52
53
54
# File 'lib/licensee/matchers/dice_matcher.rb', line 52

# Similarity score of the matched license, memoized in @confidence.
#
# Returns similarity(match) when `match` yields a license, otherwise 0.
def confidence
  @confidence ||= begin
    matched = match
    if matched
      similarity(matched)
    else
      0
    end
  end
end

#length_delta(license) ⇒ Object

Calculate the difference between the file length and a given license’s length



41
42
43
# File 'lib/licensee/matchers/dice_matcher.rb', line 41

# Absolute difference between the size of the file's wordset and the size
# of the given license's wordset.
#
# license - an object responding to `wordset`.
#
# Returns a non-negative number.
def length_delta(license)
  file_words = @file.wordset.size
  license_words = license.wordset.size
  (file_words - license_words).abs
end

#match ⇒ Object

Return the most similar potential license whose similarity meets or exceeds the confidence threshold, or nil if no license qualifies



10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/licensee/matchers/dice_matcher.rb', line 10

# Pick the best-matching license among the potential licenses.
#
# Each potential license is scored with `similarity`; only scores at or
# above Licensee.confidence_threshold are kept, and the license with the
# highest score is returned.
#
# The result is memoized in @match. `defined?` is used rather than `||=`
# so that a nil result (no qualifying license) is cached as well.
#
# Returns the winning license, or nil when none reaches the threshold.
def match
  return @match if defined? @match

  candidates = potential_licenses.each_with_object([]) do |license, found|
    score = similarity(license)
    found << [license, score] if score >= Licensee.confidence_threshold
  end

  # max_by yields nil for an empty list, which maps to a nil match.
  winner = candidates.max_by { |_license, score| score }
  @match = winner && winner.first
end

#max_delta ⇒ Object

Maximum difference allowed between the file's length and a license's length for that license to be considered a potential match



47
48
49
# File 'lib/licensee/matchers/dice_matcher.rb', line 47

# Largest length difference a license may have and still be considered.
#
# Computed as the size of the file's wordset multiplied by
# Licensee.confidence_threshold / 100.0, and memoized in @max_delta.
#
# Returns a Float.
def max_delta
  @max_delta ||= begin
    word_count = @file.wordset.size
    threshold_ratio = Licensee.confidence_threshold / 100.0
    word_count * threshold_ratio
  end
end

#potential_licenses ⇒ Object

Select the licenses whose difference in length to the file does not exceed the file’s length * the confidence threshold / 100, sorted in ascending order by that difference (closest in length first). Licenses without a wordset are skipped.



29
30
31
32
33
34
35
36
37
# File 'lib/licensee/matchers/dice_matcher.rb', line 29

# Licenses worth comparing against the file, memoized in @potential_licenses.
#
# Starts from Licensee.licenses(:hidden => true), keeps only licenses that
# have a wordset and whose length_delta is at most max_delta, then orders
# them by length_delta, smallest difference first.
#
# Returns an Array of licenses.
def potential_licenses
  @potential_licenses ||=
    Licensee.licenses(:hidden => true)
            .select { |candidate| candidate.wordset && length_delta(candidate) <= max_delta }
            .sort_by { |candidate| length_delta(candidate) }
end