Class: Licensee::Matchers::Dice

Inherits:
Matcher
  • Object
show all
Defined in:
lib/licensee/matchers/dice.rb

Constant Summary

Constants inherited from Matcher

Matcher::HASH_METHODS

Instance Attribute Summary

Attributes inherited from Matcher

#file

Instance Method Summary collapse

Methods inherited from Matcher

#initialize, #name

Methods included from HashHelper

#to_h

Constructor Details

This class inherits a constructor from Licensee::Matchers::Matcher

Instance Method Details

#confidence ⇒ Object

Confidence in the match: the file's similarity to the matched license, or 0 when no license matched



49
50
51
# File 'lib/licensee/matchers/dice.rb', line 49

# Similarity between the file and the matched license, memoized in
# @confidence. Evaluates to 0 when there is no match.
def confidence
  @confidence ||=
    if match
      file.similarity(match)
    else
      0
    end
end

#licenses_by_similiarity ⇒ Object



33
34
35
36
37
38
39
40
# File 'lib/licensee/matchers/dice.rb', line 33

# Builds [license, similarity] pairs for every potential license, where
# similarity is license.similarity(file), ordered from the highest
# similarity to the lowest. The result is memoized.
def licenses_by_similiarity
  @licenses_by_similiarity ||=
    potential_licenses
    .map { |candidate| [candidate, candidate.similarity(file)] }
    .sort_by(&:last)
    .reverse
end

#match ⇒ Object

Return the most similar potential license whose similarity is at or above the confidence threshold, or nil if no license qualifies



6
7
8
9
10
11
12
# File 'lib/licensee/matchers/dice.rb', line 6

# The license from the first entry of `matches`, or nil when `matches`
# is empty. Each entry is indexed as a pair with the license first.
def match
  @match ||= (matches.first[0] unless matches.empty?)
end

#matches ⇒ Object



42
43
44
45
46
# File 'lib/licensee/matchers/dice.rb', line 42

# The [license, similarity] pairs from `licenses_by_similiarity` whose
# similarity is greater than or equal to Licensee.confidence_threshold.
# Order is preserved and the result is memoized.
def matches
  @matches ||= licenses_by_similiarity.select { |pair| pair.last >= Licensee.confidence_threshold }
end

#potential_licenses ⇒ Object

Licenses that may be a match for this file. To avoid false positives:

  1. Creative Commons licenses cannot be matched against license files that begin with the title of a non-open-source CC license variant

  2. The percentage change in file length may not exceed the inverse of the confidence threshold



21
22
23
24
25
26
27
28
29
30
31
# File 'lib/licensee/matchers/dice.rb', line 21

# Candidate licenses for comparison against the file, drawn from
# Licensee.licenses(hidden: true) and memoized.
#
# A license is dropped when it is Creative Commons and the file is
# flagged as a potential false positive. Otherwise it is kept only if it
# has a wordset and its length delta against the file does not exceed its
# max_delta.
def potential_licenses
  @potential_licenses ||= Licensee.licenses(hidden: true).select do |candidate|
    next false if candidate.creative_commons? && file.potential_false_positive?

    candidate.wordset && candidate.length_delta(file) <= candidate.max_delta
  end
end