Module: Evoc::InterestingnessMeasures

Included in:: Rule

Defined in:: lib/evoc/interestingness_measures.rb

Defined Under Namespace

Constant Summary collapse

VALUE_TYPE =

Rational

M_HYPER_COEFFICIENT_MIN = Hyper Coefficient The number of rules used to form a hyper rule

M_HYPER_COEFFICIENT_MID =

M_HYPER_COEFFICIENT_MAX =

Float::INFINITY

M_HYPER_COEFFICIENT_HYPER_MEASURE =

true

M_SUPPORT_MIN = INTERESTINGNESS MEASURES. If not stated otherwise, all of the implementations are based on Michael Hahsler's overview at: michael.hahsler.net/research/association_rules/measures.html

M_SUPPORT_MID =

M_SUPPORT_MAX =

M_CONFIDENCE_MIN =

M_CONFIDENCE_MID =

M_CONFIDENCE_MAX =

M_COVERAGE_MIN =

M_COVERAGE_MID =

M_COVERAGE_MAX =

M_PREVALENCE_MIN =

M_PREVALENCE_MID =

M_PREVALENCE_MAX =

M_RECALL_MIN =

M_RECALL_MID =

M_RECALL_MAX =

M_SPECIFICITY_MIN =

M_SPECIFICITY_MID =

M_SPECIFICITY_MAX =

M_LIFT_MIN =

M_LIFT_MID =

M_LIFT_MAX =

Float::INFINITY

M_LEVERAGE_MIN =

-1

M_LEVERAGE_MID =

M_LEVERAGE_MAX =

M_PIATETSKY_SHAPIRO_MIN =

-0.25

M_PIATETSKY_SHAPIRO_MID =

M_PIATETSKY_SHAPIRO_MAX =

0.25

M_ADDED_VALUE_MIN = aka: pavillion index, centered confidence

-0.5

M_ADDED_VALUE_MID =

M_ADDED_VALUE_MAX =

M_CAUSAL_CONFIDENCE_MIN =

M_CAUSAL_CONFIDENCE_MID =

M_CAUSAL_CONFIDENCE_MAX =

M_CAUSAL_SUPPORT_MIN =

M_CAUSAL_SUPPORT_MID =

M_CAUSAL_SUPPORT_MAX =

M_DESCRIPTIVE_CONFIRMED_CONFIDENCE_MIN =

-1

M_DESCRIPTIVE_CONFIRMED_CONFIDENCE_MID =

M_DESCRIPTIVE_CONFIRMED_CONFIDENCE_MAX =

M_DIFFERENCE_OF_CONFIDENCE_MIN =

-1

M_DIFFERENCE_OF_CONFIDENCE_MID =

M_DIFFERENCE_OF_CONFIDENCE_MAX =

M_RELATIVE_RISK_MIN =

M_RELATIVE_RISK_MID =

M_RELATIVE_RISK_MAX =

Float::INFINITY

M_JACCARD_MIN =

M_JACCARD_MID =

M_JACCARD_MAX =

M_IMBALANCE_RATIO_MIN =

M_IMBALANCE_RATIO_MID =

M_IMBALANCE_RATIO_MAX =

M_ODDS_RATIO_MIN =

M_ODDS_RATIO_MID =

M_ODDS_RATIO_MAX =

Float::INFINITY

M_YULES_Q_MIN =

-1

M_YULES_Q_MID =

M_YULES_Q_MAX =

M_YULES_Y_MIN =

-1

M_YULES_Y_MID =

M_YULES_Y_MAX =

M_KLOSGEN_MIN = from Tan2004

-1

M_KLOSGEN_MID =

M_KLOSGEN_MAX =

M_KULCZYNSKI_MIN =

M_KULCZYNSKI_MID =

M_KULCZYNSKI_MAX =

M_CONVICTION_MIN =

M_CONVICTION_MID =

M_CONVICTION_MAX =

Float::INFINITY

M_INTERESTINGNESS_WEIGHTING_DEPENDENCY_MIN = uses 2 coefficients to weight the importance of the two factors k : dependency m : generality

M_INTERESTINGNESS_WEIGHTING_DEPENDENCY_MID =

M_INTERESTINGNESS_WEIGHTING_DEPENDENCY_MAX =

M_COLLECTIVE_STRENGTH_MIN =

-Float::INFINITY

M_COLLECTIVE_STRENGTH_MID =

M_COLLECTIVE_STRENGTH_MAX =

Float::INFINITY

M_GINI_INDEX_MIN =

M_GINI_INDEX_MID =

M_GINI_INDEX_MAX =

M_KAPPA_MIN =

-1

M_KAPPA_MID =

M_KAPPA_MAX =

M_J_MEASURE_MIN =

M_J_MEASURE_MID =

M_J_MEASURE_MAX =

M_ONE_WAY_SUPPORT_MIN =

-1

M_ONE_WAY_SUPPORT_MID =

M_ONE_WAY_SUPPORT_MAX =

Float::INFINITY

M_TWO_WAY_SUPPORT_MIN =

-1

M_TWO_WAY_SUPPORT_MID =

M_TWO_WAY_SUPPORT_MAX =

M_LINEAR_CORRELATION_COEFFICIENT_MIN = aka Ø-coefficient

-1

M_LINEAR_CORRELATION_COEFFICIENT_MID =

M_LINEAR_CORRELATION_COEFFICIENT_MAX =

M_COSINE_MIN =

M_COSINE_MID =

M_COSINE_MAX =

M_LOEVINGER_MIN =

-1

M_LOEVINGER_MID =

M_LOEVINGER_MAX =

M_SEBAG_SCHOENAUER_MIN =

M_SEBAG_SCHOENAUER_MID =

M_SEBAG_SCHOENAUER_MAX =

Float::INFINITY

M_VARYING_RATES_LIAISON_MIN =

-1

M_VARYING_RATES_LIAISON_MID =

M_VARYING_RATES_LIAISON_MAX =

Float::INFINITY

M_LEAST_CONTRADICTION_MIN =

-Float::INFINITY

M_LEAST_CONTRADICTION_MID =

M_LEAST_CONTRADICTION_MAX =

M_ODD_MULTIPLIER_MIN =

M_ODD_MULTIPLIER_MID =

M_ODD_MULTIPLIER_MAX =

Float::INFINITY

M_EXAMPLE_AND_COUNTEREXAMPLE_RATE_MIN =

-Float::INFINITY

M_EXAMPLE_AND_COUNTEREXAMPLE_RATE_MID =

M_EXAMPLE_AND_COUNTEREXAMPLE_RATE_MAX =

M_ZHANG_MIN =

-1

M_ZHANG_MID =

M_ZHANG_MAX =

M_LAPLACE_CORRECTED_CONFIDENCE_MIN =

M_LAPLACE_CORRECTED_CONFIDENCE_MID =

M_LAPLACE_CORRECTED_CONFIDENCE_MAX =

Class Method Summary collapse

Instance Method Summary collapse

#get_measure(measure) ⇒ Object

a common getter for all measures .
#get_p(p) ⇒ Object
#get_p_values ⇒ Hash

A hash containing the p probabilities of this rule.
#instantiated_measures ⇒ Object

returns the measures which have been instantiated.
#lhs ⇒ Object
#m_added_value ⇒ Object
#m_causal_confidence ⇒ Object
#m_causal_support ⇒ Object
#m_collective_strength ⇒ Object

range from Aggarwal1998.
#m_confidence ⇒ Object
#m_conviction ⇒ Object
#m_cosine ⇒ Object
#m_coverage ⇒ Object
#m_descriptive_confirmed_confidence ⇒ Object
#m_difference_of_confidence ⇒ Object
#m_example_and_counterexample_rate ⇒ Object

0 when equally many examples as counter examples.
#m_gini_index ⇒ Object

Measures quadratic entropy.
#m_hyper_coefficient ⇒ Object
#m_imbalance_ratio ⇒ Object

IR gauges the degree of imbalance between two events that the lhs and the rhs are contained in a transaction.
#m_interestingness_weighting_dependency ⇒ Object
#m_j_measure ⇒ Object

Measures cross entropy.
#m_jaccard ⇒ Object
#m_kappa ⇒ Object
#m_klosgen ⇒ Object
#m_kulczynski ⇒ Object

Calculate the null-invariant Kulczynski measure with a preference for skewed patterns.
#m_laplace_corrected_confidence ⇒ Object

Corrected confidence estimate decreases with lower support to account for estimation uncertainty with low counts.
#m_least_contradiction ⇒ Object
#m_leverage ⇒ Object

Leverage measures the difference between how often X and Y appear together in the data set and what would be expected if X and Y were statistically independent.
#m_lift ⇒ Object

aka interest. Lift measures how many times more often X and Y occur together than expected if they were statistically independent.
#m_linear_correlation_coefficient ⇒ Object
#m_loevinger ⇒ Object

aka Certainty Factor The certainty factor is a measure of variation of the probability that Y is in a transaction when only considering transactions with X.
#m_odd_multiplier ⇒ Object
#m_odds_ratio ⇒ Object

The odds of finding X in transactions which contain Y divided by the odds of finding X in transactions which do not contain Y.
#m_one_way_support ⇒ Object
#m_piatetsky_shapiro ⇒ Object
#m_prevalence ⇒ Object
#m_recall ⇒ Object
#m_relative_risk ⇒ Object
#m_sebag_schoenauer ⇒ Object
#m_specificity ⇒ Object
#m_support ⇒ Object
#m_two_way_support ⇒ Object
#m_varying_rates_liaison ⇒ Object
#m_yules_q ⇒ Object
#m_yules_y ⇒ Object
#m_zhang ⇒ Object
#measure_instantiated?(measure) ⇒ Boolean

Returns true if the measure has been instantiated.
#n ⇒ Object

the number of transactions n is converted into the specified type to ensure that the type is used throughout calculations.
#name ⇒ Object
#p_A ⇒ Object

the ratio of tx with A as a subset.
#p_A_notB ⇒ Object

the ratio of tx where A is a subset but B is not.
#p_AB ⇒ Object

the ratio of tx with the union of A and B as a subset.
#p_AgivenB ⇒ Object

the ratio of the union being a subset to the number of txes where B is a subset.
#p_AgivennotB ⇒ Object

if notB is 0, so is A_notB.
#p_AorB ⇒ Object

the ratio of tx where A or B is a subset.
#p_B ⇒ Object

the ratio of tx with B as a subset.
#p_BgivenA ⇒ Object

the ratio of the union being a subset to the number of txes where A is a subset.
#p_BgivennotA ⇒ Object

if notA is 0, so is notA_B.
#p_notA ⇒ Object

the ratio of tx where A is not a subset.
#p_notA_B ⇒ Object

the ratio of tx where A is not a subset but B is.
#p_notA_notB ⇒ Object

the ratio of tx where neither A nor B is a subset.
#p_notAgivenB ⇒ Object
#p_notAgivennotB ⇒ Object

if notB is 0, so is notA and notB.
#p_notB ⇒ Object

the ratio of tx where B is not a subset.
#p_notBgivenA ⇒ Object

if A is 0, so is A,notB.
#p_notBgivennotA ⇒ Object
#rhs ⇒ Object
#set_measure(measure, value, hyper_measure: false) ⇒ Object
#set_p(p, value) ⇒ Object

manually set the probability p of this rule.
#to_a ⇒ Object
#tx_store ⇒ Object

methods that must be implemented in the class that uses this module as a mixin.

Class Method Details

.get_max(measure) ⇒ `Object`



41
42
43

# File 'lib/evoc/interestingness_measures.rb', line 41

# Looks up the upper-bound constant registered for a measure.
#
# @param measure [#to_s] measure name, e.g. :m_lift
# @return [Object] value of the constant named <MEASURE>_MAX
def self.get_max(measure)
  constant_name = "#{measure.to_s.upcase}_MAX"
  const_get(constant_name)
end

.get_mid(measure) ⇒ `Object`



45
46
47

# File 'lib/evoc/interestingness_measures.rb', line 45

# Looks up the midpoint constant registered for a measure.
#
# @param measure [#to_s] measure name, e.g. :m_lift
# @return [Object] value of the constant named <MEASURE>_MID
def self.get_mid(measure)
  constant_name = "#{measure.to_s.upcase}_MID"
  const_get(constant_name)
end

.get_min(measure) ⇒ `Object`



37
38
39

# File 'lib/evoc/interestingness_measures.rb', line 37

# Looks up the lower-bound constant registered for a measure.
#
# @param measure [#to_s] measure name, e.g. :m_lift
# @return [Object] value of the constant named <MEASURE>_MIN
def self.get_min(measure)
  constant_name = "#{measure.to_s.upcase}_MIN"
  const_get(constant_name)
end

.hyper_measures ⇒ `Object`



53
54
55

# File 'lib/evoc/interestingness_measures.rb', line 53

# Lists the measure methods (instance methods prefixed with "m_") that are
# flagged as hyper measures via a <MEASURE>_HYPER_MEASURE constant.
#
# @return [Array<Symbol>] names of the hyper measure methods
def self.hyper_measures
  measure_methods = self.instance_methods.grep(/\Am_(.*)/)
  measure_methods.select { |candidate| is_hyper_measure?(candidate) }
end

.included(base) ⇒ `Object`

class methods

the following is an idiom/hack that also includes the class methods when a class includes this module (normally one would use ‘extend’)



33
34
35

# File 'lib/evoc/interestingness_measures.rb', line 33

# Hook invoked when this module is included: additionally extends the
# including class/module with ClassMethods so that class-level helpers
# become available through a plain `include`.
#
# @param base [Module] the class or module including this module
def self.included(base)
  base.extend ClassMethods
end

.is_hyper_measure?(m) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/evoc/interestingness_measures.rb', line 57

# Tells whether a measure is flagged as a hyper measure.
#
# @param m [#to_s] measure name, e.g. :m_hyper_coefficient
# @return [Object, false] the value of the <MEASURE>_HYPER_MEASURE constant,
#   or false when no such constant can be resolved (NameError)
def self.is_hyper_measure?(m)
  const_get("#{m.to_s.upcase}_HYPER_MEASURE")
rescue NameError
  false
end

.measures ⇒ `Object`



49
50
51

# File 'lib/evoc/interestingness_measures.rb', line 49

# Lists every measure method, i.e. each instance method whose name starts
# with "m_".
#
# @return [Array<Symbol>] names of the measure methods
def self.measures
  instance_methods.grep(/\Am_(.*)/)
end

Instance Method Details

#get_measure(measure) ⇒ `Object`

a common getter for all measures

handles exceptions and converts the final measure to float

# File 'lib/evoc/interestingness_measures.rb', line 119

# Common getter for all measures.
#
# Calls the method named +measure+ and returns its result. When the
# calculation raises Evoc::Exceptions::MeasureCalculationError, a warning is
# logged, the measure's instance variable is replaced with a measure
# initialized without a calculation block, and the method is called again.
#
# @param measure [Symbol, String] name of the measure method, e.g. :m_lift
# @return [Object] whatever the measure method returns
# @raise [NotImplementedError] when the measure method returns nil/false
def get_measure(measure)
  result = method(measure).call
  raise NotImplementedError.new, "#{measure} not implemented" unless result
  result
rescue Evoc::Exceptions::MeasureCalculationError => e
  logger.warn "#{measure} was undefined for #{self.name} on the current history, error: #{e}"
  fallback = self.class.initialize_measure(measure)
  instance_variable_set('@'+measure.to_s, fallback)
  method(measure).call
end

#get_p(p) ⇒ `Object`

Parameters:

p (String) —

the name of the p probability to get

# File 'lib/evoc/interestingness_measures.rb', line 145

# Returns the probability named +p+ after sanity-checking that p_A, p_B and
# p_AB are mutually consistent.
#
# @param p [Symbol, String] name of the probability method, e.g. :p_AB
# @return [Object] the value returned by the probability method
# @raise [Evoc::Exceptions::MeasureCalculationError] when p_A or p_B is 0,
#   is smaller than p_AB, or exceeds what the other two values allow
def get_p(p)
  value = self.method(p).call
  if p_A < p_AB
    raise Evoc::Exceptions::MeasureCalculationError, "p_A was smaller than p_AB"
  elsif p_B < p_AB
    raise Evoc::Exceptions::MeasureCalculationError, "p_B was smaller than p_AB"
  elsif p_A == 0
    raise Evoc::Exceptions::MeasureCalculationError, "p_A was 0"
  elsif p_B == 0
    raise Evoc::Exceptions::MeasureCalculationError, "p_B was 0"
  elsif p_B > 1-p_A+p_AB
    # The message previously interpolated the undefined `p_a`, which turned
    # this error path into a NameError.
    raise Evoc::Exceptions::MeasureCalculationError, "p_B mismatch with p_A and p_AB, (a #{p_A}, b #{p_B}, ab #{p_AB})"
  elsif p_A > 1-p_B+p_AB
    raise Evoc::Exceptions::MeasureCalculationError, "p_A mismatch with p_B and p_AB, (a #{p_A}, b #{p_B}, ab #{p_AB})"
  end
  value
end

#get_p_values ⇒ `Hash`

Returns a hash containing the p probabilities of this rule.

Returns:

(Hash) —

a hash containing the p probabilities of this rule

# File 'lib/evoc/interestingness_measures.rb', line 165

# Collects every probability listed by Evoc::Rule.p_measures for this rule.
#
# @return [Hash] maps each p measure name to the value returned by #get_p
def get_p_values
  Evoc::Rule.p_measures.each_with_object(Hash.new) do |p_name, collected|
    collected[p_name] = self.get_p(p_name)
  end
end

#instantiated_measures ⇒ `Object`

returns the measures which have been instantiated



175
176
177

# File 'lib/evoc/interestingness_measures.rb', line 175

# Returns the measures that have been instantiated on this object.
#
# @return [Array<Symbol>] the entries of self.class.measures for which
#   #measure_instantiated? is true
def instantiated_measures
  all_measures = self.class.measures
  all_measures.select { |candidate| measure_instantiated?(candidate) }
end

#lhs ⇒ `Object`

Raises:

(NotImplementedError.new)



14
15
16

# File 'lib/evoc/interestingness_measures.rb', line 14

# Placeholder that must be overridden by the class mixing in this module.
#
# @raise [NotImplementedError] always
def lhs
  raise NotImplementedError, "lhs has not been implemented on the current class"
end

#m_added_value ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 347

# Added value measure: P(B|A) - P(B). Memoized in @m_added_value.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_added_value
  return @m_added_value if @m_added_value
  @m_added_value = self.class.initialize_measure(__method__) do
    p_BgivenA - p_B
  end
end

#m_causal_confidence ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 356

# Causal confidence measure: 1/2 * (P(B|A) + P(notA|notB)).
# Memoized in @m_causal_confidence.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_causal_confidence
  return @m_causal_confidence if @m_causal_confidence
  @m_causal_confidence = self.class.initialize_measure(__method__) do
    half = 1.to_r/2
    half*(p_BgivenA + p_notAgivennotB)
  end
end

#m_causal_support ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 365

# Causal support measure: P(A,B) + P(notA,notB).
# Memoized in @m_causal_support.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_causal_support
  return @m_causal_support if @m_causal_support
  @m_causal_support = self.class.initialize_measure(__method__) do
    p_AB + p_notA_notB
  end
end

#m_collective_strength ⇒ `Object`

range from Aggarwal1998

# File 'lib/evoc/interestingness_measures.rb', line 552

# Collective strength measure (range from Aggarwal1998).
# Memoized in @m_collective_strength.
#
# Both factors fall back to float division when their denominator is 0, so
# the result is a float (infinity/NaN) rather than a ZeroDivisionError.
#
# NOTE(review): the formula uses the conditional P(notB|notA); some
# references state collective strength with the joint P(notA,notB) instead.
# Confirm which source this implementation follows.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_collective_strength
  return @m_collective_strength if @m_collective_strength
  @m_collective_strength = self.class.initialize_measure(__method__) do
    agreement_num = p_AB + p_notBgivennotA
    agreement_den = p_A*p_B + p_notA*p_notB
    agreement =
      if agreement_den == 0
        agreement_num/agreement_den.to_f
      else
        agreement_num/agreement_den
      end

    violation_num = 1 - p_A*p_B - p_notA*p_notB
    violation_den = 1 - p_AB - p_notBgivennotA
    violation =
      if violation_den == 0
        violation_num/violation_den.to_f
      else
        violation_num/violation_den
      end

    agreement * violation
  end
end

#m_confidence ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 265

# Confidence measure: P(B|A). Memoized in @m_confidence.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_confidence
  return @m_confidence if @m_confidence
  @m_confidence = self.class.initialize_measure(__method__) do
    p_BgivenA
  end
end

#m_conviction ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 517

# Conviction measure: P(A)*P(notB) / P(A,notB). Memoized in @m_conviction.
#
# When P(A,notB) is 0 the value is 0 if the numerator is also 0, and
# Float::INFINITY otherwise.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_conviction
  return @m_conviction if @m_conviction
  @m_conviction = self.class.initialize_measure(__method__) do
    expected_violations = p_A*p_notB
    observed_violations = p_A_notB
    if observed_violations != 0
      expected_violations/observed_violations
    elsif expected_violations == 0
      0
    else
      Float::INFINITY
    end
  end
end

#m_cosine ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 647

# Cosine measure: P(A,B) / sqrt(P(A)*P(B)). Memoized in @m_cosine.
#
# @return [Object] the measure built by self.class.initialize_measure
# @raise [Evoc::Exceptions::MeasureCalculationError] when the denominator is 0
def m_cosine
  @m_cosine ||= self.class.initialize_measure(__method__) {
    num = p_AB
    # Math.sqrt yields a Float; rationalize brings it back to a Rational.
    den = Math.sqrt(p_A*p_B).rationalize
    if den == 0
      # The message previously interpolated the undefined `p_a`, which turned
      # this error path into a NameError.
      raise Evoc::Exceptions::MeasureCalculationError, "Denominator became 0 when calculating cosine (a #{p_A}, b #{p_B}, ab #{p_AB})"
    else
      num/den
    end
  }
end

#m_coverage ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 274

# Coverage measure: P(A). Memoized in @m_coverage.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_coverage
  return @m_coverage if @m_coverage
  @m_coverage = self.class.initialize_measure(__method__) do
    p_A
  end
end

#m_descriptive_confirmed_confidence ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 374

# Descriptive confirmed confidence: P(B|A) - P(notB|A).
# Memoized in @m_descriptive_confirmed_confidence.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_descriptive_confirmed_confidence
  return @m_descriptive_confirmed_confidence if @m_descriptive_confirmed_confidence
  @m_descriptive_confirmed_confidence = self.class.initialize_measure(__method__) do
    p_BgivenA - p_notBgivenA
  end
end

#m_difference_of_confidence ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 383

# Difference of confidence: P(B|A) - P(B|notA).
# Memoized in @m_difference_of_confidence.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_difference_of_confidence
  return @m_difference_of_confidence if @m_difference_of_confidence
  @m_difference_of_confidence = self.class.initialize_measure(__method__) do
    p_BgivenA - p_BgivennotA
  end
end

#m_example_and_counterexample_rate ⇒ `Object`

0 when equally many examples as counter examples

# File 'lib/evoc/interestingness_measures.rb', line 734

# Example and counterexample rate: (P(A,B) - P(A,notB)) / P(A,B).
# 0 when there are equally many examples as counter examples.
# Memoized in @m_example_and_counterexample_rate.
#
# Yields -Float::INFINITY when P(A,B) is 0.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_example_and_counterexample_rate
  return @m_example_and_counterexample_rate if @m_example_and_counterexample_rate
  @m_example_and_counterexample_rate = self.class.initialize_measure(__method__) do
    surplus = p_AB - p_A_notB
    examples = p_AB
    examples == 0 ? -Float::INFINITY : surplus/examples
  end
end

#m_gini_index ⇒ `Object`

Measures quadratic entropy

# File 'lib/evoc/interestingness_measures.rb', line 571

# Gini index (measures quadratic entropy):
#   P(A)*(P(B|A)^2 + P(notB|A)^2) + P(notA)*(P(B|notA)^2 + P(notB|notA)^2)
#     - P(B)^2 - P(notB)^2
# Memoized in @m_gini_index.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_gini_index
  return @m_gini_index if @m_gini_index
  @m_gini_index = self.class.initialize_measure(__method__) do
    with_a = p_A*((p_BgivenA**2)+(p_notBgivenA**2))
    without_a = p_notA*((p_BgivennotA**2)+(p_notBgivennotA**2))
    with_a + without_a - p_B**2 - p_notB**2
  end
end

#m_hyper_coefficient ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 211

# Hyper coefficient measure: the value of #hyper_coefficient when the object
# responds to it, otherwise 0.
#
# NOTE(review): unlike the other measures this one is assigned with `=`
# rather than `||=`, so it is rebuilt on every call and replaces any value
# previously stored in @m_hyper_coefficient — confirm this is intended.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_hyper_coefficient
  @m_hyper_coefficient = self.class.initialize_measure(__method__) do
    self.respond_to?(:hyper_coefficient) ? self.hyper_coefficient : 0
  end
end

#m_imbalance_ratio ⇒ `Object`

IR gauges the degree of imbalance between two events that the lhs and the rhs are contained in a transaction. The ratio is close to 0 if the conditional probabilities are similar (i.e., very balanced) and close to 1 if they are very different

# File 'lib/evoc/interestingness_measures.rb', line 419

# Imbalance ratio: |P(A|B) - P(B|A)| / (P(A|B) + P(B|A) - P(A|B)*P(B|A)).
# Close to 0 if the conditional probabilities are similar and close to 1 if
# they are very different. Memoized in @m_imbalance_ratio.
#
# @return [Object] the measure built by self.class.initialize_measure
# @raise [Evoc::Exceptions::MeasureCalculationError] when the denominator is
#   0 while the numerator is not
def m_imbalance_ratio
  @m_imbalance_ratio ||= self.class.initialize_measure(__method__) {
    numerator = (p_AgivenB - p_BgivenA).abs
    denominator = (p_AgivenB + p_BgivenA - p_AgivenB*p_BgivenA)
    if denominator == 0
      if numerator == 0
        0
      else
        # Use the same exception class as the rest of this module (it is the
        # one #get_measure rescues); the previous reference to
        # Evoc::MeasureCalculationError did not match it.
        raise Evoc::Exceptions::MeasureCalculationError, "Numerator was not 0 when denominator was 0 when calculating imbalance ratio"
      end
    else
      numerator/denominator
    end
  }
end

#m_interestingness_weighting_dependency ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 540

# Interestingness weighting dependency, computed as
#   (P(B|A)/P(B))^(k-1) * P(A,B)^m
# with both weights fixed to 2 (k weights dependency, m weights generality).
# Memoized in @m_interestingness_weighting_dependency.
#
# NOTE(review): some references define the dependency factor as
# (lift^k - 1) rather than lift^(k-1) — confirm against the intended source.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_interestingness_weighting_dependency
  dependency_weight = 2
  generality_weight = 2
  return @m_interestingness_weighting_dependency if @m_interestingness_weighting_dependency
  @m_interestingness_weighting_dependency = self.class.initialize_measure(__method__) do
    dependency = (p_BgivenA/p_B)**(dependency_weight-1)
    generality = p_AB**generality_weight
    dependency*generality
  end
end

#m_j_measure ⇒ `Object`

Measures cross entropy

# File 'lib/evoc/interestingness_measures.rb', line 595

# J-measure (measures cross entropy):
#   P(A,B)*log(P(B|A)/P(B)) + P(A,notB)*log(P(notB|A)/P(notB))
# Memoized in @m_j_measure.
#
# A term whose leading probability is 0 contributes 0, which keeps the
# result free of the -infinity/NaN the logarithm produces in that case.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_j_measure
  return @m_j_measure if @m_j_measure
  @m_j_measure = self.class.initialize_measure(__method__) do
    # Float denominators: a zero divisor gives Infinity/NaN, not an exception.
    log_b_ratio = Math.log(p_BgivenA/(p_B.to_f))
    log_not_b_ratio = Math.log(p_notBgivenA/(p_notB.to_f))

    b_term = if p_AB == 0
               0
             else
               p_AB*log_b_ratio
             end
    not_b_term = if p_A_notB == 0
                   0
                 else
                   p_A_notB*log_not_b_ratio
                 end

    b_term+not_b_term
  end
end

#m_jaccard ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 405

# Jaccard measure: P(A,B) / (P(A) + P(B) - P(A,B)). Memoized in @m_jaccard.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_jaccard
  return @m_jaccard if @m_jaccard
  @m_jaccard = self.class.initialize_measure(__method__) do
    either = p_A+p_B-p_AB
    p_AB/either
  end
end

#m_kappa ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 580

# Kappa measure:
#   (P(A,B) + P(notA,notB) - P(A)P(B) - P(notA)P(notB))
#     / (1 - P(A)P(B) - P(notA)P(notB))
# Memoized in @m_kappa.
#
# Falls back to float division when the denominator is 0, so the result is
# a float (infinity/NaN) rather than a ZeroDivisionError.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_kappa
  return @m_kappa if @m_kappa
  @m_kappa = self.class.initialize_measure(__method__) do
    observed_gain = p_AB + p_notA_notB - p_A*p_B - p_notA*p_notB
    max_gain = 1 - p_A*p_B - p_notA*p_notB
    if max_gain == 0
      observed_gain/max_gain.to_f
    else
      observed_gain/max_gain
    end
  end
end

#m_klosgen ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 496

# Klosgen measure: sqrt(P(A,B)) * max(P(B|A) - P(B), P(A|B) - P(A)).
# Memoized in @m_klosgen.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_klosgen
  return @m_klosgen if @m_klosgen
  @m_klosgen = self.class.initialize_measure(__method__) do
    support_root = Math.sqrt(p_AB)
    strongest_gain = [(p_BgivenA-p_B),p_AgivenB-p_A].max
    # The square root is a Float; rationalize converts the product back.
    (support_root*strongest_gain).rationalize
  end
end

#m_kulczynski ⇒ `Object`

Calculate the null-invariant Kulczynski measure with a preference for skewed patterns.

# File 'lib/evoc/interestingness_measures.rb', line 508

# Kulczynski measure: (P(A,B)/2) * (1/P(A) + 1/P(B)).
# Memoized in @m_kulczynski.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_kulczynski
  return @m_kulczynski if @m_kulczynski
  @m_kulczynski = self.class.initialize_measure(__method__) do
    half_support = p_AB/2
    inverse_sum = (1/p_A)+(1/p_B)
    half_support*inverse_sum
  end
end

#m_laplace_corrected_confidence ⇒ `Object`

Corrected confidence estimate decreases with lower support to account for estimation uncertainty with low counts.

# File 'lib/evoc/interestingness_measures.rb', line 768

# Laplace corrected confidence, computed here as (P(A,B) + 1) / (P(B) + 2).
# Memoized in @m_laplace_corrected_confidence.
#
# NOTE(review): the Laplace correction is commonly stated on counts and on
# the antecedent, i.e. (count(A,B) + 1) / (count(A) + 2). This implementation
# uses probabilities and P(B) — confirm that this is intended.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_laplace_corrected_confidence
  return @m_laplace_corrected_confidence if @m_laplace_corrected_confidence
  @m_laplace_corrected_confidence = self.class.initialize_measure(__method__) do
    corrected_hits = p_AB + 1
    corrected_base = p_B + 2
    corrected_hits/corrected_base
  end
end

#m_least_contradiction ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 705

# Least contradiction measure: (P(A,B) - P(A,notB)) / P(B).
# Memoized in @m_least_contradiction.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_least_contradiction
  return @m_least_contradiction if @m_least_contradiction
  @m_least_contradiction = self.class.initialize_measure(__method__) do
    surplus = p_AB-p_A_notB
    surplus/p_B
  end
end

#m_leverage ⇒ `Object`

Leverage measures the difference between how often X and Y appear together in the data set and what would be expected if X and Y were statistically independent

# File 'lib/evoc/interestingness_measures.rb', line 328

# Leverage measure, computed here as P(B|A) - P(A)*P(B).
# Memoized in @m_leverage.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_leverage
  return @m_leverage if @m_leverage
  @m_leverage = self.class.initialize_measure(__method__) do
    p_BgivenA - (p_A*p_B)
  end
end

#m_lift ⇒ `Object`

aka interest. Lift measures how many times more often X and Y occur together than expected if they were statistically independent

# File 'lib/evoc/interestingness_measures.rb', line 315

# Lift (aka interest): P(A,B) / (P(A)*P(B)). Memoized in @m_lift.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_lift
  return @m_lift if @m_lift
  @m_lift = self.class.initialize_measure(__method__) do
    expected_together = p_A*p_B
    p_AB/expected_together
  end
end

#m_linear_correlation_coefficient ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 636

# Linear correlation coefficient:
#   (P(A,B) - P(A)P(B)) / sqrt(P(A)P(B)P(notA)P(notB))
# Memoized in @m_linear_correlation_coefficient.
#
# A zero denominator is divided as a float (yielding infinity/NaN);
# otherwise the square root is rationalized before dividing.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_linear_correlation_coefficient
  return @m_linear_correlation_coefficient if @m_linear_correlation_coefficient
  @m_linear_correlation_coefficient = self.class.initialize_measure(__method__) do
    covariance = p_AB-(p_A*p_B)
    spread = Math.sqrt(p_A*p_B*p_notA*p_notB)
    if spread == 0
      covariance/spread.to_f
    else
      covariance/(spread.rationalize)
    end
  end
end

#m_loevinger ⇒ `Object`

aka Certainty Factor The certainty factor is a measure of variation of the probability that Y is in a transaction when only considering transactions with X. An increasing CF means a decrease of the probability that Y is not in a transaction that X is in. Negative CFs have a similar interpretation

# File 'lib/evoc/interestingness_measures.rb', line 670

# Loevinger measure (aka certainty factor):
#   1 - P(A,notB) / (P(A)*P(notB))
# Memoized in @m_loevinger. Yields 1 when P(A)*P(notB) is 0.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_loevinger
  return @m_loevinger if @m_loevinger
  @m_loevinger = self.class.initialize_measure(__method__) do
    expected_violations = p_A*p_notB
    expected_violations == 0 ? 1 : 1 - ((p_A_notB)/expected_violations)
  end
end

#m_odd_multiplier ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 714

# Odd multiplier: (P(A,B)*P(notB)) / (P(B)*P(A,notB)).
# Memoized in @m_odd_multiplier.
#
# When the denominator is 0 the value is 0 if the numerator is also 0, and
# Float::INFINITY otherwise.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_odd_multiplier
  return @m_odd_multiplier if @m_odd_multiplier
  @m_odd_multiplier = self.class.initialize_measure(__method__) do
    top = p_AB*p_notB
    bottom = p_B*p_A_notB
    if bottom != 0
      top/bottom
    elsif top == 0
      0
    else
      Float::INFINITY
    end
  end
end

#m_odds_ratio ⇒ `Object`

The odds of finding X in transactions which contain Y divided by the odds of finding X in transactions which do not contain Y

# File 'lib/evoc/interestingness_measures.rb', line 442

# Odds ratio: (P(A,B)*P(notA,notB)) / (P(A,notB)*P(notA,B)).
# Memoized in @m_odds_ratio. Yields Float::INFINITY when the denominator
# is 0.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_odds_ratio
  return @m_odds_ratio if @m_odds_ratio
  @m_odds_ratio = self.class.initialize_measure(__method__) do
    concordant = p_AB*p_notA_notB
    discordant = p_A_notB*p_notA_B
    discordant == 0 ? Float::INFINITY : concordant/discordant
  end
end

#m_one_way_support ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 609

# One way support: P(B|A) * log2(P(A,B) / (P(A)*P(B))).
# Memoized in @m_one_way_support. Yields 0 when P(B|A) is 0.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_one_way_support
  return @m_one_way_support if @m_one_way_support
  @m_one_way_support = self.class.initialize_measure(__method__) do
    confidence = p_BgivenA
    if confidence == 0
      0
    else
      # log2 returns a Float; rationalize converts it back before scaling.
      log_lift = Math.log2(p_AB/(p_A*p_B)).rationalize
      confidence*(log_lift)
    end
  end
end

#m_piatetsky_shapiro ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 337

# Piatetsky-Shapiro measure: P(A,B) - P(A)*P(B).
# Memoized in @m_piatetsky_shapiro.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_piatetsky_shapiro
  return @m_piatetsky_shapiro if @m_piatetsky_shapiro
  @m_piatetsky_shapiro = self.class.initialize_measure(__method__) do
    p_AB - p_A*p_B
  end
end

#m_prevalence ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 283

# Prevalence measure: P(B). Memoized in @m_prevalence.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_prevalence
  return @m_prevalence if @m_prevalence
  @m_prevalence = self.class.initialize_measure(__method__) do
    p_B
  end
end

#m_recall ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 292

# Recall measure: P(A|B). Memoized in @m_recall.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_recall
  return @m_recall if @m_recall
  @m_recall = self.class.initialize_measure(__method__) do
    p_AgivenB
  end
end

#m_relative_risk ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 392

# Relative risk: P(B|A) / P(B|notA). Memoized in @m_relative_risk.
# Yields Float::INFINITY when P(B|notA) is 0.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_relative_risk
  return @m_relative_risk if @m_relative_risk
  @m_relative_risk = self.class.initialize_measure(__method__) do
    baseline = p_BgivennotA
    baseline == 0 ? Float::INFINITY : p_BgivenA/baseline
  end
end

#m_sebag_schoenauer ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 683

# Sebag-Schoenauer measure: P(A,B) / P(A,notB).
# Memoized in @m_sebag_schoenauer. Yields Float::INFINITY when P(A,notB)
# is 0.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_sebag_schoenauer
  return @m_sebag_schoenauer if @m_sebag_schoenauer
  @m_sebag_schoenauer = self.class.initialize_measure(__method__) do
    counterexamples = p_A_notB
    counterexamples == 0 ? Float::INFINITY : p_AB/counterexamples
  end
end

#m_specificity ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 301

# Specificity measure: P(notB|notA). Memoized in @m_specificity.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_specificity
  return @m_specificity if @m_specificity
  @m_specificity = self.class.initialize_measure(__method__) do
    p_notBgivennotA
  end
end

#m_support ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 256

# Support measure: P(A,B). Memoized in @m_support.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_support
  return @m_support if @m_support
  @m_support = self.class.initialize_measure(__method__) do
    p_AB
  end
end

#m_two_way_support ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 622

# Two way support: P(A,B) * log2(P(A,B) / (P(A)*P(B))).
# Memoized in @m_two_way_support. Yields 0 when P(A,B) is 0.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_two_way_support
  return @m_two_way_support if @m_two_way_support
  @m_two_way_support = self.class.initialize_measure(__method__) do
    joint = p_AB
    if joint == 0
      0
    else
      # log2 returns a Float; rationalize converts it back before scaling.
      log_lift = Math.log2(p_AB/(p_A*p_B)).rationalize
      joint*(log_lift)
    end
  end
end

#m_varying_rates_liaison ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 696

# Varying rates liaison: P(A,B) / (P(A)*P(B)) - 1.
# Memoized in @m_varying_rates_liaison.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_varying_rates_liaison
  return @m_varying_rates_liaison if @m_varying_rates_liaison
  @m_varying_rates_liaison = self.class.initialize_measure(__method__) do
    lift = p_AB/(p_A*p_B)
    (lift) - 1
  end
end

#m_yules_q ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 457

# Yule's Q: (odds_ratio - 1) / (odds_ratio + 1), derived from the value of
# #m_odds_ratio. Memoized in @m_yules_q.
#
# @return [Object] the measure built by self.class.initialize_measure
# @raise [Evoc::Exceptions::MeasureCalculationError] when the odds ratio
#   value is nil
def m_yules_q
  @m_yules_q ||= self.class.initialize_measure(__method__) {
    odds_ratio = self.m_odds_ratio.value
    if odds_ratio.nil?
      # Use the same exception class as the rest of this module (it is the
      # one #get_measure rescues); the previous reference to
      # Evoc::MeasureCalculationError did not match it.
      raise Evoc::Exceptions::MeasureCalculationError, "Odds ratio was nil when calculating yules q"
    elsif odds_ratio.to_f.finite?
      (odds_ratio - 1)/(odds_ratio + 1)
    else
      # Float#infinite? gives the limit directly:
      # -1 if odds ratio -inf
      # 1 if odds ratio +inf
      odds_ratio.to_f.infinite?
    end
  }
end

#m_yules_y ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 477

# Yule's Y: (sqrt(odds_ratio) - 1) / (sqrt(odds_ratio) + 1), derived from
# the value of #m_odds_ratio. Memoized in @m_yules_y.
#
# For a non-finite odds ratio the result of Float#infinite? is used
# (1 for +infinity, -1 for -infinity).
#
# @return [Object] the measure built by self.class.initialize_measure
# @raise [Evoc::Exceptions::MeasureCalculationError] when the odds ratio
#   value is nil
def m_yules_y
  return @m_yules_y if @m_yules_y
  @m_yules_y = self.class.initialize_measure(__method__) do
    odds_ratio = self.m_odds_ratio.value
    if odds_ratio.nil?
      raise Evoc::Exceptions::MeasureCalculationError, "Odds ratio was nil when calculating yules y"
    end

    if odds_ratio.to_f.finite?
      # The square root is a Float; rationalize converts it back.
      root = Math.sqrt(odds_ratio).rationalize
      (root - 1)/(root + 1)
    else
      odds_ratio.to_f.infinite?
    end
  end
end

#m_zhang ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 749

# Zhang measure:
#   (P(A,B) - P(A)P(B)) / max(P(A,B)*P(notB), P(B)*P(A,notB))
# Memoized in @m_zhang. Yields 0 when the denominator is 0.
#
# @return [Object] the measure built by self.class.initialize_measure
def m_zhang
  return @m_zhang if @m_zhang
  @m_zhang = self.class.initialize_measure(__method__) do
    deviation = p_AB-p_A*p_B
    scale = [p_AB*p_notB,p_B*p_A_notB].max
    scale == 0 ? 0 : deviation/scale
  end
end

#measure_instantiated?(measure) ⇒ `Boolean`

Returns true if the measure has been instantiated

Returns:

(Boolean)



181
182
183

# File 'lib/evoc/interestingness_measures.rb', line 181

# Tells whether the instance variable backing a measure holds a non-nil
# value.
#
# @param measure [#to_s] measure name, e.g. :m_lift
# @return [Boolean] true if the measure has been instantiated
def measure_instantiated?(measure)
  ivar_name = "@#{measure}"
  !instance_variable_get(ivar_name).nil?
end

#n ⇒ `Object`

the number of transactions n is converted into the specified type to ensure that the type is used throughout calculations



784
785
786

# File 'lib/evoc/interestingness_measures.rb', line 784

# The number of transactions (tx_store.size), converted with the conversion
# method named after VALUE_TYPE so that this type is used throughout the
# calculations. Memoized in @n.
#
# @return [Object] the transaction count as a VALUE_TYPE value
def n
  return @n if @n
  converter = VALUE_TYPE.method(VALUE_TYPE.to_s)
  @n = converter.call(tx_store.size)
end

#name ⇒ `Object`

Raises:

(NotImplementedError.new)



22
23
24

# File 'lib/evoc/interestingness_measures.rb', line 22

# Placeholder that must be overridden by the class mixing in this module.
#
# @raise [NotImplementedError] always
def name
  raise NotImplementedError, "name has not been implemented on the current class"
end

#p_A ⇒ `Object`

the ratio of tx with A as a subset



790
791
792

# File 'lib/evoc/interestingness_measures.rb', line 790

# P(A): the ratio of transactions with A (lhs) as a subset. Memoized in @p_A.
#
# @return [Object] matching transaction count divided by #n
def p_A
  return @p_A if @p_A
  matching = tx_store.transactions_of_list(lhs, strict: true)
  @p_A = matching.size/n
end

#p_A_notB ⇒ `Object`

the ratio of tx where A is a subset but B is not



838
839
840

# File 'lib/evoc/interestingness_measures.rb', line 838

# P(A,notB): the ratio of transactions where A is a subset but B is not,
# computed as P(A) - P(A,B). Memoized in @p_A_notB.
def p_A_notB
  return @p_A_notB if @p_A_notB
  @p_A_notB = p_A - p_AB
end

#p_AB ⇒ `Object`

the ratio of tx with the union of A and B as a subset



814
815
816

# File 'lib/evoc/interestingness_measures.rb', line 814

# P(A,B): the ratio of transactions with the union of A (lhs) and B (rhs)
# as a subset. Memoized in @p_AB.
#
# @return [Object] matching transaction count divided by #n
def p_AB
  return @p_AB if @p_AB
  matching = tx_store.transactions_of_list((lhs | rhs), strict: true)
  @p_AB = matching.size/n
end

#p_AgivenB ⇒ `Object`

the ratio of the union being a subset to the number of txes where B is a subset



844
845
846

# File 'lib/evoc/interestingness_measures.rb', line 844

# P(A|B): P(A,B) / P(B), or 0 when P(B) is 0. Memoized in @p_AgivenB.
def p_AgivenB
  return @p_AgivenB if @p_AgivenB
  @p_AgivenB =
    if p_B == 0
      0
    else
      p_AB/p_B
    end
end

#p_AgivennotB ⇒ `Object`

if notB is 0, so is A_notB



864
865
866

# File 'lib/evoc/interestingness_measures.rb', line 864

# P(A|notB): P(A,notB) / P(notB), or 0 when P(notB) is 0 (in which case
# P(A,notB) is 0 as well). Memoized in @p_AgivennotB.
def p_AgivennotB
  return @p_AgivennotB if @p_AgivennotB
  @p_AgivennotB =
    if p_notB == 0
      0
    else
      p_A_notB/p_notB
    end
end

#p_AorB ⇒ `Object`

the ratio of tx where A or B is a subset



820
821
822

# File 'lib/evoc/interestingness_measures.rb', line 820

# P(A or B): the ratio of transactions where A or B is a subset, computed
# as P(A) + P(B) - P(A,B). Memoized in @p_AorB.
def p_AorB
  return @p_AorB if @p_AorB
  @p_AorB = p_A + p_B - p_AB
end

#p_B ⇒ `Object`

the ratio of tx with B as a subset



802
803
804

# File 'lib/evoc/interestingness_measures.rb', line 802

# P(B): the ratio of transactions with B (rhs) as a subset. Memoized in @p_B.
#
# @return [Object] matching transaction count divided by #n
def p_B
  return @p_B if @p_B
  matching = tx_store.transactions_of_list(rhs, strict: true)
  @p_B = matching.size/n
end

#p_BgivenA ⇒ `Object`

the ratio of the union being a subset to the number of txes where A is a subset



850
851
852

# File 'lib/evoc/interestingness_measures.rb', line 850

# P(B|A): P(A,B) / P(A), or 0 when P(A) is 0. Memoized in @p_BgivenA.
def p_BgivenA
  return @p_BgivenA if @p_BgivenA
  @p_BgivenA =
    if p_A == 0
      0
    else
      p_AB/p_A
    end
end

#p_BgivennotA ⇒ `Object`

if notA is 0, so is notA_B



869
870
871

# File 'lib/evoc/interestingness_measures.rb', line 869

# P(B|notA): P(notA,B) / P(notA), or 0 when P(notA) is 0 (in which case
# P(notA,B) is 0 as well). Memoized in @p_BgivennotA.
def p_BgivennotA
  return @p_BgivennotA if @p_BgivennotA
  @p_BgivennotA =
    if p_notA == 0
      0
    else
      p_notA_B/p_notA
    end
end

#p_notA ⇒ `Object`

the ratio of tx where A is not a subset



796
797
798

# File 'lib/evoc/interestingness_measures.rb', line 796

# P(notA): the ratio of transactions where A is not a subset, computed as
# 1 - P(A). Memoized in @p_notA.
def p_notA
  return @p_notA if @p_notA
  @p_notA = 1 - p_A
end

#p_notA_B ⇒ `Object`

the ratio of tx where A is not a subset but B is



832
833
834

# File 'lib/evoc/interestingness_measures.rb', line 832

# P(notA,B): the ratio of transactions where A is not a subset but B is,
# computed as P(B) - P(A,B). Memoized in @p_notA_B.
def p_notA_B
  return @p_notA_B if @p_notA_B
  @p_notA_B = p_B - p_AB
end

#p_notA_notB ⇒ `Object`

the ratio of tx where neither A nor B is a subset



826
827
828

# File 'lib/evoc/interestingness_measures.rb', line 826

# P(notA,notB): the ratio of transactions where neither A nor B is a subset,
# computed as 1 - (P(A) + P(B)) + P(A,B). Memoized in @p_notA_notB.
def p_notA_notB
  return @p_notA_notB if @p_notA_notB
  covered_by_either = p_A + p_B
  @p_notA_notB = 1 - covered_by_either + p_AB
end

#p_notAgivenB ⇒ `Object`



854
855
856

# File 'lib/evoc/interestingness_measures.rb', line 854

# P(notA|B): P(notA,B) / P(B), or 0 when P(B) is 0.
# Memoized in @p_notAgivenB.
def p_notAgivenB
  return @p_notAgivenB if @p_notAgivenB
  @p_notAgivenB =
    if p_B == 0
      0
    else
      p_notA_B/p_B
    end
end

#p_notAgivennotB ⇒ `Object`

if notB is 0, so is notA and notB



878
879
880

# File 'lib/evoc/interestingness_measures.rb', line 878

# P(notA|notB): P(notA,notB) / P(notB), or 0 when P(notB) is 0 (in which
# case P(notA,notB) is 0 as well). Memoized in @p_notAgivennotB.
def p_notAgivennotB
  return @p_notAgivennotB if @p_notAgivennotB
  @p_notAgivennotB =
    if p_notB == 0
      0
    else
      p_notA_notB/p_notB
    end
end

#p_notB ⇒ `Object`

the ratio of tx where B is not a subset



808
809
810

# File 'lib/evoc/interestingness_measures.rb', line 808

# P(notB): the ratio of transactions where B is not a subset, computed as
# 1 - P(B). Memoized in @p_notB.
def p_notB
  return @p_notB if @p_notB
  @p_notB = 1 - p_B
end

#p_notBgivenA ⇒ `Object`

if A is 0, so is A,notB



859
860
861

# File 'lib/evoc/interestingness_measures.rb', line 859

# P(notB|A): P(A,notB) / P(A), or 0 when P(A) is 0 (in which case P(A,notB)
# is 0 as well). Memoized in @p_notBgivenA.
def p_notBgivenA
  return @p_notBgivenA if @p_notBgivenA
  @p_notBgivenA =
    if p_A == 0
      0
    else
      p_A_notB/p_A
    end
end

#p_notBgivennotA ⇒ `Object`



873
874
875

# File 'lib/evoc/interestingness_measures.rb', line 873

# P(notB|notA): P(notA,notB) / P(notA), or 0 when P(notA) is 0.
# Memoized in @p_notBgivennotA.
def p_notBgivennotA
  return @p_notBgivennotA if @p_notBgivennotA
  @p_notBgivennotA =
    if p_notA == 0
      0
    else
      p_notA_notB/p_notA
    end
end

#rhs ⇒ `Object`

Raises:

(NotImplementedError.new)



18
19
20

# File 'lib/evoc/interestingness_measures.rb', line 18

# Placeholder that must be overridden by the class mixing in this module.
#
# @raise [NotImplementedError] always
def rhs
  raise NotImplementedError, "rhs has not been implemented on the current class"
end

#set_measure(measure, value, hyper_measure: false) ⇒ `Object`

# File 'lib/evoc/interestingness_measures.rb', line 132

# Manually sets a measure of this rule to a fixed value.
#
# The value is wrapped through self.class.initialize_measure and stored in
# the instance variable named after the measure.
#
# @param measure [#to_s] measure name, e.g. :m_lift
# @param value [Object] the value the measure should hold
# @param hyper_measure [Boolean] forwarded to initialize_measure
# @return [Object] the stored measure
def set_measure(measure,value,hyper_measure: false)
  wrapped = self.class.initialize_measure(measure, hyper_measure: hyper_measure) do
    value
  end
  instance_variable_set("@#{measure}", wrapped)
end

#set_p(p, value) ⇒ `Object`

manually set the probability p of this rule



139
140
141

# File 'lib/evoc/interestingness_measures.rb', line 139

# Manually sets the probability +p+ of this rule by writing the instance
# variable of the same name.
#
# @param p [#to_s] probability name, e.g. :p_AB
# @param value [Object] the value to store
# @return [Object] the stored value
def set_p(p,value)
  ivar_name = "@#{p}"
  instance_variable_set(ivar_name, value)
end

#to_a ⇒ `Object`



109
110
111

# File 'lib/evoc/interestingness_measures.rb', line 109

# Flattens this rule into an array: the comma-joined lhs, the comma-joined
# rhs, followed by the value of every instantiated measure.
#
# @return [Array] [lhs string, rhs string, measure values...]
def to_a
  antecedent = lhs.join(',')
  consequent = rhs.join(',')
  measure_values = instantiated_measures.map { |m| self.get_measure(m).value }
  [antecedent, consequent] + measure_values
end

#tx_store ⇒ `Object`

methods that must be implemented in the class that uses this module as a mixin

Raises:

(NotImplementedError.new)



10
11
12

# File 'lib/evoc/interestingness_measures.rb', line 10

# Placeholder that must be overridden by the class mixing in this module.
#
# @raise [NotImplementedError] always
def tx_store
  raise NotImplementedError, "tx_store has not been implemented on the current class"
end

Module: Evoc::InterestingnessMeasures

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.get_max(measure) ⇒ Object

.get_mid(measure) ⇒ Object

.get_min(measure) ⇒ Object

.hyper_measures ⇒ Object

.included(base) ⇒ Object

.is_hyper_measure?(m) ⇒ Boolean

.measures ⇒ Object

Instance Method Details

#get_measure(measure) ⇒ Object

#get_p(p) ⇒ Object

#get_p_values ⇒ Hash

#instantiated_measures ⇒ Object

#lhs ⇒ Object

#m_added_value ⇒ Object

#m_causal_confidence ⇒ Object

#m_causal_support ⇒ Object

#m_collective_strength ⇒ Object

#m_confidence ⇒ Object

#m_conviction ⇒ Object

#m_cosine ⇒ Object

#m_coverage ⇒ Object

#m_descriptive_confirmed_confidence ⇒ Object

#m_difference_of_confidence ⇒ Object

#m_example_and_counterexample_rate ⇒ Object

#m_gini_index ⇒ Object

#m_hyper_coefficient ⇒ Object

#m_imbalance_ratio ⇒ Object

#m_interestingness_weighting_dependency ⇒ Object

#m_j_measure ⇒ Object

#m_jaccard ⇒ Object

#m_kappa ⇒ Object

#m_klosgen ⇒ Object

#m_kulczynski ⇒ Object

#m_laplace_corrected_confidence ⇒ Object

#m_least_contradiction ⇒ Object

#m_leverage ⇒ Object

#m_lift ⇒ Object

#m_linear_correlation_coefficient ⇒ Object

#m_loevinger ⇒ Object

#m_odd_multiplier ⇒ Object

#m_odds_ratio ⇒ Object

#m_one_way_support ⇒ Object

#m_piatetsky_shapiro ⇒ Object

#m_prevalence ⇒ Object

#m_recall ⇒ Object

#m_relative_risk ⇒ Object

#m_sebag_schoenauer ⇒ Object

#m_specificity ⇒ Object

#m_support ⇒ Object

#m_two_way_support ⇒ Object

#m_varying_rates_liaison ⇒ Object

#m_yules_q ⇒ Object

#m_yules_y ⇒ Object

#m_zhang ⇒ Object

#measure_instantiated?(measure) ⇒ Boolean

#n ⇒ Object

#name ⇒ Object

#p_A ⇒ Object

#p_A_notB ⇒ Object

#p_AB ⇒ Object

#p_AgivenB ⇒ Object

#p_AgivennotB ⇒ Object

#p_AorB ⇒ Object

#p_B ⇒ Object

#p_BgivenA ⇒ Object

#p_BgivennotA ⇒ Object

#p_notA ⇒ Object

#p_notA_B ⇒ Object

#p_notA_notB ⇒ Object

#p_notAgivenB ⇒ Object

#p_notAgivennotB ⇒ Object

#p_notB ⇒ Object

#p_notBgivenA ⇒ Object

#p_notBgivennotA ⇒ Object

.get_max(measure) ⇒ `Object`

.get_mid(measure) ⇒ `Object`

.get_min(measure) ⇒ `Object`

.hyper_measures ⇒ `Object`

.included(base) ⇒ `Object`

.is_hyper_measure?(m) ⇒ `Boolean`

.measures ⇒ `Object`

#get_measure(measure) ⇒ `Object`

#get_p(p) ⇒ `Object`

#get_p_values ⇒ `Hash`

#instantiated_measures ⇒ `Object`

#lhs ⇒ `Object`

#m_added_value ⇒ `Object`

#m_causal_confidence ⇒ `Object`

#m_causal_support ⇒ `Object`

#m_collective_strength ⇒ `Object`

#m_confidence ⇒ `Object`

#m_conviction ⇒ `Object`

#m_cosine ⇒ `Object`

#m_coverage ⇒ `Object`

#m_descriptive_confirmed_confidence ⇒ `Object`

#m_difference_of_confidence ⇒ `Object`

#m_example_and_counterexample_rate ⇒ `Object`

#m_gini_index ⇒ `Object`

#m_hyper_coefficient ⇒ `Object`

#m_imbalance_ratio ⇒ `Object`

#m_interestingness_weighting_dependency ⇒ `Object`

#m_j_measure ⇒ `Object`

#m_jaccard ⇒ `Object`

#m_kappa ⇒ `Object`

#m_klosgen ⇒ `Object`

#m_kulczynski ⇒ `Object`

#m_laplace_corrected_confidence ⇒ `Object`

#m_least_contradiction ⇒ `Object`

#m_leverage ⇒ `Object`

#m_lift ⇒ `Object`

#m_linear_correlation_coefficient ⇒ `Object`

#m_loevinger ⇒ `Object`

#m_odd_multiplier ⇒ `Object`

#m_odds_ratio ⇒ `Object`

#m_one_way_support ⇒ `Object`

#m_piatetsky_shapiro ⇒ `Object`

#m_prevalence ⇒ `Object`

#m_recall ⇒ `Object`

#m_relative_risk ⇒ `Object`

#m_sebag_schoenauer ⇒ `Object`

#m_specificity ⇒ `Object`

#m_support ⇒ `Object`

#m_two_way_support ⇒ `Object`

#m_varying_rates_liaison ⇒ `Object`

#m_yules_q ⇒ `Object`

#m_yules_y ⇒ `Object`

#m_zhang ⇒ `Object`

#measure_instantiated?(measure) ⇒ `Boolean`

#n ⇒ `Object`

#name ⇒ `Object`

#p_A ⇒ `Object`

#p_A_notB ⇒ `Object`

#p_AB ⇒ `Object`

#p_AgivenB ⇒ `Object`

#p_AgivennotB ⇒ `Object`

#p_AorB ⇒ `Object`

#p_B ⇒ `Object`

#p_BgivenA ⇒ `Object`

#p_BgivennotA ⇒ `Object`

#p_notA ⇒ `Object`

#p_notA_B ⇒ `Object`

#p_notA_notB ⇒ `Object`

#p_notAgivenB ⇒ `Object`

#p_notAgivennotB ⇒ `Object`

#p_notB ⇒ `Object`

#p_notBgivenA ⇒ `Object`

#p_notBgivennotA ⇒ `Object`

#rhs ⇒ `Object`

#set_measure(measure, value, hyper_measure: false) ⇒ `Object`

#set_p(p, value) ⇒ `Object`

#to_a ⇒ `Object`

#tx_store ⇒ `Object`