Module: Glymour::Statistics

Included in:
Glymour::StructureLearning::LearningNet
Defined in:
lib/stats_module.rb

Defined Under Namespace

Classes: Variable, VariableContainer

Instance Method Summary collapse

Instance Method Details

#coindependent?(p_val, *variables) ⇒ Boolean

Takes two or more Variables. Returns true if the first two variables are coindependent given the rest.

Returns:

  • (Boolean)


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/stats_module.rb', line 66

# Tests whether the first two variables are independent of each other,
# optionally conditioned on every remaining variable, by running chi-squared
# tests in R through RinRuby.
#
# With exactly two variables a single chi-squared test is run on their
# contingency table. With conditioning variables, one chi-squared test is run
# per combination of conditioning-variable states; the statistics and the
# degrees of freedom are summed and a p-value is computed from the totals.
#
# @param p_val [Numeric] threshold the observed p-value is compared against
# @param variables [Array<Variable>] two or more objects responding to +name+
#   and +values+; the first two are tested, the rest are conditioned on
# @return [Boolean] true if the observed p-value is greater than +p_val+
# @raise [ArgumentError] if fewer than two variables are given
def coindependent?(p_val, *variables)
  raise ArgumentError, 'coindependent? requires at least two variables' if variables.length < 2

  #TODO: Raise an exception if variables have different tables?
  R.echo(false)

  # Push variable data into R
  variables.each do |var|
    # Rinruby can't handle true and false values, so use 1 and 0 resp. instead
    sanitized_values = var.values.map do |value|
      case value
      when true  then 1
      when false then 0
      else value
      end
    end

    R.assign(var.name, sanitized_values)
  end

  R.eval <<-EOF
    cond_data <- data.frame(#{variables.map(&:name).join(', ')})
    t <-table(cond_data)
  EOF

  # drop(2) always yields an Array (possibly empty), unlike a range slice
  cond_vars = variables.drop(2)

  # If no conditioning variables are given, just return the chi square test for the first two
  if cond_vars.empty?
    R.eval "chisq <- chisq.test(t)"
    observed_p = R.pull "chisq$p.value"
    return observed_p > p_val
  end

  # 1-based state indices for each conditioning variable, used to index the
  # matching dimension of the R table
  state_indices = cond_vars.map { |var| (1..var.values.uniq.length).to_a }

  # Cartesian product of the per-variable states. Array#product yields
  # one-element arrays when there is a single conditioning variable, so every
  # combination can be joined uniformly below.
  first_states, *other_states = state_indices
  state_combinations = first_states.product(*other_states)

  # Find the chi-squared statistic for every state of the conditioning variables and sum them
  chisq_sum = 0
  df = 0
  state_combinations.each do |states|
    # Logical indexing drops all-zero columns; the negative-which() form
    # selects nothing at all when no column sums to zero.
    R.eval <<-EOF
      partial_table <- t[,,#{states.join(',')}]
      table_without_zero_columns <- partial_table[, colSums(partial_table) != 0]
      chisq <- chisq.test(table_without_zero_columns)
      s <- chisq$statistic
    EOF

    observed_s = R.pull("s").to_f
    chisq_sum += observed_s
    df += R.pull("chisq$parameter").to_i
  end

  # Compute the p-value of the sum of statistics
  observed_p = 1 - R.pull("pchisq(#{chisq_sum}, #{df})").to_f
  observed_p > p_val
end