Module: Glymour::Statistics
- Included in:
- Glymour::StructureLearning::LearningNet
- Defined in:
- lib/stats_module.rb
Defined Under Namespace
Classes: Variable, VariableContainer
Instance Method Summary collapse
-
#coindependent?(p_val, *variables) ⇒ Boolean
Takes two or more Variables. Returns true if the first two variables are coindependent given the rest.
Instance Method Details
#coindependent?(p_val, *variables) ⇒ Boolean
Takes two or more Variables. Returns true if the first two variables are coindependent given the rest.
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/stats_module.rb', line 66

# Tests whether the first two variables are coindependent given the rest,
# delegating the chi-squared computations to R through the global `R` object
# (presumably a RinRuby session, per the comment below -- confirm).
#
# With exactly two variables a single chi-squared test is run on their
# contingency table. With conditioning variables, one chi-squared test is run
# per combination of conditioning-variable levels; the statistics and degrees
# of freedom are summed and a p-value is derived from the totals.
#
# @param p_val [Numeric] threshold; the result is true when the observed
#   p-value is strictly greater than this value
# @param variables [Array<#name, #values>] two or more variables; the first
#   two are tested, any further ones are conditioned on
# @return [Boolean] true if the observed p-value exceeds +p_val+
# @raise [ArgumentError] if fewer than two variables are given
def coindependent?(p_val, *variables)
  # TODO: Raise an exception if variables have different tables?
  raise ArgumentError, "at least two variables are required" if variables.length < 2

  R.echo(false)

  # Push variable data into R
  variables.each do |var|
    # Rinruby can't handle true and false values, so use 1 and 0 resp. instead
    sanitized_values = var.values.map do |value|
      case value
      when true then 1
      when false then 0
      else value
      end
    end
    R.assign var.name, sanitized_values
  end

  R.eval <<-EOF
    cond_data <- data.frame(#{variables.map(&:name).join(', ')})
    t <-table(cond_data)
  EOF

  cond_vars = variables.drop(2)

  # If no conditioning variables are given, just return the chi square test for the first two
  if cond_vars.empty?
    R.eval("chisq <- chisq.test(t)")
    observed_p = R.pull("chisq$p.value")
    return observed_p > p_val
  end

  # One list of 1-based level indices per conditioning variable, then every
  # combination of them. Array#product always yields arrays, so a single
  # conditioning variable gives [[1], [2], ...] and needs no special casing.
  level_indices = cond_vars.map { |var| (1..var.values.uniq.length).to_a }
  first_levels, *other_levels = level_indices
  level_combinations = first_levels.product(*other_levels)

  # Find the chi-squared statistic for every state of the conditioning variables and sum them
  chisq_sum = 0
  df = 0
  level_combinations.each do |levels|
    # Columns are filtered with a logical mask rather than -(which(...)):
    # in R a negated empty index selects *no* columns, which would empty the
    # table whenever there is nothing to remove.
    R.eval <<-EOF
      partial_table <- t[,,#{levels.join(',')}]
      table_without_zero_columns <- partial_table[, colSums(partial_table) != 0, drop = FALSE]
      chisq <- chisq.test(table_without_zero_columns)
      s <- chisq$statistic
    EOF

    chisq_sum += R.pull("s").to_f
    df += R.pull("chisq$parameter").to_i
  end

  # Compute the p-value of the sum of statistics
  observed_p = 1 - R.pull("pchisq(#{chisq_sum}, #{df})").to_f
  observed_p > p_val
end