Class: Statsample::StratifiedSample

Inherits:
Object
  • Object
show all
Defined in:
lib/statsample/multiset.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ms, strata_sizes) ⇒ StratifiedSample

Returns a new instance of StratifiedSample

Raises:

  • (TypeError)

202
203
204
205
206
207
208
209
210
# File 'lib/statsample/multiset.rb', line 202

# Builds a stratified-sample view over a multiset.
#
# ms           - must be a Statsample::Multiset, otherwise TypeError is raised.
# strata_sizes - hash-like object mapping each dataset name to the size of
#                that stratum. Its sorted keys must equal ms.datasets_names,
#                otherwise ArgumentError is raised.
def initialize(ms, strata_sizes)
  unless ms.is_a?(Statsample::Multiset)
    raise TypeError, "ms should be a Multiset"
  end
  @ms = ms
  if strata_sizes.keys.sort != ms.datasets_names
    raise ArgumentError, "You should put a strata size for each dataset"
  end
  @strata_sizes = strata_sizes
  # Population size: the sum of the declared stratum sizes.
  @population_size = @strata_sizes.inject(0) { |total, pair| total + pair[1] }
  @strata_number = @ms.n_datasets
  # Sample size: the sum of nrows over every dataset of the multiset.
  @sample_size = @ms.datasets.inject(0) { |total, pair| total + pair[1].nrows }
end

Class Method Details

.calculate_n_total(es) ⇒ Object


120
121
122
# File 'lib/statsample/multiset.rb', line 120

# Adds up the 'N' entry of every stratum hash in +es+.
# Returns 0 when +es+ is empty.
def calculate_n_total(es)
  total = 0
  es.each { |stratum| total += stratum['N'] }
  total
end

.mean(*vectors) ⇒ Object

mean for an array of vectors


98
99
100
101
102
103
104
105
# File 'lib/statsample/multiset.rb', line 98

# Pooled mean for an array of vectors: the sum of every vector's +sum+
# divided (as a Float) by the sum of every vector's +size+.
def mean(*vectors)
  n_total = 0
  grand_total = 0
  vectors.each do |vector|
    n_total += vector.size
    grand_total += vector.sum
  end
  grand_total.to_f / n_total
end

.proportion_sd_esd_wor(es) ⇒ Object


197
198
199
# File 'lib/statsample/multiset.rb', line 197

# Square root of the value proportion_variance_ksd_wor returns for +es+.
# NOTE(review): despite the "esd" in this method's name, the "ksd" variance
# is what gets used here — confirm that this is intentional.
def proportion_sd_esd_wor(es)
  variance = proportion_variance_ksd_wor(es)
  Math.sqrt(variance)
end

.proportion_sd_ksd_wor(es) ⇒ Object


171
172
173
# File 'lib/statsample/multiset.rb', line 171

# Square root of the value proportion_variance_ksd_wor returns for +es+.
def proportion_sd_ksd_wor(es)
  variance = proportion_variance_ksd_wor(es)
  Math.sqrt(variance)
end

.proportion_sd_ksd_wr(es) ⇒ Object


176
177
178
179
180
181
182
183
# File 'lib/statsample/multiset.rb', line 176

# For each stratum hash in +es+ (keys 'N', 'n', 'p') accumulates
# N**2 * p * (1 - p) / n, then returns the square root of that total
# multiplied by 1 / (sum of all 'N').
def proportion_sd_ksd_wr(es)
  n_total = calculate_n_total(es)
  weighted = 0
  es.each do |h|
    weighted += (h['N']**2 * h['p'] * (1 - h['p'])) / h['n'].to_f
  end
  Math.sqrt(weighted) * (1.0 / n_total)
end

.proportion_variance_esd_wor(es) ⇒ Object


188
189
190
191
192
193
194
195
196
# File 'lib/statsample/multiset.rb', line 188

# Variance of a stratified proportion computed from per-stratum hashes.
#
# es - enumerable of hashes with keys 'N', 'n' and 'p'.
#
# For every stratum the term
#   N**2 * (N - n) * p * (1 - p) / ((n - 1) * (N - 1))
# is accumulated, and the total is scaled by 1 / (sum of all 'N')**2.
#
# The previous body assigned the term to the accumulator and then read an
# undefined local, so it raised NameError for any non-empty +es+. It also
# took a square root of the total although this method returns a variance;
# both are corrected here.
# A stratum with n == 1 or N == 1 makes the denominator zero.
def proportion_variance_esd_wor(es)
  n_total = calculate_n_total(es)

  sum = es.inject(0) do |a, h|
    val = (h['N']**2 * (h['N'] - h['n']) * h['p'] * (1.0 - h['p'])) / ((h['n'] - 1) * (h['N'] - 1))
    a + val
  end
  (1.0 / n_total**2) * sum
end

.proportion_variance_ksd_wor(es) ⇒ Object


164
165
166
167
168
169
170
# File 'lib/statsample/multiset.rb', line 164

# Sums, over the stratum hashes in +es+ (keys 'N', 'n', 'p'):
#   (N / n_total)**2 * p * (1 - p) / n * (1 - n / N)
# where n_total is the sum of all 'N'. Returns 0 for an empty +es+.
def proportion_variance_ksd_wor(es)
  n_total = calculate_n_total(es)
  es.inject(0) do |acc, h|
    weight_sq = (h['N'].to_f / n_total)**2
    fpc = 1 - (h['n'].to_f / h['N'])
    acc + ((weight_sq * h['p'] * (1 - h['p'])) / h['n']) * fpc
  end
end

.proportion_variance_ksd_wr(es) ⇒ Object


184
185
186
# File 'lib/statsample/multiset.rb', line 184

# Variance of a stratified proportion, with replacement: the square of
# proportion_sd_ksd_wr(es).
#
# The previous body squared proportion_variance_ksd_wor(es), i.e. it squared
# a without-replacement *variance*. This now mirrors variance_ksd_wr, which
# squares the matching with-replacement standard error.
def proportion_variance_ksd_wr(es)
  proportion_sd_ksd_wr(es)**2
end

.standard_error_esd_wor(es) ⇒ Object


148
149
150
# File 'lib/statsample/multiset.rb', line 148

# Square root of the value variance_ksd_wor returns for +es+.
# NOTE(review): the method name says "esd" but the "ksd" variance is the one
# used — confirm that this is intentional.
def standard_error_esd_wor(es)
  variance = variance_ksd_wor(es)
  Math.sqrt(variance)
end

.standard_error_esd_wr(es) ⇒ Object


160
161
162
# File 'lib/statsample/multiset.rb', line 160

# Square root of the value variance_esd_wr returns for +es+.
def standard_error_esd_wr(es)
  variance = variance_esd_wr(es)
  Math.sqrt(variance)
end

.standard_error_ksd_wor(es) ⇒ Object


132
133
134
# File 'lib/statsample/multiset.rb', line 132

# Square root of the value variance_ksd_wor returns for +es+.
def standard_error_ksd_wor(es)
  variance = variance_ksd_wor(es)
  Math.sqrt(variance)
end

.standard_error_ksd_wr(es) ⇒ Object


107
108
109
110
111
112
113
114
# File 'lib/statsample/multiset.rb', line 107

# For each stratum hash in +es+ (keys 'N', 'n', 's') accumulates
# N**2 * s**2 / n while also totalling 'N'. Returns
# (1 / total N) * sqrt(accumulated sum).
def standard_error_ksd_wr(es)
  n_total = 0
  weighted = 0
  es.each do |h|
    n_total += h['N']
    weighted += (h['N']**2 * h['s']**2) / h['n'].to_f
  end
  (1.to_f / n_total) * Math.sqrt(weighted)
end

.variance_esd_wor(es) ⇒ Object


138
139
140
141
142
143
144
145
# File 'lib/statsample/multiset.rb', line 138

# For each stratum hash in +es+ (keys 'N', 'n', 's') accumulates
# N * (N - n) * (s**2 / n), then scales the total by 1 / n_total**2,
# where n_total is the sum of all 'N'.
def variance_esd_wor(es)
  n_total = calculate_n_total(es)
  weighted = 0
  es.each do |h|
    variance_over_n = h['s']**2 / h['n'].to_f
    weighted += h['N'] * (h['N'] - h['n']) * variance_over_n
  end
  (1.0 / (n_total**2)) * weighted
end

.variance_esd_wr(es) ⇒ Object


152
153
154
155
156
157
158
159
# File 'lib/statsample/multiset.rb', line 152

# For each stratum hash in +es+ (keys 'N', 'n', 's') accumulates
# s**2 * N**2 / n, then scales the total by 1 / n_total**2,
# where n_total is the sum of all 'N'.
def variance_esd_wr(es)
  n_total = calculate_n_total(es)
  weighted = 0
  es.each do |h|
    weighted += (h['s']**2 * h['N']**2) / h['n'].to_f
  end
  (1.0 / (n_total**2)) * weighted
end

.variance_ksd_wor(es) ⇒ Object

Source : Cochran (1972)


125
126
127
128
129
130
131
# File 'lib/statsample/multiset.rb', line 125

# Source : Cochran (1972)
#
# Sums, over the stratum hashes in +es+ (keys 'N', 'n', 's'):
#   (N / n_total)**2 * (s**2 / n) * (1 - n / N)
# where n_total is the sum of all 'N'. Returns 0 for an empty +es+.
def variance_ksd_wor(es)
  n_total = calculate_n_total(es)
  es.inject(0) do |acc, h|
    weight_sq = (h['N'].to_f / n_total)**2
    variance_over_n = h['s']**2 / h['n'].to_f
    fpc = 1 - (h['n'].to_f / h['N'])
    acc + (weight_sq * variance_over_n * fpc)
  end
end

.variance_ksd_wr(es) ⇒ Object


117
118
119
# File 'lib/statsample/multiset.rb', line 117

# Square of the value standard_error_ksd_wr returns for +es+.
def variance_ksd_wr(es)
  standard_error = standard_error_ksd_wr(es)
  standard_error**2
end

Instance Method Details

#mean(field) ⇒ Object

Population mean based on strata


247
248
249
250
251
# File 'lib/statsample/multiset.rb', line 247

# Population mean based on strata.
#
# Hands @ms.sum_field a block that, for each stratum name and vector it is
# given, returns stratum_ponderation(name) * vector.mean; the value returned
# by sum_field is returned unchanged.
def mean(field)
  @ms.sum_field(field) do |stratum_name, vector|
    weight = stratum_ponderation(stratum_name)
    weight * vector.mean
  end
end

#population_size ⇒ Object

Population size. Equal to the sum of the strata sizes. Symbol: N


217
218
219
# File 'lib/statsample/multiset.rb', line 217

# Returns the population size computed in the constructor: the sum of the
# values of the strata_sizes hash.
def population_size
  @population_size
end

#proportion(field, v = 1) ⇒ Object

Population proportion based on strata


234
235
236
237
238
# File 'lib/statsample/multiset.rb', line 234

# Population proportion based on strata.
#
# Hands @ms.sum_field a block that, for each stratum name and vector it is
# given, returns stratum_ponderation(name) * vector.proportion(v); the value
# returned by sum_field is returned unchanged.
def proportion(field, v = 1)
  @ms.sum_field(field) do |stratum_name, vector|
    weight = stratum_ponderation(stratum_name)
    weight * vector.proportion(v)
  end
end

#proportion_sd_esd_wor(field, v = 1) ⇒ Object


280
281
282
283
284
285
286
# File 'lib/statsample/multiset.rb', line 280

# Builds one hash per stratum via @ms.collect_vector, holding the declared
# stratum size ('N'), the vector size ('n') and the vector's proportion of
# +v+ ('p'), then delegates to StratifiedSample.proportion_sd_esd_wor.
def proportion_sd_esd_wor(field, v = 1)
  strata = @ms.collect_vector(field) do |stratum_name, vector|
    {
      'N' => @strata_sizes[stratum_name],
      'n' => vector.size,
      'p' => vector.proportion(v)
    }
  end

  StratifiedSample.proportion_sd_esd_wor(strata)
end

#proportion_standard_error(field, v = 1) ⇒ Object


288
289
290
291
292
293
294
295
296
# File 'lib/statsample/multiset.rb', line 288

# Standard error of the stratified proportion of value +v+ in +field+.
#
# For every stratum the block handed to @ms.sum_field returns
#   N_h**2 * (1 - n_h / N_h) * p * (1 - p) / (n_h - 1)
# where N_h is the declared stratum size, n_h the vector size and p the
# overall proportion returned by #proportion. The result is
# (1 / population size) * sqrt(value returned by sum_field).
#
# The sampling fraction n_h / N_h is computed in floating point. With two
# Integers the previous code truncated it to 0 whenever n_h < N_h, which
# silently dropped the finite-population correction.
#
# NOTE(review): the same overall proportion is used for every stratum rather
# than each stratum's own proportion — confirm this is the intended formula.
# A stratum with a single case makes the denominator (n_h - 1) zero.
def proportion_standard_error(field, v = 1)
  prop = proportion(field, v)
  sum = @ms.sum_field(field) do |s_name, vector|
    nh = vector.size
    s_size = @strata_sizes[s_name]
    sampling_fraction = nh.to_f / s_size
    s_size**2 * (1 - sampling_fraction) * prop * (1 - prop) / (nh - 1)
  end
  1.quo(@population_size) * Math.sqrt(sum)
end

#sample_size ⇒ Object

Sample size. Equal to sum of sample of each stratum


221
222
223
# File 'lib/statsample/multiset.rb', line 221

# Returns the sample size computed in the constructor: the sum of nrows
# over every dataset of the multiset.
def sample_size
  @sample_size
end

#standard_error_wor(field) ⇒ Object

Standard error with estimated population variance and without replacement. Source: Cochran (1972)


254
255
256
257
258
259
260
# File 'lib/statsample/multiset.rb', line 254

# Standard error with estimated population variance and without replacement.
# Source: Cochran (1972)
#
# Builds one hash per stratum via @ms.collect_vector, holding the declared
# stratum size ('N'), the vector size ('n') and vector.sds ('s'), then
# delegates to StratifiedSample.standard_error_esd_wor.
def standard_error_wor(field)
  strata = @ms.collect_vector(field) do |stratum_name, vector|
    {
      'N' => @strata_sizes[stratum_name],
      'n' => vector.size,
      's' => vector.sds
    }
  end

  StratifiedSample.standard_error_esd_wor(strata)
end

#standard_error_wor_2(field) ⇒ Object

Standard error with estimated population variance and without replacement. Source: stattrek.com/Lesson6/STRAnalysis.aspx


265
266
267
268
269
270
271
# File 'lib/statsample/multiset.rb', line 265

# Standard error with estimated population variance and without replacement.
# Source: stattrek.com/Lesson6/STRAnalysis.aspx
#
# For every stratum the block handed to @ms.sum_field returns
#   N_h**2 * (1 - n_h / N_h) * variance_sample / n_h
# and the result is (1 / population size) * sqrt(value from sum_field).
def standard_error_wor_2(field)
  total = @ms.sum_field(field) do |stratum_name, vector|
    stratum_n = @strata_sizes[stratum_name]
    sampling_fraction = vector.size.to_f / stratum_n
    stratum_n**2 * (1 - sampling_fraction) * vector.variance_sample / vector.size.to_f
  end
  (1 / @population_size.to_f) * Math.sqrt(total)
end

#standard_error_wr(field) ⇒ Object


273
274
275
276
277
278
279
# File 'lib/statsample/multiset.rb', line 273

# Builds one hash per stratum via @ms.collect_vector, holding the declared
# stratum size ('N'), the vector size ('n') and vector.sds ('s'), then
# delegates to StratifiedSample.standard_error_esd_wr.
def standard_error_wr(field)
  strata = @ms.collect_vector(field) do |stratum_name, vector|
    {
      'N' => @strata_sizes[stratum_name],
      'n' => vector.size,
      's' => vector.sds
    }
  end

  StratifiedSample.standard_error_esd_wr(strata)
end

#strata_number ⇒ Object

Number of strata


212
213
214
# File 'lib/statsample/multiset.rb', line 212

# Returns the number of strata, taken from the multiset's n_datasets in the
# constructor.
def strata_number
  @strata_number
end

#stratum_ponderation(h) ⇒ Object Also known as: wh

Stratum ponderation. Symbol: W<sub>h</sub>


241
242
243
# File 'lib/statsample/multiset.rb', line 241

# Stratum ponderation (weight) of stratum +h+: its declared size, converted
# to Float, divided by the population size.
def stratum_ponderation(h)
  stratum_n = @strata_sizes[h]
  stratum_n.to_f / @population_size
end

#stratum_size(h) ⇒ Object

Size of stratum h


225
226
227
# File 'lib/statsample/multiset.rb', line 225

# Returns the declared size of stratum +h+, looked up in the strata_sizes
# hash given to the constructor.
def stratum_size(h)
  @strata_sizes[h]
end

#variance_pst(field, v = 1) ⇒ Object

Cochran(1971), p. 150


298
299
300
301
302
303
304
305
306
307
308
# File 'lib/statsample/multiset.rb', line 298

# Cochran(1971), p. 150
#
# For every (stratum name, dataset) pair of @ms.datasets accumulates
#   (N_h**2 * (N_h - n_h) / (N_h - 1)) * (p_h * (1 - p_h) / (n_h - 1))
# where N_h is the declared stratum size, n_h is ds.cases as a Float and
# p_h is the proportion of +v+ in ds[field]. The total is scaled by
# 1 / population_size**2.
def variance_pst(field, v = 1)
  total = @ms.datasets.inject(0) do |acc, (stratum_name, ds)|
    nh = ds.cases.to_f
    stratum_n = @strata_sizes[stratum_name]
    p_h = ds[field].proportion(v)
    size_term = (stratum_n**2 * (stratum_n - nh)) / (stratum_n - 1)
    prop_term = p_h * (1 - p_h) / (nh - 1)
    acc + (size_term * prop_term)
  end
  (1 / @population_size.to_f**2) * total
end

#vectors_by_field(field) ⇒ Object


228
229
230
231
232
# File 'lib/statsample/multiset.rb', line 228

# Returns an array holding ds[field] for every (name, dataset) pair of
# @ms.datasets, in iteration order.
def vectors_by_field(field)
  @ms.datasets.map { |_name, ds| ds[field] }
end