Class: Statsample::Multiset

Inherits:
Object show all
Defined in:
lib/statsample/multiset.rb

Overview

Multiset joins multiple datasets that share the same fields and vectors but may have different numbers of cases. It is the base class for stratified and cluster sampling estimation.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fields) ⇒ Multiset

To create a multiset

  • Multiset.new(%w{f1 f2 f3}) # define only fields



12
13
14
15
# File 'lib/statsample/multiset.rb', line 12

# Sets up a multiset that starts with no datasets.
#
# @param fields [Array] field names; add_dataset compares each dataset's
#   vectors against this list
def initialize(fields)
  @fields = fields
  @datasets = {} # filled later through add_dataset, keyed by dataset key
end

Instance Attribute Details

#datasets ⇒ Object (readonly)

Hash of Daru::DataFrame objects, keyed by dataset name



9
10
11
# File 'lib/statsample/multiset.rb', line 9

# Reader for the datasets held by this multiset.
#
# @return [Hash] the hash created in the constructor, mapping each key
#   passed to add_dataset to its dataset
def datasets
  @datasets
end

#fields ⇒ Object (readonly)

Names of the fields



7
8
9
# File 'lib/statsample/multiset.rb', line 7

# Reader for the field names given to the constructor.
#
# @return [Object] the value stored in @fields
def fields
  @fields
end

Class Method Details

.new_empty_vectors(fields, ds_names) ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/statsample/multiset.rb', line 16

# Builds a multiset that already contains one empty dataset per name.
#
# @param fields [Array] field names for the multiset and for every dataset
# @param ds_names [#each] keys under which the empty datasets are registered
# @return [Multiset] the populated multiset
def self.new_empty_vectors(fields, ds_names)
  Multiset.new(fields).tap do |multiset|
    ds_names.each do |ds_name|
      # Each dataset is a data frame with no rows, ordered by the shared fields.
      multiset.add_dataset(ds_name, Daru::DataFrame.new({}, order: fields))
    end
  end
end

Instance Method Details

#[](i) ⇒ Object



84
85
86
# File 'lib/statsample/multiset.rb', line 84

# Looks up a single dataset by key.
#
# @param i [Object] key the dataset was stored under
# @return [Object] whatever @datasets holds for that key
def [](i)
  @datasets[i]
end

#add_dataset(key, ds) ⇒ Object



61
62
63
64
65
66
67
# File 'lib/statsample/multiset.rb', line 61

# Registers a dataset under +key+ after checking that its vectors match
# the multiset's fields exactly (same names, same order).
#
# @param key [Object] name the dataset is stored under
# @param ds [#vectors] dataset to add
# @return [Object] the dataset that was stored
# @raise [ArgumentError] when the dataset's vectors differ from @fields
def add_dataset(key, ds)
  ds_fields = ds.vectors.to_a
  unless ds_fields == @fields
    raise ArgumentError, "Dataset(#{ds_fields})must have the same fields of the Multiset(#{@fields})"
  end

  @datasets[key] = ds
end

#collect_vector(field) ⇒ Object



76
77
78
# File 'lib/statsample/multiset.rb', line 76

# Maps every dataset to the block's result for one of its vectors.
#
# @param field [Object] field whose vector is pulled from each dataset
# @yield [name, vector] dataset key and that dataset's vector for +field+
# @return [Array] block results, in dataset insertion order
def collect_vector(field)
  @datasets.map do |name, dataset|
    yield(name, dataset[field])
  end
end

#datasets_names ⇒ Object



53
54
55
# File 'lib/statsample/multiset.rb', line 53

# Lists the keys of all stored datasets in sorted order.
#
# @return [Array] sorted dataset keys
def datasets_names
  names = @datasets.keys
  names.sort
end

#each(&block) ⇒ Object



88
89
90
91
92
93
# File 'lib/statsample/multiset.rb', line 88

# Iterates over the stored datasets, skipping those with zero rows.
#
# @yield [name, dataset] dataset key and the dataset itself
# @return [Hash] the underlying datasets hash
def each(&block)
  @datasets.each do |name, dataset|
    # Datasets with zero rows are never passed to the block.
    block.call(name, dataset) unless dataset.nrows == 0
  end
end

#each_vector(field) ⇒ Object



80
81
82
# File 'lib/statsample/multiset.rb', line 80

# Walks every dataset and hands the block that dataset's vector for +field+.
#
# @param field [Object] field whose vector is pulled from each dataset
# @yield [name, vector] dataset key and that dataset's vector for +field+
# @return [Hash] the underlying datasets hash
def each_vector(field)
  @datasets.each do |name, dataset|
    yield(name, dataset[field])
  end
end

#n_datasets ⇒ Object



57
58
59
# File 'lib/statsample/multiset.rb', line 57

# Counts the stored datasets.
#
# @return [Integer] number of entries in the datasets hash
def n_datasets
  @datasets.length
end

#sum_field(field) ⇒ Object



68
69
70
71
72
73
74
75
# File 'lib/statsample/multiset.rb', line 68

# Adds up the block's result over every dataset, starting from 0.
#
# @param field [Object] field whose vector is pulled from each dataset
# @yield [stratum_name, vector] dataset key and that dataset's vector
# @yieldreturn [Numeric] value contributed by that dataset
# @return [Numeric] running total of the yielded values
def sum_field(field)
  @datasets.reduce(0) do |total, (stratum_name, dataset)|
    total + yield(stratum_name, dataset[field])
  end
end

#union(&block) ⇒ Object

Generates a new dataset as the union of the partial datasets. If a block is given, it is applied to a copy of each dataset before the union.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/statsample/multiset.rb', line 26

# Builds one data frame holding the cases of every non-empty dataset.
#
# For each field the values of all datasets are appended in iteration
# order. Datasets with zero rows contribute nothing because #each skips them.
#
# @yield [name, dataset] optional; receives a duplicate of each dataset so
#   the block can modify it in place before its values are collected. The
#   block's return value is ignored.
# @return [Daru::DataFrame] data frame with one vector per field
def union(&block)
  union_field = {}
  names = {}

  each do |k, ds|
    if block
      # Work on a copy so the dataset stored in the multiset is not
      # replaced by whatever the block does to it.
      ds = ds.dup
      block.call(k, ds)
    end

    @fields.each do |f|
      vector = ds[f]
      (union_field[f] ||= []).concat(vector.to_a)
      # Keep the first truthy vector name seen for this field.
      names[f] ||= vector.name
    end
  end

  @fields.each do |f|
    union_field[f] = Daru::Vector.new(union_field[f], name: names[f])
  end

  Daru::DataFrame.new(union_field, order: @fields)
end