Class: Mirlo::Dataset

Inherits:
Object
  • Object
show all
Includes:
Enumerable, Plotting
Defined in:
lib/mirlo/dataset.rb

Overview

Public: Dataset class to store a set of samples with their associated targets.

Direct Known Subclasses

AndDataSet, DoubleMoonDataSet, OrDataSet, XorDataSet

Constant Summary collapse

# Public: Default mapping from a value to its display label, keyed by
# one-element Arrays. It is the default for the `labels:` keyword of
# #initialize, so the same Hash object is shared by every dataset that does
# not pass its own labels; it is frozen to keep that shared default from being
# mutated through any single instance.
DEFAULT_LABELS =
{
  [0]  => 'Zero',
  [1]  => 'Positive',
  [-1] => 'Negative'
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Plotting

#plot

Constructor Details

#initialize(samples: [], targets: nil, feature_names: [], title: "Dataset", add_bias: true, labels: DEFAULT_LABELS) ⇒ Dataset

Returns a new instance of Dataset.



17
18
19
20
21
22
23
24
25
26
27
# File 'lib/mirlo/dataset.rb', line 17

# Public: Build a dataset from samples, optionally pairing them with targets.
#
# samples       - The samples to store. Stored as given when targets is nil.
# targets       - Optional targets. When non-nil, the stored samples are the
#                 result of build_from_samples_and_targets(samples, targets).
# feature_names - Names used by #feature to resolve a feature by name.
# title         - Title of the dataset.
# add_bias      - Accepted for callers, but not referenced in this method body.
# labels        - Hash consulted by #label_for.
#
# The `||=` assignments leave an already-set instance variable untouched
# (presumably so a subclass can preset it before calling super - TODO confirm).
def initialize(samples: [], targets: nil, feature_names: [], title: "Dataset", add_bias: true, labels: DEFAULT_LABELS)
  @feature_names ||= feature_names
  @title ||= title
  @labels ||= labels
  @samples = targets.nil? ? samples : build_from_samples_and_targets(samples, targets)
end

Instance Attribute Details

#feature_names ⇒ Object (readonly)

Returns the value of attribute feature_names.



15
16
17
# File 'lib/mirlo/dataset.rb', line 15

# Public: Read-only accessor for the feature names.
#
# Returns the value of @feature_names, which #initialize assigns from its
# `feature_names:` keyword unless the variable is already set.
def feature_names
  @feature_names
end

#samples ⇒ Object (readonly)

Returns the value of attribute samples.



15
16
17
# File 'lib/mirlo/dataset.rb', line 15

# Public: Read-only accessor for the stored samples.
#
# Returns the value of @samples, which #initialize sets either to the given
# samples or to the result of build_from_samples_and_targets.
def samples
  @samples
end

#title ⇒ Object (readonly)

Returns the value of attribute title.



15
16
17
# File 'lib/mirlo/dataset.rb', line 15

# Public: Read-only accessor for the dataset title.
#
# Returns the value of @title, which #initialize assigns from its `title:`
# keyword unless the variable is already set.
def title
  @title
end

Instance Method Details

#each(*args, &block) ⇒ Object



64
65
66
# File 'lib/mirlo/dataset.rb', line 64

# Public: Iterate over the stored samples.
#
# Forwards every argument and the block unchanged to @samples.each.
#
# Returns whatever @samples.each returns for the given arguments and block.
def each(*args, &block)
  @samples.each(*args, &block)
end

#feature(feature_name_or_index) ⇒ Object



29
30
31
32
33
34
35
36
37
# File 'lib/mirlo/dataset.rb', line 29

# Public: Collect one feature's value from every sample.
#
# feature_name_or_index - Either an entry of feature_names (resolved to its
#                         position in that list) or a value used directly as
#                         the subscript into each sample.
#
# Returns an Array with `sample[index]` for every sample, in sample order.
def feature(feature_name_or_index)
  # `index` returns nil when the name is not listed, so a single lookup covers
  # both the "known name" and the "raw index" case (position 0 is truthy in Ruby).
  index = feature_names.index(feature_name_or_index) || feature_name_or_index

  samples.map { |sample| sample[index] }
end

#input_matrix ⇒ Object



76
77
78
# File 'lib/mirlo/dataset.rb', line 76

# Public: Matrix whose rows are the `features` of each sample, in sample order.
#
# The result is memoized in @input_matrix; #shuffle! resets that cache.
# The rows are handed to Matrix.rows with copy = false.
#
# Returns a Matrix.
def input_matrix
  @input_matrix ||= begin
    feature_rows = samples.map(&:features)
    Matrix.rows(feature_rows, false)
  end
end

#label_for(val) ⇒ Object



48
49
50
# File 'lib/mirlo/dataset.rb', line 48

# Public: Look up the display label for a value.
#
# val - The key to look up in the @labels Hash.
#
# Returns the entry stored in @labels for val, or val itself when that entry
# is nil or false.
def label_for(val)
  known_label = @labels[val]
  known_label || val
end

#num_features ⇒ Object



68
69
70
# File 'lib/mirlo/dataset.rb', line 68

# Public: Number of features per sample, taken from the first sample.
#
# Memoized in @num_features after the first call.
#
# Returns the `feature_size` of `samples.first`.
def num_features
  @num_features ||= begin
    first_sample = samples.first
    first_sample.feature_size
  end
end

#num_outputs ⇒ Object



72
73
74
# File 'lib/mirlo/dataset.rb', line 72

# Public: Number of outputs per sample, taken from the first sample.
#
# Memoized in @num_outputs after the first call.
#
# Returns the `target_size` of `samples.first`.
def num_outputs
  @num_outputs ||= begin
    first_sample = samples.first
    first_sample.target_size
  end
end

#shuffle! ⇒ Object



84
85
86
87
88
89
90
91
92
93
# File 'lib/mirlo/dataset.rb', line 84

# Public: Randomly reorder the samples in place.
#
# Clears the memoized @input_matrix and @target_matrix so they are rebuilt in
# the new order on next access, then replaces @samples (and @targets) with the
# same random permutation of the current samples and targets.
#
# Returns a two-element Array: [shuffled samples, shuffled targets].
def shuffle!
  @input_matrix = @target_matrix = nil

  # Snapshot both collections once. #targets maps over every sample on each
  # call, so invoking it per position made the shuffle quadratic.
  current_samples = samples
  current_targets = targets

  shuffled_positions = (0...size).to_a.shuffle

  @samples = shuffled_positions.map { |i| current_samples[i] }
  @targets = shuffled_positions.map { |i| current_targets[i] }

  # Same value the original parallel assignment evaluated to.
  [@samples, @targets]
end

#size ⇒ Object



60
61
62
# File 'lib/mirlo/dataset.rb', line 60

# Public: Number of samples in the dataset.
#
# Returns the result of @samples.size.
def size
  @samples.size
end

#subset_with_target(target) ⇒ Object



39
40
41
42
# File 'lib/mirlo/dataset.rb', line 39

# Public: Build a new Dataset holding only the samples with the given target.
#
# target - Value compared with `==` against each sample's `target`.
#
# The new dataset reuses this dataset's feature_names and uses the target
# itself as its title; no other constructor options are passed along.
#
# Returns a new Dataset.
def subset_with_target(target)
  Dataset.new(
    samples: samples.find_all { |sample| sample.target == target },
    feature_names: feature_names,
    title: target
  )
end

#target_matrix ⇒ Object



80
81
82
# File 'lib/mirlo/dataset.rb', line 80

# Public: Matrix whose rows are the `target` of each sample, in sample order.
#
# The result is memoized in @target_matrix; #shuffle! resets that cache.
# The rows are handed to Matrix.rows with copy = false.
#
# Returns a Matrix.
def target_matrix
  @target_matrix ||= begin
    target_rows = samples.map(&:target)
    Matrix.rows(target_rows, false)
  end
end

#target_set ⇒ Object



52
53
54
# File 'lib/mirlo/dataset.rb', line 52

# Public: The distinct targets present in the dataset, in sorted order.
#
# Returns the result of removing duplicates from #targets and sorting what remains.
def target_set
  distinct_targets = targets.uniq
  distinct_targets.sort
end

#targets ⇒ Object



56
57
58
# File 'lib/mirlo/dataset.rb', line 56

# Public: The target of every sample, in sample order.
#
# Recomputed from #samples on every call (nothing is cached here).
#
# Returns an Array with one `target` per sample.
def targets
  samples.map { |sample| sample.target }
end

#targets_for(feature_values) ⇒ Object



44
45
46
# File 'lib/mirlo/dataset.rb', line 44

# Public: Targets of the samples that match the given feature values.
#
# feature_values - Passed unchanged to each sample's `has_features?`.
#
# Returns an Array with the `target` of every sample for which
# `has_features?(feature_values)` is truthy, in sample order.
def targets_for(feature_values)
  matching_samples = samples.select { |sample| sample.has_features?(feature_values) }
  matching_samples.map(&:target)
end