Class: Mirlo::Dataset
- Inherits:
-
Object
show all
- Includes:
- Enumerable, Plotting
- Defined in:
- lib/mirlo/dataset.rb
Overview
Public: Dataset class to store a set of samples with their associated targets.
Constant Summary
collapse
- DEFAULT_LABELS =
{
[0] => 'Zero',
[1] => 'Positive',
[-1] => 'Negative'
}
Instance Attribute Summary collapse
Instance Method Summary
collapse
Methods included from Plotting
#plot
Constructor Details
#initialize(samples: [], targets: nil, feature_names: [], title: "Dataset", add_bias: true, labels: DEFAULT_LABELS) ⇒ Dataset
Returns a new instance of Dataset.
17
18
19
20
21
22
23
24
25
26
27
|
# File 'lib/mirlo/dataset.rb', line 17
# Public: Build a dataset.
#
# samples       - Stored as-is when no targets are given; otherwise handed,
#                 together with targets, to build_from_samples_and_targets.
# targets       - Optional targets to pair with samples (default: nil).
# feature_names - Names used by #feature to resolve a name to a position.
# title         - Title of the dataset (default: "Dataset").
# add_bias      - Accepted for callers, but not read inside this method.
# labels        - Mapping consulted by #label_for (default: DEFAULT_LABELS).
def initialize(samples: [], targets: nil, feature_names: [], title: "Dataset", add_bias: true, labels: DEFAULT_LABELS)
  @feature_names ||= feature_names
  @title ||= title
  @labels ||= labels
  @samples = targets.nil? ? samples : build_from_samples_and_targets(samples, targets)
end
|
Instance Attribute Details
#feature_names ⇒ Object
Returns the value of attribute feature_names.
15
16
17
|
# File 'lib/mirlo/dataset.rb', line 15
# Public: Returns the value held in @feature_names, which the constructor
# sets from its feature_names argument.
def feature_names
@feature_names
end
|
#samples ⇒ Object
Returns the value of attribute samples.
15
16
17
|
# File 'lib/mirlo/dataset.rb', line 15
# Public: Returns the value held in @samples. The constructor assigns it and
# #shuffle! replaces it with a reordered collection.
def samples
@samples
end
|
#title ⇒ Object
Returns the value of attribute title.
15
16
17
|
# File 'lib/mirlo/dataset.rb', line 15
# Public: Returns the value held in @title, which the constructor sets from
# its title argument.
def title
@title
end
|
Instance Method Details
#each(*args, &block) ⇒ Object
64
65
66
|
# File 'lib/mirlo/dataset.rb', line 64
# Public: Iterate over the stored samples.
#
# All positional arguments and the block are forwarded unchanged to
# @samples.each, and whatever that call returns is returned from here.
def each(*args, &block)
@samples.each(*args, &block)
end
|
#feature(feature_name_or_index) ⇒ Object
29
30
31
32
33
34
35
36
37
|
# File 'lib/mirlo/dataset.rb', line 29
# Public: Collect one feature across every sample.
#
# feature_name_or_index - Either an entry of feature_names (resolved to its
#                         position in that list) or a value used directly as
#                         the subscript into each sample.
#
# Returns an Array with sample[position] for each sample.
def feature(feature_name_or_index)
  # A nil lookup means the argument is not a known name, so use it verbatim.
  position = feature_names.index(feature_name_or_index) || feature_name_or_index
  samples.map { |row| row[position] }
end
|
#input_matrix ⇒ Object
76
77
78
|
# File 'lib/mirlo/dataset.rb', line 76
# Public: Matrix whose rows are the features of each sample.
#
# The result is cached in @input_matrix (#shuffle! clears the cache).
# Matrix.rows is called with copy = false, so the collected rows are used
# directly rather than duplicated.
def input_matrix
  @input_matrix ||= begin
    feature_rows = samples.map(&:features)
    Matrix.rows(feature_rows, false)
  end
end
|
#label_for(val) ⇒ Object
48
49
50
|
# File 'lib/mirlo/dataset.rb', line 48
# Public: Translate a value into its label.
#
# val - Key looked up in @labels.
#
# Returns the entry stored under val, or val itself when the lookup is nil
# or false.
def label_for(val)
  label = @labels[val]
  label || val
end
|
#num_features ⇒ Object
68
69
70
|
# File 'lib/mirlo/dataset.rb', line 68
# Public: Number of features, taken from the first sample's feature_size and
# cached in @num_features.
#
# NOTE(review): feature_size is called on samples.first without a nil check,
# so this presumably expects at least one sample to be present.
def num_features
  @num_features ||= begin
    first_sample = samples.first
    first_sample.feature_size
  end
end
|
#num_outputs ⇒ Object
72
73
74
|
# File 'lib/mirlo/dataset.rb', line 72
# Public: Number of outputs, taken from the first sample's target_size and
# cached in @num_outputs.
#
# NOTE(review): target_size is called on samples.first without a nil check,
# so this presumably expects at least one sample to be present.
def num_outputs
  @num_outputs ||= begin
    first_sample = samples.first
    first_sample.target_size
  end
end
|
#shuffle! ⇒ Object
84
85
86
87
88
89
90
91
92
93
|
# File 'lib/mirlo/dataset.rb', line 84
# Public: Reorder the samples in place using one random permutation.
#
# The cached input and target matrices are dropped first so they are rebuilt
# from the new order. The same permutation is applied to the samples and to
# their targets; the results are stored in @samples and @targets.
#
# Returns a two-element Array: [shuffled samples, shuffled targets].
def shuffle!
  @input_matrix = nil
  @target_matrix = nil

  order = (0...size).to_a.shuffle
  current_samples = samples
  current_targets = targets

  # Both right-hand sides are evaluated before either variable is assigned.
  @samples, @targets = order.map { |pos| current_samples[pos] }, order.map { |pos| current_targets[pos] }
end
|
#size ⇒ Object
60
61
62
|
# File 'lib/mirlo/dataset.rb', line 60
# Public: Returns the number of stored samples, as reported by @samples.size.
def size
@samples.size
end
|
#subset_with_target(target) ⇒ Object
39
40
41
42
|
# File 'lib/mirlo/dataset.rb', line 39
# Public: Build a new Dataset holding only the samples with a given target.
#
# target - Value compared with == against each sample's target; it also
#          becomes the title of the new dataset.
#
# The new dataset shares this dataset's feature_names. This dataset's label
# mapping is forwarded as well, so #label_for gives the same answers on the
# subset; when @labels is unset the constructor default applies.
#
# Returns a Dataset.
def subset_with_target(target)
  matching_samples = samples.select { |sample| sample.target == target }

  options = { samples: matching_samples, feature_names: feature_names, title: target }
  # Without this the subset silently reverts to the default labels.
  options[:labels] = @labels if @labels

  Dataset.new(**options)
end
|
#target_matrix ⇒ Object
80
81
82
|
# File 'lib/mirlo/dataset.rb', line 80
# Public: Matrix whose rows are the targets of each sample.
#
# The result is cached in @target_matrix (#shuffle! clears the cache).
# Matrix.rows is called with copy = false, so the collected rows are used
# directly rather than duplicated.
def target_matrix
  @target_matrix ||= begin
    target_rows = samples.map(&:target)
    Matrix.rows(target_rows, false)
  end
end
|
#target_set ⇒ Object
52
53
54
|
# File 'lib/mirlo/dataset.rb', line 52
# Public: The distinct targets present in the dataset, in sorted order.
#
# Returns a new Array: duplicates are removed first, then the remainder is
# sorted.
def target_set
  distinct_targets = targets.uniq
  distinct_targets.sort
end
|
#targets ⇒ Object
56
57
58
|
# File 'lib/mirlo/dataset.rb', line 56
# Public: The target of every sample, in sample order.
#
# Returns a new Array built by calling target on each sample.
def targets
  samples.map { |sample| sample.target }
end
|
#targets_for(feature_values) ⇒ Object
44
45
46
|
# File 'lib/mirlo/dataset.rb', line 44
# Public: Targets of the samples that match the given feature values.
#
# feature_values - Passed to each sample's has_features? predicate.
#
# Returns an Array with the target of every sample for which has_features?
# is truthy, in sample order.
def targets_for(feature_values)
  matching = samples.select { |sample| sample.has_features?(feature_values) }
  matching.map(&:target)
end
|