Class: DataModeler::Dataset

Inherits:
Object
  • Object
show all
Includes:
ConvertingTimeAndIndices, IteratingBasedOnNext
Defined in:
lib/data_modeler/dataset/dataset.rb,
lib/data_modeler/exceptions.rb

Overview

Note:

Checks that validate whether enough data is present (given ntimes, tspread and look_ahead) should be done by the caller (typically DatasetGen).

Build complex inputs and targets from the data to train the model.

Defined Under Namespace

Classes: TimeNotFoundError

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from ConvertingTimeAndIndices

#idx, #time

Methods included from IteratingBasedOnNext

#each, #to_a

Constructor Details

#initialize(data, inputs:, targets:, first_idx:, end_idx:, ntimes:, tspread:, look_ahead:) ⇒ Dataset

Note:

We expect Dataset indices to be used with left inclusion and right exclusion, i.e. targets are considered in the range `[from, to)`.

Returns a new instance of Dataset.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/data_modeler/dataset/dataset.rb', line 29

# Sets up a dataset over `data`, positioned on its first target row.
#
# @param data [#[]] table of series; `data[:time]` must respond to `size`
# @param inputs [#collect] keys of the series in `data` read to build inputs
# @param targets [#collect] keys of the series in `data` read to build targets
# @param first_idx [Integer] row index of the first target (inclusive)
# @param end_idx [Integer] row index at which iteration stops (exclusive)
# @param ntimes stored as given -- NOTE(review): semantics are defined by
#   code not shown here (presumably `init_inputs`); confirm there
# @param tspread stored as given -- NOTE(review): same caveat as `ntimes`
# @param look_ahead stored as given -- NOTE(review): same caveat as `ntimes`
# @note indices are meant to be used as a half-open range `[first_idx, end_idx)`
def initialize(
  data, inputs:, targets:, first_idx:, end_idx:, ntimes:, tspread:, look_ahead:
)
  # Source table and the series keys to read from it.
  @data = data
  @input_series, @target_series = inputs, targets

  # Iteration bounds.
  @first_idx, @end_idx = first_idx, end_idx

  @ntimes = ntimes
  # Number of rows, taken from the length of the `:time` series.
  @nrows = data[:time].size
  @tspread, @look_ahead = tspread, look_ahead

  # Start on the first target; the input indices are derived last because
  # `init_inputs` runs against the state assigned above.
  @target_idx = first_idx
  @input_idxs = init_inputs
end

Instance Attribute Details

#data ⇒ Object (readonly)

Returns the value of attribute data.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the `data` object handed to the constructor. The builders index
# it as `data[series][row]`, and the constructor reads `data[:time].size`.
def data
  @data
end

#end_idx ⇒ Object (readonly)

Returns the value of attribute end_idx.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the exclusive upper bound of the iteration: `peek` raises
# StopIteration as soon as `target_idx >= end_idx`.
def end_idx
  @end_idx
end

#first_idx ⇒ Object (readonly)

Returns the value of attribute first_idx.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the index the constructor uses as the initial `target_idx`.
def first_idx
  @first_idx
end

#input_idxs ⇒ Object (readonly)

Returns the value of attribute input_idxs.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the row indices that `inputs` reads from `data`. The value is
# recomputed via `init_inputs` in the constructor and on every call to `next`.
def input_idxs
  @input_idxs
end

#input_series ⇒ Object (readonly)

Returns the value of attribute input_series.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the keys of the series in `data` that `inputs` reads
# (the constructor's `inputs:` argument).
def input_series
  @input_series
end

#look_ahead ⇒ Object (readonly)

Returns the value of attribute look_ahead.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the constructor's `look_ahead:` argument, stored unchanged.
# NOTE(review): how it is consumed is not visible in this listing -- confirm
# against `init_inputs`.
def look_ahead
  @look_ahead
end

#nrows ⇒ Object (readonly)

Returns the value of attribute nrows.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the row count captured at construction as `data[:time].size`.
def nrows
  @nrows
end

#ntimes ⇒ Object (readonly)

Returns the value of attribute ntimes.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the constructor's `ntimes:` argument, stored unchanged.
# NOTE(review): how it is consumed is not visible in this listing -- confirm
# against `init_inputs`.
def ntimes
  @ntimes
end

#target_idx ⇒ Object (readonly)

Returns the value of attribute target_idx.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the row index of the current target. It starts at `first_idx`
# and is incremented by one on every call to `next`.
def target_idx
  @target_idx
end

#target_series ⇒ Object (readonly)

Returns the value of attribute target_series.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the keys of the series in `data` that `targets` reads
# (the constructor's `targets:` argument).
def target_series
  @target_series
end

#tspread ⇒ Object (readonly)

Returns the value of attribute tspread.



8
9
10
# File 'lib/data_modeler/dataset/dataset.rb', line 8

# Reader for the constructor's `tspread:` argument, stored unchanged.
# NOTE(review): how it is consumed is not visible in this listing -- confirm
# against `init_inputs`.
def tspread
  @tspread
end

Instance Method Details

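#==(other) ⇒ Boolean

Returns true when both objects have the same class, share the very same data object, and agree on every other instance variable; returns false otherwise.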

Overloaded comparison for easier testing



87
88
89
90
91
92
93
# File 'lib/data_modeler/dataset/dataset.rb', line 87

# Equality check tailored for tests.
#
# Two datasets are equal when they are instances of the same class, hold the
# very same `data` object (identity, not content), and every other instance
# variable of the receiver compares `==` to the same-named variable of `other`.
#
# @param other [Object] the object to compare against
# @return [Boolean] whether the two objects are considered equal
def ==(other)
  # Stop right away on a class mismatch: `other` may not respond to `data`.
  return false unless self.class == other.class
  # `data` is matched by identity, so it is never compared element by element.
  return false unless data.object_id == other.data.object_id

  instance_variables.reject { |ivar| ivar == :@data }.all? do |ivar|
    instance_variable_get(ivar) == other.instance_variable_get(ivar)
  end
end

#inputs ⇒ Array

Builds inputs for the model



50
51
52
53
54
55
56
# File 'lib/data_modeler/dataset/dataset.rb', line 50

# Builds the flat list of input values for the model.
#
# For every row index in `input_idxs`, in order, the value of each series in
# `input_series` is read from `data`; the per-row lists are concatenated.
#
# @return [Array] the values, row by row and series by series within a row
def inputs
  input_idxs.flat_map do |row|
    input_series.map { |name| data[name][row] }
  end
end

#next ⇒ Array

Returns the next pair [inputs, targets] and increments the target index



75
76
77
78
79
80
# File 'lib/data_modeler/dataset/dataset.rb', line 75

# Returns the current `[inputs, targets]` pair, then advances the dataset.
#
# The pair is obtained through `peek` before any state changes, so a
# StopIteration raised by `peek` leaves the dataset untouched. Afterwards the
# target index moves forward by one and the input indices are recomputed.
#
# @return [Array] the pair `[inputs, targets]` as returned by `peek`
# @raise [StopIteration] propagated from `peek`
def next
  pair = peek
  @target_idx += 1
  @input_idxs = init_inputs
  pair
end

#peek ⇒ Array

Returns the next pair [inputs, targets]

Raises:

  • (StopIteration)


68
69
70
71
# File 'lib/data_modeler/dataset/dataset.rb', line 68

# Returns the current `[inputs, targets]` pair without advancing.
#
# @return [Array] two-element array holding `inputs` and `targets`
# @raise [StopIteration] when `target_idx` has reached or passed `end_idx`
def peek
  # `end_idx` is an exclusive bound, hence `>=`.
  exhausted = target_idx >= end_idx
  raise StopIteration if exhausted

  [inputs, targets]
end

#targets ⇒ Array

Builds targets for the model



60
61
62
63
64
# File 'lib/data_modeler/dataset/dataset.rb', line 60

# Builds the list of target values for the model.
#
# Reads, for each series in `target_series`, the value stored in `data` at
# the current `target_idx`.
#
# @return [Array] one value per target series, in `target_series` order
def targets
  target_series.map { |name| data[name][target_idx] }
end