Class: Remi::BusinessRules::DataSubject

Inherits:
Object
  • Object
show all
Defined in:
lib/remi/cucumber/business_rules.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(subject) ⇒ DataSubject

Returns a new instance of DataSubject.



218
219
220
221
222
223
# File 'lib/remi/cucumber/business_rules.rb', line 218

# Public: Builds a DataSubject around the given data object.
#
# subject - The data object to wrap. It is stored as-is; the other methods
#           on this class delegate to it.
#
# A new DataFieldCollection is created to hold this subject's fields, and
# stub_data is then called so the wrapped object can stub its dataframe if
# it supports that.
def initialize(subject)
  @fields = DataFieldCollection.new
  @data_obj = subject

  stub_data
end

Instance Attribute Details

#data_obj ⇒ Object (readonly)

Returns the value of attribute data_obj.



225
226
227
# File 'lib/remi/cucumber/business_rules.rb', line 225

# Public: Reader for the wrapped data object.
#
# Returns the object that was handed to the constructor.
def data_obj
  @data_obj
end

Instance Method Details

#_df ⇒ Object

For debugging only



253
254
255
# File 'lib/remi/cucumber/business_rules.rb', line 253

# Internal: For debugging only.
#
# Returns whatever the wrapped data object exposes as +df+.
def _df
  @data_obj.df
end

#add_field(field_name) ⇒ Object



227
228
229
# File 'lib/remi/cucumber/business_rules.rb', line 227

# Public: Registers a field on this subject's field collection.
#
# field_name - The name of the field to add.
#
# Returns the result of the collection's own add_field, which is called
# with this subject and the field name.
def add_field(field_name)
  owner = self
  @fields.add_field(owner, field_name)
end

#append_data_with(example) ⇒ Object



300
301
302
# File 'lib/remi/cucumber/business_rules.rb', line 300

# Public: Appends the rows described by an example to the current data.
#
# example - The example to convert (via example_to_df) and append.
#
# The wrapped object's +df+ is replaced by the result of concatenating the
# converted example onto the existing +df+.
#
# Returns the concatenated dataframe.
def append_data_with(example)
  existing = @data_obj.df
  addition = example_to_df(example)
  @data_obj.df = existing.concat(addition)
end

#column_hash ⇒ Object

Public: Converts the data subject to a hash where the keys are the table columns and each value is an array holding that column's value for every row.



245
246
247
248
249
250
# File 'lib/remi/cucumber/business_rules.rb', line 245

# Public: Converts the data subject to a hash keyed by column.
#
# Each key of the dataframe's hash form is passed through +symbolize+, and
# each value is converted with +to_a+.
#
# Returns a Hash mapping symbolized column names to arrays of values.
def column_hash
  @data_obj.df.to_hash.each_with_object({}) do |(column, vector), result|
    result[column.symbolize] = vector.to_a
  end
end

#csv_options ⇒ Object



365
366
367
# File 'lib/remi/cucumber/business_rules.rb', line 365

# Public: CSV options of the wrapped data object.
#
# Returns whatever the wrapped data object reports from its own
# csv_options method.
def csv_options
  @data_obj.csv_options
end

#cumulative_dist_from_freq_table(table, freq_field: 'frequency') ⇒ Object



313
314
315
316
317
318
319
320
321
322
323
# File 'lib/remi/cucumber/business_rules.rb', line 313

# Public: Builds a cumulative distribution from a frequency table.
#
# table      - An object whose +hashes+ method returns one Hash per table
#              row (column name => cell value).
# freq_field - The name of the column holding each row's frequency
#              (default: 'frequency'). Values are converted with +to_f+.
#
# Rows are walked in order while a running total of the frequencies is kept.
# Each row is stored under the half-open range from the running total before
# the row to the running total after it.
#
# The table's own row hashes are left untouched: the frequency column is
# removed from a copy. Deleting it from the original would make a second call
# on the same table see no frequencies, and would raise if the rows are
# frozen.
#
# Returns a Hash mapping Range => Hash of the row's remaining columns.
def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
  running_total = 0
  table.hashes.each_with_object({}) do |row, cumulative_dist|
    low = running_total
    running_total += row[freq_field].to_f

    # dup keeps the row's class but never touches the caller's hash.
    values = row.dup
    values.delete(freq_field)
    cumulative_dist[(low...running_total)] = values
  end
end

#distribute_values(table) ⇒ Object



340
341
342
343
344
345
346
347
348
# File 'lib/remi/cucumber/business_rules.rb', line 340

# Public: Fills dataframe columns with values drawn from a frequency table.
#
# table - The frequency table passed to cumulative_dist_from_freq_table.
#
# One value per existing dataframe row is generated for every field in the
# table. Each generated column is written to the dataframe as a Daru::Vector
# that reuses the dataframe's index, under the vector name looked up in this
# subject's fields.
#
# Returns the Hash of generated values (field name => Array).
def distribute_values(table)
  distribution = cumulative_dist_from_freq_table(table)
  generated = generate_values_from_cumulative_dist(@data_obj.df.size, distribution)

  generated.each do |field_name, values|
    target = fields[field_name].name
    @data_obj.df[target] = Daru::Vector.new(values, index: @data_obj.df.index)
  end
end

#example_to_df(example) ⇒ Object



291
292
293
# File 'lib/remi/cucumber/business_rules.rb', line 291

# Public: Converts an example into a dataframe.
#
# example - An object responding to +to_df+.
#
# The first row of the current dataframe (as a hash) and the wrapped
# object's field symbolizer are handed to the example's +to_df+.
#
# Returns the result of example.to_df.
def example_to_df(example)
  seed_row = @data_obj.df.row[0].to_hash
  symbolizer = @data_obj.field_symbolizer
  example.to_df(seed_row, field_symbolizer: symbolizer)
end

#extract ⇒ Object



361
362
363
# File 'lib/remi/cucumber/business_rules.rb', line 361

# Public: Runs the wrapped data object's extractor.
#
# Returns the result of the extractor's own extract call.
def extract
  extractor = @data_obj.extractor
  extractor.extract
end

#field ⇒ Object



231
232
233
# File 'lib/remi/cucumber/business_rules.rb', line 231

# Public: Shortcut to a single field of this subject.
#
# Returns the result of calling +only+ on the field collection.
def field
  @fields.only
end

#fields ⇒ Object



235
236
237
# File 'lib/remi/cucumber/business_rules.rb', line 235

# Public: Reader for this subject's field collection.
#
# Returns the collection created by the constructor.
def fields
  @fields
end

#freq_by(*field_names) ⇒ Object



350
351
352
# File 'lib/remi/cucumber/business_rules.rb', line 350

# Public: Relative frequency of each group of the given fields.
#
# field_names - One or more field names to group the dataframe by.
#
# The size of each group is converted to floating point (multiplied by 1.0)
# and divided by the dataframe's size.
#
# Returns the group sizes as fractions of the dataframe size.
def freq_by(*field_names)
  group_sizes = @data_obj.df.group_by(field_names).size * 1.0
  group_sizes / @data_obj.df.size
end

#generate_values_from_cumulative_dist(n_records, cumulative_dist) ⇒ Object



325
326
327
328
329
330
331
332
333
334
335
336
337
338
# File 'lib/remi/cucumber/business_rules.rb', line 325

# Public: Generates column values by sampling a cumulative distribution.
#
# n_records       - The number of values to generate per field.
# cumulative_dist - A Hash of Range => Hash, where each range is one bucket
#                   of the cumulative frequency and each value holds the
#                   field values (field name => value) for that bucket.
#
# Each draw is scaled by the total frequency (the largest range end), so the
# frequencies may be raw counts or weights and need not sum to 1. When they
# do sum to 1.0 the scaled draw equals the unscaled one.
#
# Returns a Hash mapping field name => Array of n_records generated values,
#   or an empty Hash when n_records is less than 1.
# Raises ArgumentError if values are requested from a distribution that is
#   empty, has a non-positive total, or has no bucket covering a draw.
def generate_values_from_cumulative_dist(n_records, cumulative_dist)
  return {} if n_records < 1

  total = cumulative_dist.keys.map(&:end).max
  unless total && total > 0
    raise ArgumentError, 'cumulative_dist must cover a positive total frequency'
  end

  # Use the same seed for reproducible tests
  pseudorand = Random.new(3856382695386)

  1.upto(n_records).each_with_object({}) do |_idx, generated|
    # rand is in [0, 1); scale it onto [0, total) so every bucket is reachable.
    r = pseudorand.rand * total
    bucket = cumulative_dist.find { |range, _row| range.include?(r) }
    raise ArgumentError, "no range in cumulative_dist covers #{r}" unless bucket

    bucket.last.each do |field_name, value|
      (generated[field_name] ||= []) << value
    end
  end
end

#mock_extractor(filestore) ⇒ Object



354
355
356
357
358
359
# File 'lib/remi/cucumber/business_rules.rb', line 354

# Public: Replaces remote-listing behaviour on the wrapped object's
# extractor with methods backed by the given filestore.
#
# filestore - An object responding to +sftp_entries+.
#
# Two singleton methods are defined on the extractor instance:
#   all_entries - returns filestore.sftp_entries
#   download    - returns the +name+ of every entry it is given
#
# Returns the Symbol of the last method defined (:download).
def mock_extractor(filestore)
  target = @data_obj.extractor
  list_entries = -> { filestore.sftp_entries }
  fake_download = ->(to_download) { to_download.map { |entry| entry.name } }

  target.define_singleton_method(:all_entries, list_entries)
  target.define_singleton_method(:download, fake_download)
end

#replicate_rows(n_rows) ⇒ Object



305
306
307
308
309
310
311
# File 'lib/remi/cucumber/business_rules.rb', line 305

# Public: Repeats the current rows of the dataframe.
#
# n_rows - How many times each vector's values are repeated.
#
# A new Daru::DataFrame is created with the same vector order as the current
# one. Each vector of the current dataframe is copied in with its values
# repeated n_rows times, and the wrapped object's +df+ is then replaced by
# the new dataframe.
#
# Returns the replicated dataframe.
def replicate_rows(n_rows)
  source = @data_obj.df
  result = Daru::DataFrame.new([], order: source.vectors.to_a)

  @data_obj.df.each do |column|
    result[column.name] = column.to_a * n_rows
  end

  @data_obj.df = result
end

#size ⇒ Object



239
240
241
# File 'lib/remi/cucumber/business_rules.rb', line 239

# Public: Size of the underlying dataframe.
#
# Returns whatever the dataframe reports from its own +size+.
def size
  frame = @data_obj.df
  frame.size
end

#stub_data ⇒ Object



287
288
289
# File 'lib/remi/cucumber/business_rules.rb', line 287

# Public: Asks the wrapped data object to stub its dataframe.
#
# Nothing happens when the wrapped object does not respond to +stub_df+.
#
# Returns the result of stub_df, or nil when it is not supported.
def stub_data
  return unless @data_obj.respond_to?(:stub_df)

  @data_obj.stub_df
end

#stub_data_with(example) ⇒ Object



295
296
297
298
# File 'lib/remi/cucumber/business_rules.rb', line 295

# Public: Replaces the dataframe with the rows described by an example.
#
# example - The example to convert via example_to_df.
#
# stub_data runs first, so the conversion sees a stubbed dataframe when the
# wrapped object supports stubbing.
#
# Returns the dataframe built from the example.
def stub_data_with(example)
  stub_data
  converted = example_to_df(example)
  @data_obj.df = converted
end

#where(field_name, operation) ⇒ Object

Would like to have this return a new DataSubject and not a dataframe. Need more robust duping to make that feasible. Don’t use results for anything more than size.



261
262
263
# File 'lib/remi/cucumber/business_rules.rb', line 261

# Public: Filters the dataframe by applying a predicate to one field.
#
# field_name - The field name; it is symbolized with the wrapped object's
#              field symbolizer to find the dataframe vector.
# operation  - A callable invoked with each value of that vector; its
#              results are passed to the dataframe's +where+.
#
# Would like to have this return a new DataSubject and not a dataframe.
# Need more robust duping to make that feasible. Don't use results for
# anything more than size.
#
# Returns the result of the dataframe's +where+.
def where(field_name, operation)
  frame = @data_obj.df
  column = @data_obj.df[field_name.symbolize(@data_obj.field_symbolizer)]
  matches = column.recode { |value| operation.call(value) }
  frame.where(matches)
end

#where_between(field_name, low_value, high_value) ⇒ Object



277
278
279
# File 'lib/remi/cucumber/business_rules.rb', line 277

# Public: Filters to rows whose field value lies between two bounds.
#
# field_name - The field to test.
# low_value  - Lower bound (inclusive).
# high_value - Upper bound (inclusive).
#
# The field value and both bounds are converted with +to_f+ before they are
# compared.
#
# Returns the result of where.
def where_between(field_name, low_value, high_value)
  within_bounds = lambda do |v|
    v.to_f.between?(low_value.to_f, high_value.to_f)
  end
  where(field_name, within_bounds)
end

#where_gt(field_name, value) ⇒ Object



273
274
275
# File 'lib/remi/cucumber/business_rules.rb', line 273

# Public: Filters to rows whose field value is greater than a value.
#
# field_name - The field to test.
# value      - The threshold; both sides are compared after +to_f+.
#
# Returns the result of where.
def where_gt(field_name, value)
  above = ->(v) { v.to_f > value.to_f }
  where(field_name, above)
end

#where_in(field_name, list) ⇒ Object



281
282
283
284
# File 'lib/remi/cucumber/business_rules.rb', line 281

# Public: Filters to rows whose field value appears in a list.
#
# field_name - The field to test.
# list       - A String of comma-separated values; surrounding whitespace
#              of each item is stripped.
#
# Returns the result of where.
def where_in(field_name, list)
  allowed = list.split(',').map(&:strip)
  member = ->(v) { allowed.include?(v) }
  where(field_name, member)
end

#where_is(field_name, value) ⇒ Object



265
266
267
# File 'lib/remi/cucumber/business_rules.rb', line 265

# Public: Filters to rows whose field value equals a value.
#
# field_name - The field to test.
# value      - The value compared with == against each field value.
#
# Returns the result of where.
def where_is(field_name, value)
  same_as_value = ->(v) { v == value }
  where(field_name, same_as_value)
end

#where_lt(field_name, value) ⇒ Object



269
270
271
# File 'lib/remi/cucumber/business_rules.rb', line 269

# Public: Filters to rows whose field value is less than a value.
#
# field_name - The field to test.
# value      - The threshold; both sides are compared after +to_f+.
#
# Returns the result of where.
def where_lt(field_name, value)
  below = ->(v) { v.to_f < value.to_f }
  where(field_name, below)
end