Class: Remi::Testing::BusinessRules::DataSubject

Inherits:

Object

Object
Remi::Testing::BusinessRules::DataSubject

show all

Defined in: lib/remi/testing/business_rules.rb

Instance Attribute Summary collapse

#name ⇒ Object readonly
Returns the value of attribute name.

Instance Method Summary collapse

#add_field(field_name) ⇒ Object
#append_data_with(example) ⇒ Object
#column_hash ⇒ Object
Public: Converts the data subject to a hash whose keys are the table columns and whose values are arrays holding that column's value for each row.
#cumulative_dist_from_freq_table(table, freq_field: 'frequency') ⇒ Object
#data_subject ⇒ Object
#distribute_values(table) ⇒ Object
#example_to_df(example) ⇒ Object
#field ⇒ Object
#fields ⇒ Object
#freq_by(*field_names) ⇒ Object
#generate_values_from_cumulative_dist(n_records, cumulative_dist) ⇒ Object
#initialize(name, subject) ⇒ DataSubject constructor
A new instance of DataSubject.
#replicate_rows(n_rows) ⇒ Object
#size ⇒ Object
#stub_data ⇒ Object
#stub_data_with(example) ⇒ Object
#unique_integer_field(field_name) ⇒ Object
#where(field_name, operation) ⇒ Object
Would like to have this return a new DataSubject and not a dataframe.
#where_between(field_name, low_value, high_value) ⇒ Object
#where_gt(field_name, value) ⇒ Object
#where_in(field_name, list) ⇒ Object
#where_is(field_name, value) ⇒ Object
#where_lt(field_name, value) ⇒ Object

Constructor Details

#initialize(name, subject) ⇒ `DataSubject`

Returns a new instance of DataSubject.

# File 'lib/remi/testing/business_rules.rb', line 256

# Sets up a data subject wrapper and immediately stubs its data.
#
# The name and subject are stored as given, the field collection starts out as
# a fresh DataFieldCollection, and #stub_data is invoked last so that it runs
# against the fully initialized wrapper.
#
# name    - Identifier for this data subject.
# subject - The wrapped object. It is only ever accessed through its #dsl_eval
#           method (see #data_subject).
def initialize(name, subject)
  @fields = DataFieldCollection.new
  @data_subject = subject
  @name = name

  stub_data
end

Instance Attribute Details

#name ⇒ `Object` (readonly)

Returns the value of attribute name.



264
265
266

# File 'lib/remi/testing/business_rules.rb', line 264

# Read-only accessor: returns the name this data subject was constructed with.
def name
  @name
end

Instance Method Details

#add_field(field_name) ⇒ `Object`



266
267
268

# File 'lib/remi/testing/business_rules.rb', line 266

# Adds a field to this data subject by delegating to the field collection,
# passing this data subject (self) together with the field name.
#
# field_name - Name of the field to add.
#
# Returns whatever the field collection's #add_field returns.
def add_field(field_name)
  @fields.add_field(self, field_name)
end

#append_data_with(example) ⇒ `Object`



343
344
345

# File 'lib/remi/testing/business_rules.rb', line 343

# Appends the rows described by an example to the current dataframe.
#
# The example is converted with #example_to_df, concatenated onto the existing
# dataframe via #concat, and the outcome is stored back as the subject's
# dataframe.
#
# example - Example accepted by #example_to_df.
#
# Returns the concatenated dataframe that was assigned.
def append_data_with(example)
  combined = data_subject.df.concat(example_to_df(example))
  data_subject.df = combined
end

#column_hash ⇒ `Object`

Public: Converts the data subject to a hash whose keys are the table columns and whose values are arrays holding that column's value for each row.

# File 'lib/remi/testing/business_rules.rb', line 288

# Public: Converts the data subject's dataframe into a plain hash keyed by
# column.
#
# Each key is the column name after calling #symbolize on it, and each value
# is that column's contents converted with #to_a (one entry per row).
# NOTE(review): #symbolize is defined outside this view; presumably it
# normalizes the column name to a Symbol - confirm.
#
# Returns a Hash of column key => Array of values.
def column_hash
  data_subject.df.to_h.each_with_object({}) do |(column, vector), columns|
    columns[column.symbolize] = vector.to_a
  end
end

#cumulative_dist_from_freq_table(table, freq_field: 'frequency') ⇒ `Object`

# File 'lib/remi/testing/business_rules.rb', line 356

# Builds a cumulative-distribution lookup from a frequency table.
#
# The rows of table.hashes are walked in order while a running total of each
# row's frequency (converted with #to_f) is kept. Every row is assigned the
# half-open range that runs from the total before it to the total after it.
#
# table      - Object responding to #hashes with an Array of row Hashes
#              (presumably a Cucumber data table - confirm against callers).
# freq_field - Key of the frequency column in each row (default: 'frequency').
#
# Returns a Hash mapping Range (low...high) => row Hash without the frequency
# column. The rows supplied by the table are left untouched, so the same table
# can be used again afterwards and its rows may be frozen.
def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
  freq_total = 0
  table.hashes.each_with_object({}) do |row, cumulative_dist|
    low = freq_total
    freq_total += row[freq_field].to_f
    # Copy the row minus the frequency column rather than deleting in place:
    # deleting from the caller's row would strip the frequencies from the
    # table (a second call would then see 0.0 everywhere) and raises on
    # frozen rows.
    cumulative_dist[(low...freq_total)] = row.reject { |key, _value| key == freq_field }
  end
end

#data_subject ⇒ `Object`



282
283
284

# File 'lib/remi/testing/business_rules.rb', line 282

# Returns the result of calling #dsl_eval on the wrapped subject. The other
# methods shown for this class reach the underlying dataframe (and the
# subject's fields / field symbolizer) through this accessor.
# NOTE(review): presumably #dsl_eval evaluates any pending DSL definition and
# returns the subject itself - confirm against its definition.
def data_subject
  @data_subject.dsl_eval
end

#distribute_values(table) ⇒ `Object`

# File 'lib/remi/testing/business_rules.rb', line 383

# Overwrites dataframe vectors with values generated from a frequency table.
#
# The table is turned into a cumulative distribution, one set of values is
# generated per row of the current dataframe, and each generated column is
# written back as a Daru::Vector that reuses the dataframe's index.
#
# table - Frequency table accepted by #cumulative_dist_from_freq_table.
#
# Returns the Hash of generated values (field name => Array of values).
def distribute_values(table)
  distribution = cumulative_dist_from_freq_table(table)
  generated = generate_values_from_cumulative_dist(data_subject.df.size, distribution)

  generated.each do |field_name, values|
    # Translate the business-rule field name into the dataframe's vector name
    target_vector = fields[field_name].field_name
    replacement = Daru::Vector.new(values, index: data_subject.df.index)
    data_subject.df[target_vector] = replacement
  end
end

#example_to_df(example) ⇒ `Object`

# File 'lib/remi/testing/business_rules.rb', line 328

# Converts an example into a dataframe shaped like this data subject.
#
# The example's own #to_df does the conversion; it is handed the first row of
# the current dataframe (as a hash) and the subject's field symbolizer.
# Afterwards every field whose metadata :type is :json has its values run
# through JSON.parse; values that cannot be parsed are kept unchanged.
#
# example - Object responding to #to_df(seed_hash, field_symbolizer:).
#
# Returns the dataframe produced by the example.
def example_to_df(example)
  seed_row = data_subject.df.row[0].to_h
  df = example.to_df(seed_row, field_symbolizer: data_subject.field_symbolizer)

  data_subject.fields.each do |vector, metadata|
    next unless metadata[:type] == :json

    df[vector].recode! do |raw|
      begin
        JSON.parse(raw)
      rescue StandardError
        raw # best effort: anything that does not parse stays as it was
      end
    end
  end

  df
end

#field ⇒ `Object`



270
271
272

# File 'lib/remi/testing/business_rules.rb', line 270

# Returns the result of calling #only on the field collection.
# NOTE(review): presumably the single field registered on this data subject -
# confirm how DataFieldCollection#only behaves with zero or several fields.
def field
  @fields.only
end

#fields ⇒ `Object`



274
275
276

# File 'lib/remi/testing/business_rules.rb', line 274

# Returns the field collection created in the constructor (a
# DataFieldCollection), which #add_field populates.
def fields
  @fields
end

#freq_by(*field_names) ⇒ `Object`



393
394
395

# File 'lib/remi/testing/business_rules.rb', line 393

# Computes the relative frequency of each combination of the given fields.
#
# The dataframe is grouped by field_names, the size of each group is taken,
# and those sizes are multiplied by 1.0 and divided by the dataframe's total
# size.
#
# field_names - One or more vector names to group by (passed on as an Array).
#
# Returns the group sizes as fractions of the whole; the concrete type is
# whatever the grouping's #size yields after the arithmetic.
def freq_by(*field_names)
  group_sizes = data_subject.df.group_by(field_names).size
  total_rows = data_subject.df.size
  group_sizes * 1.0 / total_rows
end

#generate_values_from_cumulative_dist(n_records, cumulative_dist) ⇒ `Object`

# File 'lib/remi/testing/business_rules.rb', line 368

# Generates columns of values by sampling rows from a cumulative distribution.
#
# For each of the n_records records a pseudo-random number is drawn, the first
# range containing it is looked up, and that range's row contributes one value
# to every field's column.
#
# n_records       - Number of records to generate.
# cumulative_dist - Hash of Range => row Hash (field name => value), as built
#                   by #cumulative_dist_from_freq_table.
#
# NOTE(review): Random#rand draws from 0.0...1.0, so the ranges presumably
# need to cover that interval; a draw matching no range leaves no row to copy
# and fails with NoMethodError.
#
# Returns a Hash of field name => Array of generated values.
def generate_values_from_cumulative_dist(n_records, cumulative_dist)
  # Fixed seed so the generated data is identical on every test run
  rng = Random.new(3856382695386)

  1.upto(n_records).each_with_object({}) do |_record_number, columns|
    draw = rng.rand
    _range, row_values = cumulative_dist.find { |range, _row| range.include?(draw) }
    row_values.each do |field_name, value|
      (columns[field_name] ||= []) << value
    end
  end
end

#replicate_rows(n_rows) ⇒ `Object`

# File 'lib/remi/testing/business_rules.rb', line 348

# Replaces the dataframe with one whose contents are repeated n_rows times.
#
# A new, empty Daru::DataFrame is created with the same vector order as the
# current one; each vector's values are then repeated n_rows times
# (Array#*) and stored under the same vector name.
#
# n_rows - Number of times the existing values are repeated.
#
# Returns the new dataframe that was assigned to the data subject.
def replicate_rows(n_rows)
  vector_order = data_subject.df.vectors.to_a
  expanded = Daru::DataFrame.new([], order: vector_order)

  data_subject.df.each { |vector| expanded[vector.name] = vector.to_a * n_rows }

  data_subject.df = expanded
end

#size ⇒ `Object`



278
279
280

# File 'lib/remi/testing/business_rules.rb', line 278

# Returns the size reported by the underlying dataframe.
# NOTE(review): for a Daru dataframe this is presumably the row count - confirm.
def size
  data_subject.df.size
end

#stub_data ⇒ `Object`



324
325
326

# File 'lib/remi/testing/business_rules.rb', line 324

# Asks the wrapped subject to stub its dataframe, if it knows how to.
#
# Subjects that do not respond to #stub_df are left alone.
#
# Returns the result of #stub_df, or nil when the subject cannot be stubbed.
def stub_data
  return unless data_subject.respond_to?(:stub_df)

  data_subject.stub_df
end

#stub_data_with(example) ⇒ `Object`

# File 'lib/remi/testing/business_rules.rb', line 338

# Replaces the subject's dataframe with the contents of an example.
#
# The data is stubbed first (see #stub_data), which happens before the example
# is converted; the example is then turned into a dataframe by #example_to_df
# and assigned as the subject's dataframe.
#
# example - Example accepted by #example_to_df.
#
# Returns the dataframe built from the example.
def stub_data_with(example)
  stub_data
  stubbed_df = example_to_df(example)
  data_subject.df = stubbed_df
end

#unique_integer_field(field_name) ⇒ `Object`

# File 'lib/remi/testing/business_rules.rb', line 397

# Overwrites a field's values with consecutive integers starting at 1, so that
# every row ends up with a distinct value.
#
# field_name - Name of the field as known to the field collection; it is
#              translated to the dataframe's vector name before recoding.
#
# Returns the result of the in-place #recode! on the vector.
def unique_integer_field(field_name)
  counter = 0
  target_vector = fields[field_name].field_name
  # The existing value is ignored; only the running counter matters
  data_subject.df[target_vector].recode! { |_existing| counter += 1 }
end

#where(field_name, operation) ⇒ `Object`

Ideally this would return a new DataSubject rather than a dataframe, but that requires more robust duplication (duping) than is currently available. Until then, do not use the result for anything other than its size.



298
299
300

# File 'lib/remi/testing/business_rules.rb', line 298

# Filters the dataframe to the rows for which operation is truthy on a field.
#
# The field name is converted to a vector key with the subject's field
# symbolizer, the operation is applied to every value of that vector to build
# a mask, and the mask is handed to the dataframe's #where.
#
# Caveat: this returns a dataframe, not a new DataSubject, so the result
# should not be used for anything more than its size.
#
# field_name - Field name responding to #symbolize(symbolizer).
# operation  - Callable taking a single value and returning the filter result.
#
# Returns the filtered dataframe.
def where(field_name, operation)
  vector_key = field_name.symbolize(data_subject.field_symbolizer)
  mask = data_subject.df[vector_key].recode { |value| operation.call(value) }
  data_subject.df.where(mask)
end

#where_between(field_name, low_value, high_value) ⇒ `Object`



314
315
316

# File 'lib/remi/testing/business_rules.rb', line 314

# Filters to the rows whose field value lies between two bounds, inclusive.
#
# The field value and both bounds are compared as floats (via #to_f).
#
# field_name - Field to filter on (see #where).
# low_value  - Lower bound.
# high_value - Upper bound.
#
# Returns the result of #where.
def where_between(field_name, low_value, high_value)
  within_bounds = lambda do |v|
    v.to_f.between?(low_value.to_f, high_value.to_f)
  end
  where(field_name, within_bounds)
end

#where_gt(field_name, value) ⇒ `Object`



310
311
312

# File 'lib/remi/testing/business_rules.rb', line 310

# Filters to the rows whose field value is strictly greater than value.
#
# Both sides are compared as floats (via #to_f).
#
# field_name - Field to filter on (see #where).
# value      - Threshold to compare against.
#
# Returns the result of #where.
def where_gt(field_name, value)
  exceeds_value = lambda { |v| v.to_f > value.to_f }
  where(field_name, exceeds_value)
end

#where_in(field_name, list) ⇒ `Object`

# File 'lib/remi/testing/business_rules.rb', line 318

# Filters to the rows whose field value appears in a comma-separated list.
#
# The list is split on commas and surrounding whitespace is stripped from each
# entry; field values are matched against those entries with Array#include?,
# i.e. without any type conversion.
#
# field_name - Field to filter on (see #where).
# list       - String of comma-separated values, e.g. "a, b, c".
#
# Returns the result of #where.
def where_in(field_name, list)
  allowed_values = list.split(',').map(&:strip)
  member_of_list = lambda { |candidate| allowed_values.include?(candidate) }
  where(field_name, member_of_list)
end

#where_is(field_name, value) ⇒ `Object`



302
303
304

# File 'lib/remi/testing/business_rules.rb', line 302

# Filters to the rows whose field value equals value (compared with ==, with
# no type conversion).
#
# field_name - Field to filter on (see #where).
# value      - Value each row is compared against.
#
# Returns the result of #where.
def where_is(field_name, value)
  equals_value = lambda { |v| v == value }
  where(field_name, equals_value)
end

#where_lt(field_name, value) ⇒ `Object`



306
307
308

# File 'lib/remi/testing/business_rules.rb', line 306

# Filters to the rows whose field value is strictly less than value.
#
# Both sides are compared as floats (via #to_f).
#
# field_name - Field to filter on (see #where).
# value      - Threshold to compare against.
#
# Returns the result of #where.
def where_lt(field_name, value)
  below_value = lambda { |v| v.to_f < value.to_f }
  where(field_name, below_value)
end

Class: Remi::Testing::BusinessRules::DataSubject

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name, subject) ⇒ DataSubject

Instance Attribute Details

#name ⇒ Object (readonly)

Instance Method Details

#add_field(field_name) ⇒ Object

#append_data_with(example) ⇒ Object

#column_hash ⇒ Object

#cumulative_dist_from_freq_table(table, freq_field: 'frequency') ⇒ Object

#data_subject ⇒ Object

#distribute_values(table) ⇒ Object

#example_to_df(example) ⇒ Object

#field ⇒ Object

#fields ⇒ Object

#freq_by(*field_names) ⇒ Object

#generate_values_from_cumulative_dist(n_records, cumulative_dist) ⇒ Object

#replicate_rows(n_rows) ⇒ Object

#size ⇒ Object

#stub_data ⇒ Object

#stub_data_with(example) ⇒ Object

#unique_integer_field(field_name) ⇒ Object

#where(field_name, operation) ⇒ Object

#where_between(field_name, low_value, high_value) ⇒ Object

#where_gt(field_name, value) ⇒ Object

#where_in(field_name, list) ⇒ Object

#where_is(field_name, value) ⇒ Object

#where_lt(field_name, value) ⇒ Object