Class: Remi::BusinessRules::DataSubject
- Inherits: Object
  - Object
    - Remi::BusinessRules::DataSubject
- Defined in: lib/remi/cucumber/business_rules.rb
Instance Attribute Summary collapse
-
#data_obj ⇒ Object
readonly
Returns the value of attribute data_obj.
Instance Method Summary collapse
-
#_df ⇒ Object
For debugging only.
- #add_field(field_name) ⇒ Object
-
#column_hash ⇒ Object
Public: Converts the data subject to a hash where the keys are the table columns and each value is an array holding that column's value for every row.
- #csv_options ⇒ Object
- #cumulative_dist_from_freq_table(table, freq_field: 'frequency') ⇒ Object
- #distribute_values(table) ⇒ Object
- #extract ⇒ Object
- #field ⇒ Object
- #fields ⇒ Object
- #freq_by(*field_names) ⇒ Object
- #generate_values_from_cumulative_dist(n_records, cumulative_dist) ⇒ Object
-
#initialize(subject) ⇒ DataSubject
constructor
A new instance of DataSubject.
- #mock_extractor(filestore) ⇒ Object
- #replicate_rows(n_rows) ⇒ Object
- #size ⇒ Object
- #stub_data ⇒ Object
- #stub_data_with(example) ⇒ Object
-
#where(field_name, operation) ⇒ Object
Would like to have this return a new DataSubject and not a dataframe.
- #where_between(field_name, low_value, high_value) ⇒ Object
- #where_gt(field_name, value) ⇒ Object
- #where_in(field_name, list) ⇒ Object
- #where_is(field_name, value) ⇒ Object
- #where_lt(field_name, value) ⇒ Object
Constructor Details
#initialize(subject) ⇒ DataSubject
Returns a new instance of DataSubject.
215 216 217 218 219 220 |
# File 'lib/remi/cucumber/business_rules.rb', line 215 def initialize(subject) @data_obj = subject @fields = DataFieldCollection.new stub_data end |
Instance Attribute Details
#data_obj ⇒ Object (readonly)
Returns the value of attribute data_obj.
222 223 224 |
# File 'lib/remi/cucumber/business_rules.rb', line 222 def data_obj @data_obj end |
Instance Method Details
#_df ⇒ Object
For debugging only
250 251 252 |
# File 'lib/remi/cucumber/business_rules.rb', line 250 def _df @data_obj.df end |
#add_field(field_name) ⇒ Object
224 225 226 |
# File 'lib/remi/cucumber/business_rules.rb', line 224 def add_field(field_name) @fields.add_field(self, field_name) end |
#column_hash ⇒ Object
Public: Converts the data subject to a hash where the keys are the table columns and each value is an array holding that column's value for every row.
242 243 244 245 246 247 |
# File 'lib/remi/cucumber/business_rules.rb', line 242
#
# Public: Converts the data subject to a hash keyed by column.
#
# Every key of the wrapped dataframe's #to_hash is passed through #symbolize
# and every value is converted with #to_a.
#
# Returns a new Hash of symbolized column name => Array of that column's
# values.
def column_hash
  @data_obj.df.to_hash.each_with_object({}) do |(column, values), result|
    result[column.symbolize] = values.to_a
  end
end
#csv_options ⇒ Object
354 355 356 |
# File 'lib/remi/cucumber/business_rules.rb', line 354
#
# Returns the csv_options of the wrapped data object.
#
# NOTE(review): this listing had lost the method name and the delegated call
# (it read `def @data_obj. end`); both were restored from the "#csv_options"
# heading. Confirm against the source file.
def csv_options
  @data_obj.csv_options
end
#cumulative_dist_from_freq_table(table, freq_field: 'frequency') ⇒ Object
302 303 304 305 306 307 308 309 310 311 312 |
# File 'lib/remi/cucumber/business_rules.rb', line 302
#
# Builds a cumulative distribution from a frequency table.
#
# table      - an object responding to #hashes with one Hash per table row.
# freq_field - key holding each row's frequency (default: 'frequency'); the
#              value is read with #to_f.
#
# Each row is assigned the half-open range (low...high), where low is the
# running total before the row and high is the running total after it.
#
# Returns a Hash of Range => row Hash without the freq_field entry. The rows
# returned by table.hashes are left untouched, so the same table can be used
# again (and may be frozen).
def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
  freq_total = 0

  table.hashes.each_with_object({}) do |row, cumulative_dist|
    low = freq_total
    freq_total += row[freq_field].to_f
    # Copy the row instead of deleting the key in place: mutating the
    # caller's rows would make a second pass see a frequency of 0.
    cumulative_dist[(low...freq_total)] = row.reject { |key, _value| key == freq_field }
  end
end
#distribute_values(table) ⇒ Object
329 330 331 332 333 334 335 336 337 |
# File 'lib/remi/cucumber/business_rules.rb', line 329 def distribute_values(table) cumulative_dist = cumulative_dist_from_freq_table(table) generated_data = generate_values_from_cumulative_dist(@data_obj.df.size, cumulative_dist) generated_data.each do |field_name, data_array| vector_name = fields[field_name].name @data_obj.df[vector_name] = Daru::Vector.new(data_array, index: @data_obj.df.index) end end |
#extract ⇒ Object
350 351 352 |
# File 'lib/remi/cucumber/business_rules.rb', line 350 def extract @data_obj.extractor.extract end |
#field ⇒ Object
228 229 230 |
# File 'lib/remi/cucumber/business_rules.rb', line 228 def field @fields.only end |
#fields ⇒ Object
232 233 234 |
# File 'lib/remi/cucumber/business_rules.rb', line 232 def fields @fields end |
#freq_by(*field_names) ⇒ Object
339 340 341 |
# File 'lib/remi/cucumber/business_rules.rb', line 339 def freq_by(*field_names) @data_obj.df.group_by(field_names).size * 1.0 / @data_obj.df.size end |
#generate_values_from_cumulative_dist(n_records, cumulative_dist) ⇒ Object
314 315 316 317 318 319 320 321 322 323 324 325 326 327 |
# File 'lib/remi/cucumber/business_rules.rb', line 314
#
# Draws +n_records+ rows from a cumulative distribution and collects them
# column by column.
#
# n_records       - number of rows to draw.
# cumulative_dist - Hash of Range => row Hash, as returned by
#                   #cumulative_dist_from_freq_table.
#
# Returns a Hash of field name => Array with one drawn value per record
# (an empty Hash when n_records is 0).
# Raises ArgumentError when a draw is not covered by any range, e.g. when the
# ranges do not span the whole of [0, 1).
def generate_values_from_cumulative_dist(n_records, cumulative_dist)
  # Use the same seed every time so generated test data is reproducible.
  pseudorand = Random.new(3856382695386)

  1.upto(n_records).each_with_object({}) do |_idx, columns|
    draw = pseudorand.rand
    _range, row_as_hash = cumulative_dist.find { |range, _row| range.include?(draw) }
    unless row_as_hash
      raise ArgumentError, "no range in the cumulative distribution covers #{draw}"
    end

    row_as_hash.each do |field_name, value|
      (columns[field_name] ||= []) << value
    end
  end
end
#mock_extractor(filestore) ⇒ Object
343 344 345 346 347 348 |
# File 'lib/remi/cucumber/business_rules.rb', line 343
#
# Overrides #all_entries and #download on the wrapped data object's extractor.
# The methods are defined on that one extractor object only, not on its
# class.
#
# filestore - an object responding to #sftp_entries.
#
# After the call, extractor.all_entries returns filestore.sftp_entries and
# extractor.download(entries) returns the #name of each entry it is given.
def mock_extractor(filestore)
  extractor = @data_obj.extractor
  extractor.define_singleton_method(:all_entries) { filestore.sftp_entries }
  extractor.define_singleton_method(:download) { |to_download| to_download.map(&:name) }
end
#replicate_rows(n_rows) ⇒ Object
294 295 296 297 298 299 300 |
# File 'lib/remi/cucumber/business_rules.rb', line 294 def replicate_rows(n_rows) replicated_df = Daru::DataFrame.new([], order: @data_obj.df.vectors.to_a) @data_obj.df.each do |vector| replicated_df[vector.name] = vector.to_a * n_rows end @data_obj.df = replicated_df end |
#size ⇒ Object
236 237 238 |
# File 'lib/remi/cucumber/business_rules.rb', line 236 def size @data_obj.df.size end |
#stub_data ⇒ Object
284 285 286 |
# File 'lib/remi/cucumber/business_rules.rb', line 284 def stub_data @data_obj.stub_df if @data_obj.respond_to? :stub_df end |
#stub_data_with(example) ⇒ Object
288 289 290 291 |
# File 'lib/remi/cucumber/business_rules.rb', line 288 def stub_data_with(example) stub_data @data_obj.df = example.to_df(@data_obj.df.row[0].to_hash, field_symbolizer: @data_obj.field_symbolizer) end |
#where(field_name, operation) ⇒ Object
Would like to have this return a new DataSubject and not a dataframe. Need more robust duping to make that feasible. Don’t use results for anything more than size.
258 259 260 |
# File 'lib/remi/cucumber/business_rules.rb', line 258 def where(field_name, operation) @data_obj.df.where(@data_obj.df[field_name.symbolize(@data_obj.field_symbolizer)].recode { |v| operation.call(v) }) end |
#where_between(field_name, low_value, high_value) ⇒ Object
274 275 276 |
# File 'lib/remi/cucumber/business_rules.rb', line 274 def where_between(field_name, low_value, high_value) where(field_name, ->(v) { v.to_f.between?(low_value.to_f, high_value.to_f) }) end |
#where_gt(field_name, value) ⇒ Object
270 271 272 |
# File 'lib/remi/cucumber/business_rules.rb', line 270 def where_gt(field_name, value) where(field_name, ->(v) { v.to_f > value.to_f }) end |
#where_in(field_name, list) ⇒ Object
278 279 280 281 |
# File 'lib/remi/cucumber/business_rules.rb', line 278
#
# Filters via #where, keeping values of +field_name+ that appear in +list+.
#
# field_name - name of the field to test.
# list       - a comma-separated String; each item is stripped of
#              surrounding whitespace before matching.
#
# Values are matched with Array#include?, i.e. against the stripped Strings.
def where_in(field_name, list)
  allowed = list.split(',').map(&:strip)
  where(field_name, ->(v) { allowed.include?(v) })
end
#where_is(field_name, value) ⇒ Object
262 263 264 |
# File 'lib/remi/cucumber/business_rules.rb', line 262 def where_is(field_name, value) where(field_name, ->(v) { v == value }) end |
#where_lt(field_name, value) ⇒ Object
266 267 268 |
# File 'lib/remi/cucumber/business_rules.rb', line 266 def where_lt(field_name, value) where(field_name, ->(v) { v.to_f < value.to_f }) end |