Class: Remi::BusinessRules::DataSubject
- Inherits:
-
Object
- Object
- Remi::BusinessRules::DataSubject
- Defined in:
- lib/remi/cucumber/business_rules.rb
Instance Attribute Summary collapse
-
#data_obj ⇒ Object
readonly
Returns the value of attribute data_obj.
Instance Method Summary collapse
-
#_df ⇒ Object
For debugging only.
- #add_field(field_name) ⇒ Object
-
#column_hash ⇒ Object
Public: Converts the data subject to a hash whose keys are the table columns and whose values are arrays holding that column's value for each row.
- #csv_options ⇒ Object
- #cumulative_dist_from_freq_table(table, freq_field: 'frequency') ⇒ Object
- #distribute_values(table) ⇒ Object
- #extract ⇒ Object
- #field ⇒ Object
- #fields ⇒ Object
- #freq_by(*field_names) ⇒ Object
- #generate_values_from_cumulative_dist(n_records, cumulative_dist) ⇒ Object
-
#initialize(subject) ⇒ DataSubject
constructor
A new instance of DataSubject.
- #mock_extractor(filestore) ⇒ Object
- #replicate_rows(n_rows) ⇒ Object
- #size ⇒ Object
- #stub_data ⇒ Object
- #stub_data_with(example) ⇒ Object
-
#where(field_name, operation) ⇒ Object
Would like to have this return a new DataSubject and not a dataframe.
- #where_between(field_name, low_value, high_value) ⇒ Object
- #where_gt(field_name, value) ⇒ Object
- #where_in(field_name, list) ⇒ Object
- #where_is(field_name, value) ⇒ Object
- #where_lt(field_name, value) ⇒ Object
Constructor Details
#initialize(subject) ⇒ DataSubject
Returns a new instance of DataSubject.
213 214 215 216 217 218 |
# File 'lib/remi/cucumber/business_rules.rb', line 213
# Public: Set up a DataSubject around the given subject.
#
# subject - The object to wrap; it is stored unchanged in @data_obj.
#
# A new DataFieldCollection is created for @fields, and #stub_data is called
# once both instance variables are assigned.
def initialize(subject)
  @fields = DataFieldCollection.new
  @data_obj = subject
  stub_data
end
Instance Attribute Details
#data_obj ⇒ Object (readonly)
Returns the value of attribute data_obj.
220 221 222 |
# File 'lib/remi/cucumber/business_rules.rb', line 220
# Public: Reader for the wrapped data object.
#
# Returns the current value of @data_obj.
def data_obj
  @data_obj
end
Instance Method Details
#_df ⇒ Object
For debugging only
248 249 250 |
# File 'lib/remi/cucumber/business_rules.rb', line 248
# Internal: Debugging shortcut to the wrapped object's dataframe.
#
# Returns whatever @data_obj.df returns.
def _df
  wrapped = @data_obj
  wrapped.df
end
#add_field(field_name) ⇒ Object
222 223 224 |
# File 'lib/remi/cucumber/business_rules.rb', line 222
# Public: Register a field on this subject's field collection.
#
# field_name - The name handed through to @fields.add_field.
#
# Returns the result of @fields.add_field, which is called with this
# DataSubject as its first argument.
def add_field(field_name)
  collection = @fields
  collection.add_field(self, field_name)
end
#column_hash ⇒ Object
Public: Converts the data subject to a hash whose keys are the table columns and whose values are arrays holding that column's value for each row.
240 241 242 243 244 245 |
# File 'lib/remi/cucumber/business_rules.rb', line 240
# Public: Converts the data subject to a hash keyed by column.
#
# Each key of @data_obj.df.to_hash is passed through #symbolize (called with
# no arguments) and each value through #to_a.
#
# Returns a new Hash mapping the symbolized keys to Arrays.
def column_hash
  @data_obj.df.to_hash.each_with_object({}) do |(name, vector), columns|
    columns[name.symbolize] = vector.to_a
  end
end
#csv_options ⇒ Object
352 353 354 |
# File 'lib/remi/cucumber/business_rules.rb', line 352
# Public: CSV options of the wrapped data object.
#
# NOTE(review): the rendered source for this method was garbled
# ("def @data_obj. end"); the method name and the delegated call were
# reconstructed from the documented signature "#csv_options". Confirm against
# lib/remi/cucumber/business_rules.rb.
#
# Returns whatever @data_obj.csv_options returns.
def csv_options
  @data_obj.csv_options
end
#cumulative_dist_from_freq_table(table, freq_field: 'frequency') ⇒ Object
300 301 302 303 304 305 306 307 308 309 310 |
# File 'lib/remi/cucumber/business_rules.rb', line 300
# Public: Build a cumulative distribution from a frequency table.
#
# table      - An object whose #hashes returns one Hash per row.
# freq_field - The key in each row holding that row's frequency
#              (default: 'frequency'). The value is converted with #to_f.
#
# Rows are walked in order while a running total of the frequencies is kept.
# Each row is mapped from the half-open range (total_before...total_after).
# The row Hashes supplied by the table are left untouched: a copy without
# freq_field is stored, so frozen rows and later re-reads of the table work.
#
# Returns a Hash of Range => Hash (the row without its freq_field entry).
def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
  running_total = 0
  table.hashes.each_with_object({}) do |row, cumulative_dist|
    low = running_total
    running_total += row[freq_field].to_f
    cumulative_dist[(low...running_total)] = row.reject { |key, _| key == freq_field }
  end
end
#distribute_values(table) ⇒ Object
327 328 329 330 331 332 333 334 335 |
# File 'lib/remi/cucumber/business_rules.rb', line 327
# Public: Fill dataframe columns with values drawn from a frequency table.
#
# table - Frequency table passed to #cumulative_dist_from_freq_table.
#
# One value per existing row (@data_obj.df.size) is generated for every field
# in the table. Each generated array is stored on the dataframe as a
# Daru::Vector under the name given by fields[field_name].name, indexed by
# the dataframe's own index.
#
# Returns the Hash of generated values (field name => Array).
def distribute_values(table)
  frame = @data_obj.df
  distribution = cumulative_dist_from_freq_table(table)

  generate_values_from_cumulative_dist(frame.size, distribution).each do |field_name, values|
    frame[fields[field_name].name] = Daru::Vector.new(values, index: frame.index)
  end
end
#extract ⇒ Object
348 349 350 |
# File 'lib/remi/cucumber/business_rules.rb', line 348
# Public: Run the wrapped object's extractor.
#
# Returns whatever @data_obj.extractor.extract returns.
def extract
  extractor = @data_obj.extractor
  extractor.extract
end
#field ⇒ Object
226 227 228 |
# File 'lib/remi/cucumber/business_rules.rb', line 226
# Public: Access the field collection's single field.
#
# Returns whatever @fields.only returns.
def field
  collection = @fields
  collection.only
end
#fields ⇒ Object
230 231 232 |
# File 'lib/remi/cucumber/business_rules.rb', line 230
# Public: Reader for this subject's field collection.
#
# Returns the current value of @fields.
def fields
  @fields
end
#freq_by(*field_names) ⇒ Object
337 338 339 |
# File 'lib/remi/cucumber/business_rules.rb', line 337
# Public: Relative frequency of the groups formed by the given fields.
#
# field_names - One or more field names; they are passed to the dataframe's
#               #group_by together as a single Array.
#
# Returns the grouped #size multiplied by 1.0 and divided by
# @data_obj.df.size.
# NOTE(review): presumably a per-group vector of fractions when the dataframe
# is a Daru::DataFrame - confirm.
def freq_by(*field_names)
  frame = @data_obj.df
  group_sizes = frame.group_by(field_names).size
  group_sizes * 1.0 / frame.size
end
#generate_values_from_cumulative_dist(n_records, cumulative_dist) ⇒ Object
312 313 314 315 316 317 318 319 320 321 322 323 324 325 |
# File 'lib/remi/cucumber/business_rules.rb', line 312
# Public: Generate field values by sampling a cumulative distribution.
#
# n_records       - Number of values to generate per field.
# cumulative_dist - Hash of Range => Hash (field name => value); the ranges
#                   must cover every number that Random#rand can return,
#                   i.e. the interval [0, 1).
#
# A fixed seed is used so repeated runs generate identical data. For each
# record the first range (in Hash order) that includes the drawn number is
# selected, and its field values are appended to the per-field arrays.
#
# Returns a Hash of field name => Array of generated values. It is empty when
# n_records is less than 1.
# Raises ArgumentError if no range includes a drawn number (for example, the
# frequencies sum to less than 1).
def generate_values_from_cumulative_dist(n_records, cumulative_dist)
  # Use the same key for reproducible tests
  pseudorand = Random.new(3856382695386)

  1.upto(n_records).each_with_object({}) do |_, generated|
    draw = pseudorand.rand
    _, row_as_hash = cumulative_dist.find { |range, _row| range.include?(draw) }
    unless row_as_hash
      raise ArgumentError,
            "no range in the cumulative distribution includes #{draw}; frequencies must cover [0, 1)"
    end

    row_as_hash.each do |field_name, value|
      (generated[field_name] ||= []) << value
    end
  end
end
#mock_extractor(filestore) ⇒ Object
341 342 343 344 345 346 |
# File 'lib/remi/cucumber/business_rules.rb', line 341
# Public: Replace the extractor's remote calls with in-memory stand-ins.
#
# filestore - An object responding to #sftp_entries.
#
# Two singleton methods are defined on @data_obj.extractor only (its class is
# not modified):
#   all_entries           - returns filestore.sftp_entries.
#   download(to_download) - returns the #name of each entry it is given.
#
# Returns the Symbol :download (the result of the last method definition).
def mock_extractor(filestore)
  extractor = @data_obj.extractor
  extractor.define_singleton_method(:all_entries, -> { filestore.sftp_entries })
  extractor.define_singleton_method(:download, ->(to_download) { to_download.map(&:name) })
end
#replicate_rows(n_rows) ⇒ Object
292 293 294 295 296 297 298 |
# File 'lib/remi/cucumber/business_rules.rb', line 292
# Public: Replace the dataframe with one whose vectors are repeated n_rows times.
#
# n_rows - Multiplier applied to each vector's #to_a.
#          NOTE(review): assumes #to_a returns an Array, so `* n_rows`
#          repeats its contents - confirm against Daru.
#
# A new Daru::DataFrame is created with the same vector order as the current
# one, filled vector by vector, and then assigned to @data_obj.df.
#
# Returns the new dataframe.
def replicate_rows(n_rows)
  source = @data_obj.df
  replicated_df = Daru::DataFrame.new([], order: source.vectors.to_a)

  source.each { |vector| replicated_df[vector.name] = vector.to_a * n_rows }

  @data_obj.df = replicated_df
end
#size ⇒ Object
234 235 236 |
# File 'lib/remi/cucumber/business_rules.rb', line 234
# Public: Size of the wrapped object's dataframe.
#
# Returns whatever @data_obj.df.size returns.
def size
  frame = @data_obj.df
  frame.size
end
#stub_data ⇒ Object
282 283 284 |
# File 'lib/remi/cucumber/business_rules.rb', line 282
# Public: Ask the wrapped object to stub its dataframe, if it knows how.
#
# Returns the result of @data_obj.stub_df, or nil when @data_obj does not
# respond to :stub_df.
def stub_data
  return unless @data_obj.respond_to?(:stub_df)

  @data_obj.stub_df
end
#stub_data_with(example) ⇒ Object
286 287 288 289 |
# File 'lib/remi/cucumber/business_rules.rb', line 286
# Public: Stub the dataframe, then replace it with data built from an example.
#
# example - An object responding to #to_df(seed_hash, field_symbolizer:).
#
# After #stub_data runs, row 0 of the dataframe (converted with #to_hash) is
# passed to example.to_df together with @data_obj.field_symbolizer, and the
# result is assigned to @data_obj.df.
#
# Returns the dataframe produced by example.to_df.
def stub_data_with(example)
  stub_data

  seed_row = @data_obj.df.row[0].to_hash
  symbolizer = @data_obj.field_symbolizer
  @data_obj.df = example.to_df(seed_row, field_symbolizer: symbolizer)
end
#where(field_name, operation) ⇒ Object
Would like to have this return a new DataSubject and not a dataframe. Need more robust duping to make that feasible. Don’t use results for anything more than size.
256 257 258 |
# File 'lib/remi/cucumber/business_rules.rb', line 256
# Public: Filter the dataframe by applying a predicate to one field.
#
# field_name - Field to test; converted to a dataframe key with
#              #symbolize(@data_obj.field_symbolizer).
# operation  - Callable invoked with each value of that field.
#
# Would like to have this return a new DataSubject and not a dataframe.
# Need more robust duping to make that feasible. Don't use results for
# anything more than size.
#
# Returns the result of the dataframe's #where applied to the recoded vector.
def where(field_name, operation)
  frame = @data_obj.df
  column_key = field_name.symbolize(@data_obj.field_symbolizer)
  mask = frame[column_key].recode { |v| operation.call(v) }
  frame.where(mask)
end
#where_between(field_name, low_value, high_value) ⇒ Object
272 273 274 |
# File 'lib/remi/cucumber/business_rules.rb', line 272
# Public: Filter to rows whose field value lies between two bounds, inclusive.
#
# field_name - Field to test (passed to #where).
# low_value  - Lower bound; compared after #to_f.
# high_value - Upper bound; compared after #to_f.
#
# Each field value is also converted with #to_f before the comparison.
#
# Returns the result of #where.
def where_between(field_name, low_value, high_value)
  within_bounds = lambda do |v|
    v.to_f.between?(low_value.to_f, high_value.to_f)
  end
  where(field_name, within_bounds)
end
#where_gt(field_name, value) ⇒ Object
268 269 270 |
# File 'lib/remi/cucumber/business_rules.rb', line 268
# Public: Filter to rows whose field value is strictly greater than a value.
#
# field_name - Field to test (passed to #where).
# value      - Threshold; both sides are converted with #to_f.
#
# Returns the result of #where.
def where_gt(field_name, value)
  exceeds_value = lambda { |v| v.to_f > value.to_f }
  where(field_name, exceeds_value)
end
#where_in(field_name, list) ⇒ Object
276 277 278 279 |
# File 'lib/remi/cucumber/business_rules.rb', line 276
# Public: Filter to rows whose field value appears in a comma-separated list.
#
# field_name - Field to test (passed to #where).
# list       - String of comma-separated values; surrounding whitespace is
#              stripped from each entry.
#
# Field values are matched against the entries as-is, with no conversion.
#
# Returns the result of #where.
def where_in(field_name, list)
  allowed = list.split(',').map(&:strip)
  listed = lambda { |v| allowed.include?(v) }
  where(field_name, listed)
end
#where_is(field_name, value) ⇒ Object
260 261 262 |
# File 'lib/remi/cucumber/business_rules.rb', line 260
# Public: Filter to rows whose field value equals the given value.
#
# field_name - Field to test (passed to #where).
# value      - Value compared with ==; no type conversion is applied.
#
# Returns the result of #where.
def where_is(field_name, value)
  matches_value = lambda { |v| v == value }
  where(field_name, matches_value)
end
#where_lt(field_name, value) ⇒ Object
264 265 266 |
# File 'lib/remi/cucumber/business_rules.rb', line 264
# Public: Filter to rows whose field value is strictly less than a value.
#
# field_name - Field to test (passed to #where).
# value      - Threshold; both sides are converted with #to_f.
#
# Returns the result of #where.
def where_lt(field_name, value)
  below_value = lambda { |v| v.to_f < value.to_f }
  where(field_name, below_value)
end