Method: Evoc::Experiment#sample_transactions

Defined in:
lib/evoc/experiment.rb

#sample_transactions ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/evoc/experiment.rb', line 19

# Draws a reproducible random sample of transaction ids from the base history.
#
# The pool starts as Evoc::HistoryStore.base_history and is narrowed, in this
# order, by whichever of these options are non-nil:
#   :recent               keep only the last N transactions of the pool
#   :minimum_commit_size  keep transactions whose size is >= N
#   :maximum_commit_size  keep transactions whose size is <= N
#   :minimum_history      keep transactions whose index is >= N
#   :recent_viable        keep only the last N transactions left after the
#                         filters above
#
# Sampling is then driven by opts[:sample_groups]: the entry '*' samples
# opts[:sample_size] transactions from the whole pool, while an Integer (or a
# String of digits) samples opts[:sample_size] transactions of exactly that
# size. Transactions taken by '*' are excluded from later per-size sampling.
# The random number generator is seeded with opts[:seed].
#
# The base history itself is never modified.
#
# @return [Array] the unique ids of the sampled transactions; empty when a
#   filter was active and removed every transaction
# @raise [ArgumentError] if a sample group is neither '*', an Integer, nor a
#   String consisting of digits
def sample_transactions
  options = opts
  # a fixed seed makes the sampling reproducible
  rng = Random.new(options[:seed])
  # by default we can sample from the whole history
  pool = Evoc::HistoryStore.base_history
  STDERR.puts "Sampling transactions from a pool of #{pool.size}.."

  #################################################################################
  # performing filtering steps on min/max commits size and minimum previous history
  #################################################################################

  # keep only the N most recent transactions
  unless options[:recent].nil?
    first_kept = [0, pool.size - options[:recent]].max
    pool = pool[first_kept..-1]
    STDERR.puts "    Filtering to the #{options[:recent]} most recent transactions (new pool size: #{pool.size})"
  end
  # drop transactions smaller than the minimum commit size
  unless options[:minimum_commit_size].nil?
    pool = pool.select { |tx| tx.size >= options[:minimum_commit_size] }
    STDERR.puts "    Filtering to txes larger than or equal to #{options[:minimum_commit_size]} (new pool size: #{pool.size})"
  end
  # drop transactions larger than the maximum commit size
  unless options[:maximum_commit_size].nil?
    pool = pool.select { |tx| tx.size <= options[:maximum_commit_size] }
    STDERR.puts "    Filtering to txes smaller than or equal to #{options[:maximum_commit_size]} (new pool size: #{pool.size})"
  end
  # only sample transactions that have at least 'minimum_history' previous history
  unless options[:minimum_history].nil?
    pool = pool.select { |tx| tx.index >= options[:minimum_history] }
    STDERR.puts "    Filtering to txes with at least #{options[:minimum_history]} previous txes (new pool size: #{pool.size})"
  end
  # keep only the N most recent transactions that survived the filters above
  unless options[:recent_viable].nil?
    first_kept = [0, pool.size - options[:recent_viable]].max
    pool = pool[first_kept..-1]
    STDERR.puts "    Filtering to the #{options[:recent_viable]} most recent viable transactions (new pool size: #{pool.size})"
  end

  # an empty pool is only treated as a dead end when a filter caused it
  filtering_switches = [:recent, :recent_viable, :minimum_commit_size, :maximum_commit_size, :minimum_history]
  filtered = filtering_switches.any? { |switch| !options[switch].nil? }
  if filtered && pool.size == 0
    STDERR.puts "WARNING: All transactions were filtered out, unable to sample"
    return []
  end

  sample_size = options[:sample_size]
  if sample_size > pool.size
    STDERR.puts "WARNING: The sample size is larger than the available transactions"
  end

  ######################
  # performing sampling
  ######################

  sampled = []
  # size => txes lookup, built lazily so that it reflects the pool *after* any
  # '*' sampling has removed its picks
  groups = nil
  # sorting by string puts '*' before any digit, so the unrestricted random
  # sampling is done first and its picks are removed before per-size sampling
  options[:sample_groups].sort_by(&:to_s).each do |group_size|
    if group_size == '*'
      picked_ids = pool.map(&:id).sample(sample_size, random: rng)
      sampled << picked_ids
      STDERR.puts "Sampled #{picked_ids.size} txes"
      # rebind instead of clear/<<: the pool may still be the shared base
      # history object, which must not be modified
      pool = pool.reject { |tx| picked_ids.include?(tx.id) }
      groups = nil
      next
    end

    # note: `group_size.to_i` cannot be used as the validity check, since every
    # Integer (including 0) is truthy
    wanted_size =
      if group_size.is_a?(Integer)
        group_size
      elsif group_size.is_a?(String) && group_size =~ /\A\s*\d+\s*\z/
        group_size.to_i
      end
    if wanted_size.nil?
      raise ArgumentError, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
    end

    groups ||= pool.group_by(&:size)
    group = groups[wanted_size]
    # check if there were any txes of this size
    if group.nil?
      logger.warn "No transactions found of size #{group_size}, asked for #{sample_size} (minimum history: #{options[:minimum_history]})"
      next
    end

    if group.size < sample_size
      logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{sample_size}"
    end
    picked_ids = group.sample(sample_size, random: rng).map(&:id)
    sampled << picked_ids
    STDERR.puts "Sampled #{picked_ids.size} txes of size #{group_size}"
  end
  sampled.flatten.uniq
end