19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
# File 'lib/evoc/experiment.rb', line 19
# Draws a seeded random sample of transaction ids from the base history.
#
# The pool starts as Evoc::HistoryStore.base_history and is narrowed, in
# this order, by whichever of these options are non-nil in +opts+:
# :recent, :minimum_commit_size, :maximum_commit_size, :minimum_history,
# :recent_viable. Sampling then runs once per entry of opts[:sample_groups]
# ('*' samples from the whole pool, an integer samples among transactions of
# exactly that size), taking up to opts[:sample_size] ids each time with a
# Random seeded from opts[:seed].
#
# @return [Array] the unique sampled transaction ids; empty when a filter
#   was requested and it removed every transaction
# @raise [ArgumentError] if a sample group is neither '*' nor an integer
#   (Integer, or a String holding a base-10 integer)
def sample_transactions
  rng = Random.new(opts[:seed])
  sampling_history = Evoc::HistoryStore.base_history
  STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
  sample = []

  unless opts[:recent].nil?
    size = sampling_history.size
    # Keep only the tail of the pool; the max() guards against a negative start index.
    sampling_history = sampling_history[[0, size - opts[:recent]].max..-1]
    STDERR.puts " Filtering to the #{opts[:recent]} most recent transactions (new pool size: #{sampling_history.size})"
  end
  unless opts[:minimum_commit_size].nil?
    sampling_history = sampling_history.select { |tx| tx.size >= opts[:minimum_commit_size] }
    STDERR.puts " Filtering to txes larger than or equal to #{opts[:minimum_commit_size]} (new pool size: #{sampling_history.size})"
  end
  unless opts[:maximum_commit_size].nil?
    sampling_history = sampling_history.select { |tx| tx.size <= opts[:maximum_commit_size] }
    STDERR.puts " Filtering to txes smaller than or equal to #{opts[:maximum_commit_size]} (new pool size: #{sampling_history.size})"
  end
  unless opts[:minimum_history].nil?
    sampling_history = sampling_history.select { |tx| tx.index >= opts[:minimum_history] }
    STDERR.puts " Filtering to txes with at least #{opts[:minimum_history]} previous txes (new pool size: #{sampling_history.size})"
  end
  unless opts[:recent_viable].nil?
    # Same tail slice as :recent, but applied after the size/history filters.
    size = sampling_history.size
    sampling_history = sampling_history[[0, size - opts[:recent_viable]].max..-1]
    STDERR.puts " Filtering to the #{opts[:recent_viable]} most recent viable transactions (new pool size: #{sampling_history.size})"
  end

  filtering_switches = [:recent, :recent_viable, :minimum_commit_size, :maximum_commit_size, :minimum_history]
  if filtering_switches.any? { |switch| !opts[switch].nil? } && sampling_history.size == 0
    STDERR.puts "WARNING: All transactions were filtered out, unable to sample"
    return []
  end

  if opts[:sample_size] > sampling_history.size
    STDERR.puts "WARNING: The sample size is larger than the available transactions"
  end

  # Grouping happens once, before any sampling, so the '*' branch below does
  # not change which transactions the sized groups can draw from.
  groups = sampling_history.group_by { |tx| tx.size }

  # Sorting by string form puts '*' ahead of any digit, and makes the order
  # (and therefore the seeded draws) independent of how the groups were listed.
  opts[:sample_groups].sort_by(&:to_s).each do |group_size|
    if group_size == '*'
      sampled_ids = sampling_history.map(&:id).sample(opts[:sample_size], random: rng)
      sample << sampled_ids
      STDERR.puts "Sampled #{sampled_ids.size} txes"
      # NOTE(review): the pool object is emptied and refilled in place. When no
      # filter ran, that object is whatever Evoc::HistoryStore.base_history
      # returned - confirm that mutating it is intended.
      filtered_hist = sampling_history.reject { |tx| sampled_ids.include? tx.id }
      sampling_history.clear
      filtered_hist.each { |tx| sampling_history << tx }
      next
    end

    # Parse strictly: a bare `to_i` is always truthy and maps junk such as
    # 'abc' to 0, which previously made the ArgumentError below unreachable.
    wanted_size =
      begin
        group_size.is_a?(Integer) ? group_size : Integer(group_size.to_s.strip, 10)
      rescue ArgumentError
        raise ArgumentError, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
      end

    group = groups[wanted_size]
    if group
      if group.size < opts[:sample_size]
        logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{opts[:sample_size]}"
      end
      sampled_ids = group.sample(opts[:sample_size], random: rng).map(&:id)
      sample << sampled_ids
      STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
    else
      logger.warn "No transactions found of size #{group_size}, asked for #{opts[:sample_size]} (minimum history: #{opts[:minimum_history]})"
    end
  end

  # A transaction can be picked by both '*' and its size group; report it once.
  sample.flatten.uniq
end
|