Class: ReqSample::Generator

Inherits:
Object
  • Object
show all
Defined in:
lib/reqsample/generator.rb

Overview

Main class for creating randomized data.

Constant Summary collapse

DEFAULT_COUNT =
1000
DEFAULT_DOMAIN =
'http://example.com'.freeze
DEFAULT_FORMAT =
:apache
DEFAULT_MAX_BYTES =
512

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(peak_sd = 4.0) ⇒ Generator

Returns a new instance of Generator.

Parameters:

  • peak_sd (Float) (defaults to: 4.0)

    standard deviation in the normal distribution



34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/reqsample/generator.rb', line 34

# Builds a generator by loading the vendored JSON data sets and wrapping the
# lookup tables with ReqSample::Hash.weighted.
#
# @param peak_sd [Float] standard deviation handed to the normal
#   distribution used when sampling timestamps
def initialize(peak_sd = 4.0)
  agent_table = vendor('user_agents.json')
  @agents = ReqSample::Hash.weighted(agent_table)
  @codes = ReqSample::Hash.weighted(ReqSample::RESPONSE_CODES)

  connectivity_table = vendor('country_connectivity.json')
  @connectivity = ReqSample::Hash.weighted(connectivity_table)

  # The distribution is centred on zero because its samples are added as an
  # offset to a Time object (see #sample_time).
  @dist = Rubystats::NormalDistribution.new(0, peak_sd)
  @max_bytes = DEFAULT_MAX_BYTES
  @networks = vendor('country_networks.json')
  @verbs = ReqSample::Hash.weighted(ReqSample::REQUEST_VERBS)
end

Instance Attribute Details

#agentsObject

Returns the value of attribute agents.



15
16
17
# File 'lib/reqsample/generator.rb', line 15

# Reader for @agents, which #initialize sets to
# ReqSample::Hash.weighted(vendor('user_agents.json')).
#
# @return [Object] the current value of @agents
def agents
  @agents
end

#codesObject

Returns the value of attribute codes.



15
16
17
# File 'lib/reqsample/generator.rb', line 15

# Reader for @codes, which #initialize sets to
# ReqSample::Hash.weighted(ReqSample::RESPONSE_CODES).
#
# @return [Object] the current value of @codes
def codes
  @codes
end

#connectivityObject

Returns the value of attribute connectivity.



15
16
17
# File 'lib/reqsample/generator.rb', line 15

# Reader for @connectivity, which #initialize sets to
# ReqSample::Hash.weighted(vendor('country_connectivity.json')).
#
# @return [Object] the current value of @connectivity
def connectivity
  @connectivity
end

#distObject

Returns the value of attribute dist.



15
16
17
# File 'lib/reqsample/generator.rb', line 15

# Reader for @dist, which #initialize sets to
# Rubystats::NormalDistribution.new(0, peak_sd). #sample_time draws hour
# offsets from it via #rng.
#
# @return [Object] the current value of @dist
def dist
  @dist
end

#max_bytesObject

Returns the value of attribute max_bytes.



15
16
17
# File 'lib/reqsample/generator.rb', line 15

# Reader for @max_bytes, which #initialize sets to DEFAULT_MAX_BYTES. #sample
# passes it to Kernel#rand to pick the :bytes value of an event.
#
# @return [Object] the current value of @max_bytes
def max_bytes
  @max_bytes
end

#networksObject

Returns the value of attribute networks.



15
16
17
# File 'lib/reqsample/generator.rb', line 15

# Reader for @networks, which #initialize sets to the parsed contents of
# vendor('country_networks.json'). #sample_address indexes it by country and
# destructures a sampled entry into a head/tail address pair.
#
# @return [Object] the current value of @networks
def networks
  @networks
end

#verbsObject

Returns the value of attribute verbs.



15
16
17
# File 'lib/reqsample/generator.rb', line 15

# Reader for @verbs, which #initialize sets to
# ReqSample::Hash.weighted(ReqSample::REQUEST_VERBS).
#
# @return [Object] the current value of @verbs
def verbs
  @verbs
end

Instance Method Details

#format(style, country, sample) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/reqsample/generator.rb', line 94

# Renders one sampled event in the requested output style.
#
# @param style [#to_s] 'apache' (or :apache) produces a single log line;
#   anything else returns the raw data keyed by country
# @param country [Object] key used for the hash form of the result
# @param sample [Hash] event data; the apache form reads :address, :time,
#   :verb, :path, :code, :bytes, :domain and :agent
# @return [String, Hash] the log line, or `{ country => sample }`
def format(style, country, sample)
  return { country => sample } unless style.to_s == 'apache'

  timestamp = sample[:time].strftime('%d/%b/%Y:%H:%M:%S %z')
  request = "#{sample[:verb]} #{sample[:path]} HTTP/1.1"

  %(#{sample[:address]} - - [#{timestamp}] "#{request}" ) +
    %(#{sample[:code]} #{sample[:bytes]} "#{sample[:domain]}" "#{sample[:agent]}")
end

#produce(opts = {}) ⇒ Array<String, Hash>

Yields each generated log event to the given block; when no block is given, returns an Enumerator over the generated log events.

Parameters:

  • opts (Hash) (defaults to: {})

    a customizable set of options

Options Hash (opts):

  • :count (Integer)

    how many logs to generate

  • :format (String)

    form to return logs, :apache or :hash

  • :peak (Time)

    normal distribution peak for log timestamps

  • :sleep (Boolean)

    whether or not to “realistically” sleep between emitting logs.

  • :truncate (Integer)

    hard limit to keep log range within

Returns:

  • (Array<String, Hash>)

    the collection of generated log events



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/reqsample/generator.rb', line 55

# Generates log events, yielding each one to the given block.
#
# Defaults are applied to a copy of +opts+, so the caller's hash is never
# modified and may be frozen.
#
# @param opts [Hash] a customizable set of options
# @option opts [Integer] :count how many logs to generate
#   (defaults to DEFAULT_COUNT)
# @option opts [String, Symbol] :format form to return logs, :apache or :hash
#   (defaults to DEFAULT_FORMAT)
# @option opts [Time] :peak normal distribution peak for log timestamps
#   (defaults to twelve hours before now)
# @option opts [Boolean] :sleep whether to sleep until each event's timestamp
#   before emitting it (defaults to false)
# @option opts [Integer] :truncate hard limit, in hours, to keep the log range
#   within (defaults to 12)
# @yield [event] each generated event, formatted by #sample
# @return [Enumerator] when no block is given
# @return [Integer] the value of :count when a block is given (the result of
#   Integer#times)
def produce(opts = {})
  # Work on a shallow copy so filling in defaults does not leak into (or
  # blow up on a frozen) caller-owned hash.
  settings = opts.dup
  settings[:count] ||= DEFAULT_COUNT
  settings[:format] ||= DEFAULT_FORMAT
  settings[:peak] ||= Time.now - (12 * 60 * 60)
  settings[:sleep] ||= false
  settings[:truncate] ||= 12

  # Hand the resolved settings to the enumerator so the default peak is
  # computed once, not each time the enumerator is iterated.
  return to_enum(:produce, settings) unless block_given?

  settings[:count].times do
    time = sample_time settings[:peak], settings[:truncate]

    if settings[:sleep]
      # Only timestamps that lie in the future cause a wait.
      delay = time - Time.now
      sleep delay if delay > 0
    end

    yield sample time, settings[:format]
  end
end

#sample(time = nil, fmt = nil) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/reqsample/generator.rb', line 71

# Builds a single randomized request event and formats it.
#
# @param time [Time, nil] timestamp for the event; when omitted, one is drawn
#   with #sample_time around a peak twelve hours before now, truncated to
#   12 hours (the same defaults #produce applies)
# @param fmt [#to_s, nil] output style passed through to #format
# @return [String, Hash] whatever #format returns for the event
def sample(time = nil, fmt = nil)
  # Pull a random country, but ensure it's a valid country code for the
  # list of networks that we have available: the block picks the first
  # candidate code that has an entry in #networks.
  country = connectivity.weighted_sample do |ccodes|
    ccodes.detect { |ccode| networks.key?(ccode) }
  end

  event = {
    address: sample_address(country),
    agent: agents.weighted_sample,
    bytes: rand(max_bytes),
    code: codes.weighted_sample,
    domain: DEFAULT_DOMAIN,
    path: ReqSample::REQUEST_PATHS.sample,
    # #sample_time needs an explicit peak and truncation; this method has no
    # options hash to read them from, so fall back to #produce's defaults.
    time: time || sample_time(Time.now - (12 * 60 * 60), 12),
    verb: verbs.weighted_sample
  }

  format fmt, country, event
end

#sample_address(country = nil) ⇒ Object



111
112
113
114
115
116
117
118
119
# File 'lib/reqsample/generator.rb', line 111

# Picks a random IPv4 address from one of the address ranges listed for a
# country in #networks.
#
# @param country [Object, nil] key into #networks; a random key is chosen
#   when omitted
# @return [IPAddr] an address between the sampled range's two endpoints
#   (inclusive)
def sample_address(country = nil)
  code = country || networks.keys.sample

  # Each entry under a country is destructured as a [first, last] address pair.
  first_ip, last_ip = networks[code].sample
  bounds = IPAddr.new(first_ip).to_i..IPAddr.new(last_ip).to_i

  IPAddr.new(rand(bounds), Socket::AF_INET)
end

#sample_time(peak, truncate) ⇒ Object

Limit the normal distribution to +/- truncate hours around peak (#produce defaults truncate to 12, so that samples stay within a 24-hour period).



123
124
125
126
127
128
# File 'lib/reqsample/generator.rb', line 123

# Draws timestamps around +peak+ until one passes the truncation check.
#
# @param peak [Time] centre of the distribution
# @param truncate [Integer] limit handed to ReqSample::Time#within together
#   with +peak+ (presumably a number of hours -- confirm against that method)
# @return [ReqSample::Time] the first candidate accepted by #within
def sample_time(peak, truncate)
  loop do
    # dist.rng is scaled from hours to seconds before being added to peak.
    offset_seconds = dist.rng * 60 * 60
    candidate = ReqSample::Time.at((peak + offset_seconds).to_i)
    return candidate if candidate.within(peak, truncate)
  end
end

#vendor(file) ⇒ Object



28
29
30
31
# File 'lib/reqsample/generator.rb', line 28

# Loads and parses a JSON data file from the vendor directory, which is
# resolved relative to this source file.
#
# @param file [String] file name inside the vendor directory
# @return [Object] the parsed JSON document
def vendor(file)
  vendor_dir = File.expand_path('../../../vendor', __FILE__)
  json_path = File.join(vendor_dir, file)
  raw = File.read(json_path)
  JSON.parse(raw)
end