Class: Vmpooler::Metrics::Promstats
- Inherits:
-
Vmpooler::Metrics
- Object
- Vmpooler::Metrics
- Vmpooler::Metrics::Promstats
- Defined in:
- lib/vmpooler/metrics/promstats.rb,
lib/vmpooler/metrics/promstats/collector_middleware.rb
Defined Under Namespace
Classes: CollectorMiddleware
Constant Summary collapse
- M_COUNTER =
Constants for Metric Types
1
- M_GAUGE =
2
- M_SUMMARY =
3
- M_HISTOGRAM =
4
- POOLER_CLONE_TIME_BUCKETS =
Customised Bucket set to use for the Pooler clone times set to more appropriate intervals.
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 120.0, 180.0, 240.0, 300.0, 600.0].freeze
- POOLER_READY_TIME_BUCKETS =
[30.0, 60.0, 120.0, 180.0, 240.0, 300.0, 500.0, 800.0, 1200.0, 1600.0].freeze
- REDIS_CONNECT_BUCKETS =
Same for redis connection times - this is the same as the current Prometheus Default. github.com/prometheus/client_ruby/blob/master/lib/prometheus/client/histogram.rb#L14
[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 18.0, 23.0].freeze
Instance Attribute Summary collapse
-
#prefix ⇒ Object
readonly
Returns the value of attribute prefix.
-
#prometheus_endpoint ⇒ Object
readonly
Returns the value of attribute prometheus_endpoint.
-
#prometheus_prefix ⇒ Object
readonly
Returns the value of attribute prometheus_prefix.
Instance Method Summary collapse
-
#add_prometheus_metric(metric_spec, name, docstring) ⇒ Object
Helper to add individual prom metric.
-
#find_metric(label) ⇒ Object
locate a metric and check/interpret the sub-fields.
- #gauge(label, value) ⇒ Object
-
#get(label) ⇒ Object
Helper to look up a label: returns the interpreted metric spec and the registered prometheus metric.
-
#increment(label) ⇒ Object
Note - Catch and log metrics failures so they can be noted, but don’t interrupt vmpooler operation.
-
#initialize(logger, params = {}) ⇒ Promstats
constructor
rubocop:disable Lint/MissingSuper.
-
#setup_prometheus_metrics(torun) ⇒ Object
Top level method to register all the prometheus metrics.
- #timing(label, duration) ⇒ Object
-
#vmpooler_metrics_table ⇒ Object
The Metrics table is used to register metrics and translate/interpret the incoming metrics.
Methods inherited from Vmpooler::Metrics
Constructor Details
#initialize(logger, params = {}) ⇒ Promstats
rubocop:disable Lint/MissingSuper
27 28 29 30 31 32 33 34 35 |
# File 'lib/vmpooler/metrics/promstats.rb', line 27
#
# Build the Prometheus-backed metrics object.
#
# @param logger [Object] logger kept in @logger for later error reporting
# @param params [Hash] optional string-keyed settings: 'prefix',
#   'prometheus_prefix' and 'prometheus_endpoint'; each one falls back to its
#   default when the entry is missing or falsy
def initialize(logger, params = {})
  @logger = logger

  # Naming/endpoint settings, each with a built-in default.
  @prefix = params['prefix'] || 'vmpooler'
  @prometheus_prefix = params['prometheus_prefix'] || 'vmpooler'
  @prometheus_endpoint = params['prometheus_endpoint'] || '/prometheus'

  # Set up the prometheus registry and data structures
  @prometheus = Prometheus::Client.registry
end
Instance Attribute Details
#prefix ⇒ Object (readonly)
Returns the value of attribute prefix.
8 9 10 |
# File 'lib/vmpooler/metrics/promstats.rb', line 8
#
# Reader for the prefix attribute.
#
# @return [Object] the current value of @prefix
def prefix
  @prefix
end
#prometheus_endpoint ⇒ Object (readonly)
Returns the value of attribute prometheus_endpoint.
8 9 10 |
# File 'lib/vmpooler/metrics/promstats.rb', line 8
#
# Reader for the prometheus_endpoint attribute.
#
# @return [Object] the current value of @prometheus_endpoint
def prometheus_endpoint
  @prometheus_endpoint
end
#prometheus_prefix ⇒ Object (readonly)
Returns the value of attribute prometheus_prefix.
8 9 10 |
# File 'lib/vmpooler/metrics/promstats.rb', line 8
#
# Reader for the prometheus_prefix attribute.
#
# @return [Object] the current value of @prometheus_prefix
def prometheus_prefix
  @prometheus_prefix
end
Instance Method Details
#add_prometheus_metric(metric_spec, name, docstring) ⇒ Object
Helper to add individual prom metric. Allow Histograms to specify the bucket size.
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 |
# File 'lib/vmpooler/metrics/promstats.rb', line 338
#
# Helper to add an individual prometheus metric to the registry.
# Histograms may specify their bucket boundaries through :buckets.
#
# @param metric_spec [Hash] one metrics-table entry; :mtype selects the metric
#   class, :param_labels (optional) lists the label names and :buckets
#   (optional, only honoured for histograms) sets the bucket boundaries
# @param name [String, Symbol] full metric name; converted with to_sym
# @param docstring [String] help text handed to the metric
# @return [Object] whatever @prometheus.register returns
# @raise [RuntimeError] when :mtype is not one of the M_* constants
def add_prometheus_metric(metric_spec, name, docstring)
  metric_class =
    case metric_spec[:mtype]
    when M_COUNTER then Prometheus::Client::Counter
    when M_GAUGE then Prometheus::Client::Gauge
    when M_SUMMARY then Prometheus::Client::Summary
    when M_HISTOGRAM then Prometheus::Client::Histogram
    else
      raise("Unable to register metric #{name} with metric type #{metric_spec[:mtype]}")
    end

  options = {
    docstring: docstring,
    # :param_labels is optional in the metrics table, so a missing entry
    # means "no parameter labels" rather than a NoMethodError on nil.
    labels: Array(metric_spec[:param_labels]) + [:vmpooler_instance],
    preset_labels: { vmpooler_instance: @prefix }
  }

  # Only histograms accept custom buckets, and only when the spec gives them.
  options[:buckets] = metric_spec[:buckets] if metric_spec[:mtype] == M_HISTOGRAM && metric_spec.key?(:buckets)

  @prometheus.register(metric_class.new(name.to_sym, **options))
end
#find_metric(label) ⇒ Object
locate a metric and check/interpret the sub-fields.
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 |
# File 'lib/vmpooler/metrics/promstats.rb', line 401
#
# Locate a metric in @p_metrics and check/interpret the sub-fields of a
# dotted label such as 'checkout.success.centos-8-x86_64'.
#
# @param label [String] dotted metric label; the first segment is the table
#   key, the next one is the suffix when the entry has :metric_suffixes, and
#   the remainder supplies the values for :param_labels
# @return [Hash] shallow copy of the table entry with :metric_name added and,
#   when the entry has :param_labels, a :labels hash of label name => value
# @raise [RuntimeError] when the key or suffix is unknown or missing, or when
#   the metric's :torun shares no component with @torun
def find_metric(label)
  sublabels = label.split('.')
  raise("Invalid Metric #{label.inspect}") if sublabels.empty?

  metric_key = sublabels.shift.to_sym
  raise("Invalid Metric #{metric_key} for #{label}") unless @p_metrics.key? metric_key

  metric_spec = @p_metrics[metric_key]
  # Array#& always returns an Array, so "no shared component" is an empty
  # intersection (a nil check can never fire).
  raise("Invalid Component #{@torun} for #{metric_key}") if (metric_spec[:torun] & @torun).empty?

  # Work on a copy so the table entry itself is never given :metric_name/:labels.
  metric = metric_spec.clone

  if metric.key? :metric_suffixes
    metric_subkey = sublabels.shift
    unless metric_subkey && metric[:metric_suffixes].key?(metric_subkey.to_sym)
      raise("Invalid Metric #{metric_key}_#{metric_subkey} for #{label}")
    end

    metric[:metric_name] = "#{@prometheus_prefix}_#{metric_key}_#{metric_subkey}"
  else
    metric[:metric_name] = "#{@prometheus_prefix}_#{metric_key}"
  end

  # Check if we are looking for a parameter value at last element.
  if metric.key? :param_labels
    metric[:labels] = {}
    # Special case processing here - if there is only one parameter label then make sure
    # we append all of the remaining contents of the metric with "." separators to ensure
    # we get full nodenames (e.g. for Migration to node operations)
    if metric[:param_labels].length == 1
      metric[:labels][metric[:param_labels].first] = sublabels.join('.')
    else
      # Fill from the right so the last segment maps to the last label name.
      metric[:param_labels].reverse_each do |param_label|
        metric[:labels][param_label] = sublabels.pop
      end
    end
  end

  metric
end
#gauge(label, value) ⇒ Object
453 454 455 456 457 458 459 460 461 462 |
# File 'lib/vmpooler/metrics/promstats.rb', line 453
#
# Set a gauge to the integer form of value. A nil value is ignored. Any
# StandardError raised while looking up or setting the gauge is logged
# through @logger and not re-raised.
#
# @param label [String] metric label passed to get
# @param value [#to_i, nil] new gauge value
def gauge(label, value)
  return if value.nil?

  spec, collector = get(label)
  collector.set(value.to_i, labels: spec[:labels])
rescue StandardError => e
  @logger.log('s', "[!] prometheus error logging gauge #{label}, value #{value}: #{e}")
end
#get(label) ⇒ Object
Helper to look up a label: returns the interpreted metric spec and the registered prometheus metric.
438 439 440 441 |
# File 'lib/vmpooler/metrics/promstats.rb', line 438
#
# Resolve a label through find_metric and fetch the matching entry from the
# @prometheus registry.
#
# @param label [String] metric label passed to find_metric
# @return [Array] two elements: the hash returned by find_metric, then the
#   result of @prometheus.get for that hash's :metric_name
def get(label)
  spec = find_metric(label)
  collector = @prometheus.get(spec[:metric_name])
  [spec, collector]
end
#increment(label) ⇒ Object
Note - Catch and log metrics failures so they can be noted, but don’t interrupt vmpooler operation.
444 445 446 447 448 449 450 451 |
# File 'lib/vmpooler/metrics/promstats.rb', line 444
#
# Increment the counter identified by label.
# Note - failures are caught and logged through @logger so they can be
# noticed, but they never interrupt the caller.
#
# @param label [String] metric label passed to get
def increment(label)
  spec, collector = get(label)
  collector.increment(labels: spec[:labels])
rescue StandardError => e
  @logger.log('s', "[!] prometheus error logging metric #{label} increment : #{e}")
end
#setup_prometheus_metrics(torun) ⇒ Object
Top level method to register all the prometheus metrics.
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 |
# File 'lib/vmpooler/metrics/promstats.rb', line 373
#
# Top level method to register all the prometheus metrics. Stores torun in
# @torun and the metrics table in @p_metrics, then registers every table
# entry whose :torun overlaps the given torun list.
#
# @param torun [Array] components being run; compared against each entry's
#   :torun with an array intersection
# @return [Hash] the metrics table stored in @p_metrics
def setup_prometheus_metrics(torun)
  @torun = torun
  @p_metrics = vmpooler_metrics_table

  @p_metrics.each do |key, spec|
    # Skip metrics that belong to none of the requested components.
    next if (torun & spec[:torun]).empty?

    base_name = "#{@prometheus_prefix}_#{key}"

    unless spec.key? :metric_suffixes
      # No suffixes: the entry maps to exactly one metric.
      add_prometheus_metric(spec, base_name, spec[:docstring])
      next
    end

    # One metric per suffix; the suffix text is appended to the docstring.
    spec[:metric_suffixes].each do |entry|
      add_prometheus_metric(spec, "#{base_name}_#{entry[0]}", "#{spec[:docstring]} #{entry[1]}")
    end
  end
end
#timing(label, duration) ⇒ Object
464 465 466 467 468 469 470 471 472 473 474 |
# File 'lib/vmpooler/metrics/promstats.rb', line 464
#
# Record a duration as an observation on the metric identified by label
# (see https://prometheus.io/docs/practices/histograms/). A nil duration is
# ignored. Any StandardError is logged through @logger and not re-raised.
#
# @param label [String] metric label passed to get
# @param duration [#to_f, nil] value to observe
def timing(label, duration)
  return if duration.nil?

  spec, collector = get(label)
  collector.observe(duration.to_f, labels: spec[:labels])
rescue StandardError => e
  @logger.log('s', "[!] prometheus error logging timing event label #{label}, duration #{duration}: #{e}")
end
#vmpooler_metrics_table ⇒ Object
The Metrics table is used to register metrics and translate/interpret the incoming metrics.
This table describes all of the prometheus metrics that are recognised by the application.
The background documentation for defining metrics is at: https://prometheus.io/docs/introduction/
In particular, the naming practices should be adhered to: https://prometheus.io/docs/practices/naming/
The Ruby Client docs are also useful: https://github.com/prometheus/client_ruby
The table here allows the currently used stats definitions to be translated correctly for Prometheus.
The current format is of the form A.B.C, where the final fields may be actual values (e.g. poolname).
Prometheus metrics cannot use the '.' as a character, so this is either translated into '_' or
variable parameters are expressed as labels accompanying the metric.
Sample statistics are:
# Example showing hostnames (FQDN)
migrate_from.pix-jj26-chassis1-2.ops.puppetlabs.net
migrate_to.pix-jj26-chassis1-8.ops.puppetlabs.net
# Example showing poolname as a parameter
poolreset.invalid.centos-8-x86_64
# Examples showing similar sub-typed checkout stats
checkout.empty.centos-8-x86_64
checkout.invalid.centos-8-x86_64
checkout.invalid.unknown
checkout.success.centos-8-x86_64
# Stats without any final parameter.
connect.fail
connect.open
delete.failed
delete.success
# Stats with multiple param_labels
vmpooler_user.debian-8-x86_64-pixa4.john
The metrics implementation here preserves the existing framework which will continue to support
graphite and statsd (since vmpooler is used outside of puppet). Some rationalisation and renaming
of the actual metrics was done to get a more usable model to fit within the prometheus framework.
This particularly applies to the user stats collected once individual machines are terminated as
this would have challenged prometheus' ability due to multiple (8) parameters being collected
in a single measure (which has a very high cardinality).
Prometheus requires all metrics to be pre-registered (which is the primary reason for this
table) and also uses labels to differentiate the characteristics of the measurement. This
is used throughout to capture information such as poolnames. So for example, this is a sample
of the prometheus metrics generated for the "vmpooler_ready" measurement:
# TYPE vmpooler_ready gauge
# HELP vmpooler_ready vmpooler number of machines in ready State
vmpooler_ready{vmpooler_instance="vmpooler",poolname="win-10-ent-x86_64-pixa4"} 2.0
vmpooler_ready{vmpooler_instance="vmpooler",poolname="debian-8-x86_64-pixa4"} 2.0
vmpooler_ready{vmpooler_instance="vmpooler",poolname="centos-8-x86_64-pixa4"} 2.0
Prometheus supports the following metric types:
(see https://prometheus.io/docs/concepts/metric_types/)
Counter (increment):
A counter is a cumulative metric that represents a single monotonically increasing counter whose
value can only increase or be reset to zero on restart
Gauge:
A gauge is a metric that represents a single numerical value that can arbitrarily go up and down.
Histogram:
A histogram samples observations (usually things like request durations or response sizes) and
counts them in configurable buckets. It also provides a sum of all observed values.
This replaces the timer metric supported by statsd
Summary :
Summary provides a total count of observations and a sum of all observed values, it calculates
configurable quantiles over a sliding time window.
(Summary is not used in vmpooler)
vmpooler_metrics_table is a table of hashes, where the hash key represents the first part of the
metric name, e.g. for the metric 'delete.*' (see above) the key would be 'delete:'. "Sub-metrics",
are supported, again for the 'delete.*' example, this can be subbed into '.failed' and '.success'
The entries within the hash are as follows:
mtype:
Metric type, which is one of the following constants:
M_COUNTER = 1
M_GAUGE = 2
M_SUMMARY = 3
M_HISTOGRAM = 4
torun:
Indicates which process the metric is for - within vmpooler this is either ':api' or ':manager'
(there is a suggestion that we change this to two separate tables).
docstring:
Documentation string for the metric - this is displayed as HELP text by the endpoint.
metric_suffixes:
Array of sub-metrics of the form 'sub-metric: "doc-string for sub-metric"'. This supports
the generation of individual sub-metrics for all elements in the array.
param_labels:
This is an optional array of symbols for the final labels in a metric. It should not be
specified if there are no additional parameters.
If it is specified, it can either be a single symbol, or two or more symbols. The treatment
differs if there is only one symbol given as all of the remainder of the metric string
supplied is collected into a label with the symbol name. This allows the handling of
node names (FQDN).
To illustrate:
1. In the 'connect.*' or 'delete.*' example above, it should not be specified.
2. For the 'migrate_from.*' example above, the remainder of the measure is collected
as the 'host_name' label.
3. For the 'vmpooler_user' example above, the first parameter is treated as the pool
name, and the second as the username.
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 |
# File 'lib/vmpooler/metrics/promstats.rb', line 153
#
# Build the table of every prometheus metric known to this class.
#
# Each key is the first segment of a metric label; each value is a hash with
# :mtype (one of the M_* constants), :torun (components the metric applies
# to), :docstring (HELP text), optional :metric_suffixes (suffix => extra doc
# text), optional :buckets (histograms) and :param_labels (label names).
#
# @return [Hash{Symbol => Hash}] a freshly built table on every call
def vmpooler_metrics_table
  {
    errors: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'Count of errors for pool',
      metric_suffixes: {
        markedasfailed: 'timeout waiting for instance to initialise',
        duplicatehostname: 'unable to create instance due to duplicate hostname',
        staledns: 'unable to create instance due to duplicate DNS record'
      },
      param_labels: %i[template_name]
    },
    user: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Number of pool instances and the operation performed by a user',
      param_labels: %i[user operation poolname]
    },
    usage_litmus: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Number of pool instances and the operation performed by Litmus jobs',
      param_labels: %i[user operation poolname]
    },
    usage_jenkins_instance: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Number of pool instances and the operation performed by Jenkins instances',
      param_labels: %i[jenkins_instance value_stream operation poolname]
    },
    usage_branch_project: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Number of pool instances and the operation performed by Jenkins branch/project',
      param_labels: %i[branch project operation poolname]
    },
    usage_job_component: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Number of pool instances and the operation performed by Jenkins job/component',
      param_labels: %i[job_name component_to_test operation poolname]
    },
    checkout: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Pool checkout counts',
      metric_suffixes: {
        nonresponsive: 'checkout failed - non responsive machine',
        empty: 'checkout failed - no machine',
        success: 'successful checkout',
        invalid: 'checkout failed - invalid template'
      },
      param_labels: %i[poolname]
    },
    delete: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Delete machine',
      metric_suffixes: {
        success: 'succeeded',
        failed: 'failed'
      },
      param_labels: []
    },
    ondemandrequest_generate: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Ondemand request',
      metric_suffixes: {
        duplicaterequests: 'failed duplicate request',
        success: 'succeeded'
      },
      param_labels: []
    },
    ondemandrequest_fail: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Ondemand request failure',
      metric_suffixes: {
        toomanyrequests: 'too many requests',
        invalid: 'invalid poolname'
      },
      param_labels: %i[poolname]
    },
    config: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'vmpooler pool configuration request',
      metric_suffixes: { invalid: 'Invalid' },
      param_labels: %i[poolname]
    },
    poolreset: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Pool reset counter',
      metric_suffixes: { invalid: 'Invalid Pool' },
      param_labels: %i[poolname]
    },
    connect: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'vmpooler connect (to vSphere)',
      metric_suffixes: {
        open: 'Connect Succeeded',
        fail: 'Connect Failed'
      },
      param_labels: []
    },
    migrate_from: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'vmpooler machine migrated from',
      param_labels: %i[host_name]
    },
    migrate_to: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'vmpooler machine migrated to',
      param_labels: %i[host_name]
    },
    http_requests_vm_total: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Total number of HTTP request/sub-operations handled by the Rack application under the /vm endpoint',
      param_labels: %i[method subpath operation]
    },
    ready: {
      mtype: M_GAUGE,
      torun: %i[manager],
      docstring: 'vmpooler number of machines in ready State',
      param_labels: %i[poolname]
    },
    running: {
      mtype: M_GAUGE,
      torun: %i[manager],
      docstring: 'vmpooler number of machines running',
      param_labels: %i[poolname]
    },
    connection_available: {
      mtype: M_GAUGE,
      torun: %i[manager],
      docstring: 'vmpooler redis connections available',
      param_labels: %i[type provider]
    },
    time_to_ready_state: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: POOLER_READY_TIME_BUCKETS,
      # HELP text previously said "to read ready state".
      docstring: 'Time taken for machine to reach ready state for pool',
      param_labels: %i[poolname]
    },
    migrate: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: POOLER_CLONE_TIME_BUCKETS,
      docstring: 'vmpooler time taken to migrate machine for pool',
      param_labels: %i[poolname]
    },
    clone: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: POOLER_CLONE_TIME_BUCKETS,
      docstring: 'vmpooler time taken to clone machine',
      param_labels: %i[poolname]
    },
    destroy: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: POOLER_CLONE_TIME_BUCKETS,
      docstring: 'vmpooler time taken to destroy machine',
      param_labels: %i[poolname]
    },
    connection_waited: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: REDIS_CONNECT_BUCKETS,
      docstring: 'vmpooler redis connection wait time',
      param_labels: %i[type provider]
    }
  }
end