Module: Gitlab::Database::Median

Included in:
CycleAnalytics::BaseQuery
Defined in:
lib/gitlab/database/median.rb

Constant Summary collapse

NotSupportedError =
Class.new(StandardError)

Instance Method Summary collapse

Instance Method Details

#extract_median(results) ⇒ Object


17
18
19
20
21
22
23
# File 'lib/gitlab/database/median.rb', line 17

def extract_median(results)
  result = results.compact.first

  result = result.first.presence

  result['median']&.to_f if result
end

#extract_medians(results) ⇒ Object


25
26
27
28
29
30
31
# File 'lib/gitlab/database/median.rb', line 25

def extract_medians(results)
  median_values = results.compact.first.values

  median_values.each_with_object({}) do |(id, median), hash|
    hash[id.to_i] = median&.to_f
  end
end

#median_datetime(arel_table, query_so_far, column_sym) ⇒ Object


9
10
11
# File 'lib/gitlab/database/median.rb', line 9

def median_datetime(arel_table, query_so_far, column_sym)
  extract_median(execute_queries(arel_table, query_so_far, column_sym)).presence
end

#median_datetimes(arel_table, query_so_far, column_sym, partition_column) ⇒ Object


13
14
15
# File 'lib/gitlab/database/median.rb', line 13

def median_datetimes(arel_table, query_so_far, column_sym, partition_column)
  extract_medians(execute_queries(arel_table, query_so_far, column_sym, partition_column)).presence
end

#pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column = nil) ⇒ Object


33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/gitlab/database/median.rb', line 33

def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column = nil)
  # Create a CTE with the column we're operating on, row number (after sorting by the column
  # we're operating on), and count of the table we're operating on (duplicated across) all rows
  # of the CTE. For example, if we're looking to find the median of the `projects.star_count`
  # column, the CTE might look like this:
  #
  #  star_count | row_id | ct
  # ------------+--------+----
  #           5 |      1 |  3
  #           9 |      2 |  3
  #          15 |      3 |  3
  #
  #  If a partition column is used we will do the same operation but for separate partitions,
  #  when that happens the CTE might look like this:
  #
  #  project_id | star_count | row_id | ct
  # ------------+------------+--------+----
  #           1 |          5 |     1 |  2
  #           1 |          9 |     2 |  2
  #           2 |         10 |     1 |  3
  #           2 |         15 |     2 |  3
  #           2 |         20 |     3 |  3
  cte_table = Arel::Table.new("ordered_records")

  cte = Arel::Nodes::As.new(
    cte_table,
    arel_table.project(*rank_rows(arel_table, column_sym, partition_column)).
      # Disallow negative values
      where(arel_table[column_sym].gteq(zero_interval)))

  # From the CTE, select either the middle row or the middle two rows (this is accomplished
  # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
  # selected rows, and this is the median value.
  result =
    cte_table
      .project(*median_projections(cte_table, column_sym, partition_column))
      .where(
        Arel::Nodes::Between.new(
          cte_table[:row_id],
          Arel::Nodes::And.new(
            [(cte_table[:ct] / Arel.sql('2.0')),
             (cte_table[:ct] / Arel.sql('2.0') + 1)]
          )
        )
      )
      .with(query_so_far, cte)

  result.group(cte_table[partition_column]).order(cte_table[partition_column]) if partition_column

  result.to_sql
end