Class: DataQualityReport

Inherits:
ChartBase show all
Defined in:
lib/jirametrics/data_quality_report.rb

Defined Under Namespace

Classes: Entry

Instance Attribute Summary collapse

Attributes inherited from ChartBase

#aggregated_project, #all_boards, #atlassian_document_format, #canvas_height, #canvas_width, #data_quality, #date_range, #file_system, #holiday_dates, #issues, #settings, #time_range, #timezone_offset

Instance Method Summary collapse

Methods inherited from ChartBase

#aggregated_project?, #canvas, #canvas_responsive?, #chart_format, #collapsible_issues_panel, #color_block, #color_for, #completed_issues_in_range, #current_board, #daily_chart_dataset, #describe_non_working_days, #description_text, #format_integer, #format_status, #header_text, #holidays, #html_directory, #icon_span, #label_days, #link_to_issue, #next_id, #random_color, #render, #render_top_text, #status_category_color, #working_days_annotation, #wrap_and_render

Constructor Details

#initialize(discarded_changes_data) ⇒ DataQualityReport

Returns a new instance of DataQualityReport.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/jirametrics/data_quality_report.rb', line 22

# Builds the report and sets its header and descriptive text.
#
# @param discarded_changes_data [Enumerable<Hash>, nil] records describing discarded changes;
#   stored as-is and consulted later by scan_for_discarded_data
def initialize discarded_changes_data
  super()

  @discarded_changes_data = discarded_changes_data

  header_text 'Data Quality Report'
  # Must be a real heredoc: the description text ends at the HTML terminator, and the
  # method is closed by its own `end`.
  description_text <<-HTML
    <p>
      We have a tendency to assume that anything we see in a chart is 100% accurate, although that's
      not always true. To understand the accuracy of the chart, we have to understand how accurate the
      initial data was and also how much of the original data set was used in the chart. This section
      will hopefully give you enough information to make that decision.
    </p>
  HTML
end

Instance Attribute Details

#board_idObject

Returns the value of attribute board_id.



5
6
7
# File 'lib/jirametrics/data_quality_report.rb', line 5

# Returns the value currently held in @board_id.
def board_id
  @board_id
end

#discarded_changes_dataObject (readonly)

Both for testing purposes only



4
5
6
# File 'lib/jirametrics/data_quality_report.rb', line 4

# Returns the discarded-changes data that was handed to the constructor.
# Exposed for testing purposes only.
def discarded_changes_data
  @discarded_changes_data
end

#entriesObject (readonly)

Both for testing purposes only



4
5
6
# File 'lib/jirametrics/data_quality_report.rb', line 4

# Returns the entries built by initialize_entries (nil until that has run).
# Exposed for testing purposes only.
def entries
  @entries
end

Instance Method Details

#entries_with_problemsObject



117
118
119
# File 'lib/jirametrics/data_quality_report.rb', line 117

# Entries that have at least one recorded problem, in their existing order.
#
# @return [Array] the subset of @entries whose problems list is not empty
def entries_with_problems
  @entries.select { |candidate| !candidate.problems.empty? }
end

#initialize_entriesObject



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/jirametrics/data_quality_report.rb', line 121

# Populates @entries with one Entry per issue that overlaps the report's time range,
# ordered by the numeric suffix of the issue key (the "123" in "ABC-123").
#
# An issue is left out when it stopped before the range begins or started after it ends.
def initialize_entries
  @entries = @issues.filter_map do |issue|
    started, stopped = issue.board.cycletime.started_stopped_times(issue)
    outside_range = (stopped && stopped < time_range.begin) || (started && started > time_range.end)

    Entry.new(started: started, stopped: stopped, issue: issue) unless outside_range
  end

  # Keys that don't end in "-<digits>" yield nil here, which to_i turns into 0.
  issue_number = ->(entry) { entry.issue.key[/.+-(\d+)$/, 1].to_i }
  @entries.sort! { |left, right| issue_number.call(left) <=> issue_number.call(right) }
end

#label_issues(number) ⇒ Object



322
323
324
325
326
# File 'lib/jirametrics/data_quality_report.rb', line 322

# Human readable count of items, using the singular form for exactly one.
#
# @param number [Integer] how many items there are
# @return [String] "1 item" or "<number> items"
def label_issues number
  number == 1 ? '1 item' : "#{number} items"
end

#problems_for(key) ⇒ Object



83
84
85
86
87
88
89
90
91
# File 'lib/jirametrics/data_quality_report.rb', line 83

# Collects every recorded problem of one type across all entries.
#
# @param key [Symbol] the problem key to look for
# @return [Array<Array>] one [issue, detail, key] triple per matching problem, in entry order
def problems_for key
  @entries.flat_map do |entry|
    entry.problems.filter_map do |problem_key, detail|
      [entry.issue, detail, key] if problem_key == key
    end
  end
end

#render_backwards_through_status_categories(problems) ⇒ Object



394
395
396
397
398
399
400
# File 'lib/jirametrics/data_quality_report.rb', line 394

# Explanatory HTML for issues that moved backwards across a status category boundary.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_backwards_through_status_categories problems
  <<-HTML
    #{label_issues problems.size} moved backwards across the board, <b>crossing status categories</b>.
    This will almost certainly have impacted timings as the end times are often taken at status category
    boundaries. You should assume that any timing measurements for this item are wrong.
  HTML
end

#render_backwords_through_statuses(problems) ⇒ Object



402
403
404
405
406
407
408
# File 'lib/jirametrics/data_quality_report.rb', line 402

# Explanatory HTML for issues that moved backwards on the board without leaving their status category.
# The method name (including its spelling) is looked up dynamically from the problem key, so it must not change.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_backwords_through_statuses problems
  <<-HTML
    #{label_issues problems.size} moved backwards across the board. Depending where we have set the
    start and end points, this may give us incorrect timing data. Note that these items did not cross
    a status category and may not have affected metrics.
  HTML
end

#render_completed_but_not_started(problems) ⇒ Object



371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
# File 'lib/jirametrics/data_quality_report.rb', line 371

# Explanatory HTML for issues that finished without any detectable start time.
# When fewer than 85% of all issues remain usable, a survivor-bias warning is appended.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment
def render_completed_but_not_started problems
  percentage_work_included = ((issues.size - problems.size).to_f / issues.size * 100).to_i
  html = <<-HTML
    #{label_issues problems.size} were discarded from all charts using cycletime (scatterplot, histogram, etc)
    as we couldn't determine when they started.
  HTML
  if percentage_work_included < 85
    html << <<-HTML
      Consider whether looking at only #{percentage_work_included}% of the total data points is enough
      to come to any reasonable conclusions. See <a href="https://unconsciousagile.com/2024/11/19/survivor-bias.html">
      Survivor Bias</a>.
    HTML
  end
  html
end

#render_created_in_wrong_status(problems) ⇒ Object



419
420
421
422
423
424
425
# File 'lib/jirametrics/data_quality_report.rb', line 419

# Explanatory HTML for issues whose first status was not one of the backlog statuses.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_created_in_wrong_status problems
  <<-HTML
    #{label_issues problems.size} were created in a status that is not considered to be some varient
    of To Do. Most likely this means that the issue was created from one of the columns on the board,
    rather than in the backlog. Why Jira allows this is still a mystery.
  HTML
end

#render_discarded_changes(problems) ⇒ Object



363
364
365
366
367
368
369
# File 'lib/jirametrics/data_quality_report.rb', line 363

# Explanatory HTML for issues whose earlier history was discarded after a move back to the backlog.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_discarded_changes problems
  <<-HTML
    #{label_issues problems.size} have had information discarded. This configuration is set
    to "reset the clock" if an item is moved back to the backlog after it's been started. This hides important
    information and makes the data less accurate. <b>Moving items back to the backlog is strongly discouraged.</b>
  HTML
end

#render_incomplete_subtasks_when_issue_done(problems) ⇒ Object



443
444
445
446
447
# File 'lib/jirametrics/data_quality_report.rb', line 443

# Explanatory HTML for issues that were completed while some of their subtasks were not.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_incomplete_subtasks_when_issue_done problems
  <<-HTML
    #{label_issues problems.size} issues were marked as done while subtasks were still not done.
  HTML
end

#render_issue_not_started_but_subtasks_have(problems) ⇒ Object



435
436
437
438
439
440
441
# File 'lib/jirametrics/data_quality_report.rb', line 435

# Explanatory HTML for issues that have not started although some of their subtasks have.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_issue_not_started_but_subtasks_have problems
  <<-HTML
    #{label_issues problems.size} still showing 'not started' while sub-tasks underneath them have
    started. This is almost always a mistake; if we're working on subtasks, the top level item should
    also have started.
  HTML
end

#render_issue_on_multiple_boards(problems) ⇒ Object



449
450
451
452
453
454
# File 'lib/jirametrics/data_quality_report.rb', line 449

# Explanatory HTML for issues that appear on more than one board.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment containing the item count
def render_issue_on_multiple_boards problems
  <<-HTML
    For #{label_issues problems.size}, we have an issue that shows up on more than one board. This
    could result in more data points showing up on a chart then there really should be.
  HTML
end

#render_items_blocked_on_closed_tickets(problems) ⇒ Object



456
457
458
459
460
461
# File 'lib/jirametrics/data_quality_report.rb', line 456

# Explanatory HTML for issues that claim to be blocked by an issue that is already complete.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment containing the item count
def render_items_blocked_on_closed_tickets problems
  <<-HTML
    For #{label_issues problems.size}, the issue is identified as being blocked by another issue. Yet,
    that other issue is already completed so, by definition, it can't still be blocking.
  HTML
end

#render_problem_type(problem_key) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
# File 'lib/jirametrics/data_quality_report.rb', line 93

# Renders one list item of the report for a single problem type.
#
# The explanatory text comes from the method named "render_<problem_key>", which is
# dispatched dynamically, followed by the collapsible panel listing the affected issues.
#
# @param problem_key [Symbol] which problem type to render
# @return [String] an <li> fragment, or an empty string when no entry has that problem
def render_problem_type problem_key
  problems = problems_for problem_key
  return '' if problems.empty?

  <<-HTML
    <li>
      #{__send__ :"render_#{problem_key}", problems}
      #{collapsible_issues_panel problems}
    </li>
  HTML
end

#render_status_changes_after_done(problems) ⇒ Object



387
388
389
390
391
392
# File 'lib/jirametrics/data_quality_report.rb', line 387

# Explanatory HTML for issues whose status kept changing after they were considered done.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_status_changes_after_done problems
  <<-HTML
    #{label_issues problems.size} had a status change after being identified as done. We should question
    whether they were really done at that point or if we stopped the clock too early.
  HTML
end

#render_status_not_on_board(problems) ⇒ Object



410
411
412
413
414
415
416
417
# File 'lib/jirametrics/data_quality_report.rb', line 410

# Explanatory HTML for issues that spent time in a status not mapped to a visible board column.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_status_not_on_board problems
  <<-HTML
    #{label_issues problems.size} were not visible on the board for some period of time. This may impact
    timings as the work was likely to have been forgotten if it wasn't visible. What does "not visible"
    mean in this context? The issue was in a status that is not mapped to any visible column on the board.
    Look in "unmapped statuses" on your board.
  HTML
end

#render_stopped_before_started(problems) ⇒ Object



427
428
429
430
431
432
433
# File 'lib/jirametrics/data_quality_report.rb', line 427

# Explanatory HTML for issues whose stop time precedes their start time.
#
# @param problems [#size] the problems of this type; only their count is used
# @return [String] HTML fragment beginning with the item count
def render_stopped_before_started problems
  <<-HTML
    #{label_issues problems.size} were stopped before they were started and this will play havoc with
    any cycletime or WIP calculations. The most common case for this is when an item gets closed and
    then moved back into an in-progress status.
  HTML
end

#runObject



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/jirametrics/data_quality_report.rb', line 38

# Builds the entries, runs every data-quality scan over them and renders the report.
#
# @return [String] the report HTML, or an empty string when no entry has any problem
def run
  initialize_entries

  # Scans that look at one entry at a time.
  @entries.each do |entry|
    backlog_statuses = entry.issue.board.backlog_statuses

    scan_for_completed_issues_without_a_start_time entry: entry
    scan_for_status_change_after_done entry: entry
    scan_for_backwards_movement entry: entry, backlog_statuses: backlog_statuses
    scan_for_issues_not_created_in_a_backlog_status entry: entry, backlog_statuses: backlog_statuses
    scan_for_stopped_before_started entry: entry
    scan_for_issues_not_started_with_subtasks_that_have entry: entry
    scan_for_incomplete_subtasks_when_issue_done entry: entry
    scan_for_discarded_data entry: entry
    scan_for_items_blocked_on_closed_tickets entry: entry
  end

  # This scan needs to see all entries together.
  scan_for_issues_on_multiple_boards entries: @entries

  # Kept as a local of this name so it stays visible through the binding handed to render_top_text.
  entries_with_problems = entries_with_problems()
  return '' if entries_with_problems.empty?

  caller_binding = binding

  # The order here is the order in which problem types appear in the rendered list.
  problem_keys = %i[
    discarded_changes
    completed_but_not_started
    status_changes_after_done
    backwards_through_status_categories
    backwords_through_statuses
    status_not_on_board
    created_in_wrong_status
    stopped_before_started
    issue_not_started_but_subtasks_have
    incomplete_subtasks_when_issue_done
    issue_on_multiple_boards
    items_blocked_on_closed_tickets
  ]

  result = +''
  result << render_top_text(caller_binding) << '<ul class="quality_report">'
  problem_keys.each { |problem_key| result << render_problem_type(problem_key) }
  result << '</ul>'
end

#scan_for_backwards_movement(entry:, backlog_statuses:) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/jirametrics/data_quality_report.rb', line 175

# Walks every status change of the entry's issue, in the order issue.changes yields them,
# and reports three kinds of problem on the entry:
# * :status_not_on_board - the new status is not mapped to any visible column
# * :backwords_through_statuses - the column index went down, within the same status category
# * :backwards_through_status_categories - the column index went down and the category changed
#
# @param entry [Entry] the entry being scanned; problems are recorded through entry.report
# @param backlog_statuses [Enumerable] backlog statuses; in this method they are only compared by name
def scan_for_backwards_movement entry:, backlog_statuses:
  issue = entry.issue

  # Moving backwards through statuses is bad. Moving backwards through status categories is almost always worse.
  # Column index of the previous status change; -1 means no visible column has been seen yet.
  last_index = -1
  issue.changes.each do |change|
    next unless change.status?

    board = entry.issue.board
    # Index of the visible column that holds the new status, or nil when no visible column maps it.
    index = entry.issue.board.visible_columns.find_index { |column| column.status_ids.include? change.value_id }
    if index.nil?
      # If it's a backlog status then ignore it. Not supposed to be visible.
      # Note that this `next` also skips the last_index update at the bottom of the loop.
      next if entry.issue.board.backlog_statuses.include?(board.possible_statuses.find_by_id(change.value_id))

      detail = "Status #{format_status change, board: board} is not on the board"
      if issue.board.possible_statuses.find_by_id(change.value_id).nil?
        detail = "Status #{format_status change, board: board} cannot be found at all. Was it deleted?"
      end

      # If it's been moved back to backlog then it's on a different report. Ignore it here.
      # This check matches by status name against the backlog_statuses argument.
      detail = nil if backlog_statuses.any? { |s| s.name == change.value }

      entry.report(problem_key: :status_not_on_board, detail: detail) unless detail.nil?
    elsif change.old_value.nil?
      # Do nothing
    elsif index < last_index
      # The new column has a lower index than the previous one, so this is a backwards move.
      new_category = board.possible_statuses.find_by_id(change.value_id).category.name
      old_category = board.possible_statuses.find_by_id(change.old_value_id).category.name

      if new_category == old_category
        entry.report(
          problem_key: :backwords_through_statuses,
          detail: "Moved from #{format_status change, use_old_status: true, board: board}" \
            " to #{format_status change, board: board}" \
            " on #{change.time.to_date}"
        )
      else
        entry.report(
          problem_key: :backwards_through_status_categories,
          detail: "Moved from #{format_status change, use_old_status: true, board: board}" \
            " to #{format_status change, board: board}" \
            " on #{change.time.to_date}," \
            " crossing from category #{format_status change, use_old_status: true, board: board, is_category: true}" \
            " to #{format_status change, board: board, is_category: true}."
        )
      end
    end
    # A status without a visible column resets the tracking to "no column seen".
    last_index = index || -1
  end
end

#scan_for_completed_issues_without_a_start_time(entry:) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/jirametrics/data_quality_report.rb', line 141

# Reports :completed_but_not_started when the entry has a stop time but no start time.
# The detail lists the formatted status changes of the issue, skipping any that format to nil/false.
#
# @param entry [Entry] the entry being scanned; the problem is recorded through entry.report
def scan_for_completed_issues_without_a_start_time entry:
  return unless entry.stopped && entry.started.nil?

  issue = entry.issue
  board = issue.board
  trail = issue.status_changes.filter_map { |change| format_status change, board: board }

  entry.report(problem_key: :completed_but_not_started, detail: "Status changes: #{trail.join ' → '}")
end

#scan_for_discarded_data(entry:) ⇒ Object



328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/jirametrics/data_quality_report.rb', line 328

# Reports :discarded_changes when the discarded-changes data holds a record for this issue
# that spans more than a single calendar day.
#
# @param entry [Entry] the entry being scanned; the problem is recorded through entry.report
def scan_for_discarded_data entry:
  record = @discarded_changes_data&.find { |candidate| candidate[:issue] == entry.issue }
  return if record.nil?

  started_on = record[:original_start_time].to_date
  discarded_on = record[:cutoff_time].to_date

  # Inclusive day count: start and cutoff on the same date gives 1.
  days_ignored = (discarded_on - started_on).to_i + 1

  # Both on the same day means nothing meaningful was ignored, so calculations are unaffected.
  return if days_ignored == 1

  entry.report(
    problem_key: :discarded_changes,
    detail: "Started: #{started_on}, Discarded: #{discarded_on}, Ignored: #{label_days days_ignored}"
  )
end

#scan_for_incomplete_subtasks_when_issue_done(entry:) ⇒ Object



299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/jirametrics/data_quality_report.rb', line 299

# Reports :incomplete_subtasks_when_issue_done when the entry is stopped and any subtask
# was never started, is still open, or was stopped later than the entry itself.
#
# @param entry [Entry] the entry being scanned; the problem is recorded through entry.report
def scan_for_incomplete_subtasks_when_issue_done entry:
  parent_stopped = entry.stopped
  return unless parent_stopped

  notes = entry.issue.subtasks.filter_map do |subtask|
    sub_started, sub_stopped = subtask.board.cycletime.started_stopped_times(subtask)

    # nil means the subtask was finished in time and is not worth mentioning.
    remark =
      if sub_stopped
        "(Closed #{time_as_english parent_stopped, sub_stopped} later)" if sub_stopped > parent_stopped
      elsif sub_started
        '(Still not done)'
      else
        '(Not even started)'
      end

    "#{subtask_label subtask} #{remark}" if remark
  end
  return if notes.empty?

  entry.report(problem_key: :incomplete_subtasks_when_issue_done, detail: notes.join('<br />'))
end

#scan_for_issues_not_created_in_a_backlog_status(entry:, backlog_statuses:) ⇒ Object



226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/jirametrics/data_quality_report.rb', line 226

# Reports :created_in_wrong_status when the first status change found on the issue
# points at a status that is not one of the board's backlog statuses.
#
# @param entry [Entry] the entry being scanned; the problem is recorded through entry.report
# @param backlog_statuses [Enumerable] statuses the issue is allowed to be created in; matched by id
def scan_for_issues_not_created_in_a_backlog_status entry:, backlog_statuses:
  issue = entry.issue
  creation_change = issue.changes.find(&:status?)

  # Without any status change there is nothing to judge; bail out instead of failing on nil.
  return if creation_change.nil?
  return if backlog_statuses.any? { |status| status.id == creation_change.value_id }

  board = issue.board
  status_string = backlog_statuses.map { |status| format_status status, board: board }.join(', ')
  entry.report(
    problem_key: :created_in_wrong_status,
    detail: "Created in #{format_status creation_change, board: board}, " \
      "which is not one of the backlog statuses for this board: #{status_string}"
  )
end

#scan_for_issues_not_started_with_subtasks_that_have(entry:) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# File 'lib/jirametrics/data_quality_report.rb', line 248

# Reports :issue_not_started_but_subtasks_have when the entry has no start time
# while at least one of its subtasks does.
#
# @param entry [Entry] the entry being scanned; the problem is recorded through entry.report
def scan_for_issues_not_started_with_subtasks_that_have entry:
  return if entry.started

  labels = entry.issue.subtasks
    .select { |subtask| subtask.board.cycletime.started_stopped_times(subtask).first }
    .map { |subtask| subtask_label(subtask) }
  return if labels.empty?

  entry.report(problem_key: :issue_not_started_but_subtasks_have, detail: labels.join('<br />'))
end

#scan_for_issues_on_multiple_boards(entries:) ⇒ Object



350
351
352
353
354
355
356
357
358
359
360
361
# File 'lib/jirametrics/data_quality_report.rb', line 350

# Reports :issue_on_multiple_boards on the first entry of every issue key
# that occurs in more than one entry. The detail lists the board names, sorted.
#
# @param entries [Enumerable<Entry>] all entries of the report
def scan_for_issues_on_multiple_boards entries:
  entries.group_by { |entry| entry.issue.key }.each_value do |same_key|
    # group_by never produces an empty group, so this skips exactly the single-entry groups.
    next unless same_key.size > 1

    quoted_names = same_key.map { |entry| entry.issue.board.name.inspect }.sort
    same_key.first.report(
      problem_key: :issue_on_multiple_boards,
      detail: "Found on boards: #{quoted_names.join(', ')}"
    )
  end
end

#scan_for_items_blocked_on_closed_tickets(entry:) ⇒ Object



267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/jirametrics/data_quality_report.rb', line 267

# Reports :items_blocked_on_closed_tickets once for every link of a still-open issue
# whose linked issue already has a stop time.
#
# NOTE(review): every entry of issue_links is treated as a blocker, whatever its link type -
# confirm that issue_links is already restricted to blocking relationships.
#
# @param entry [Entry] the entry being scanned; problems are recorded through entry.report
def scan_for_items_blocked_on_closed_tickets entry:
  # A finished issue can never be reported, so skip the per-link cycletime lookups entirely.
  return if entry.stopped

  issue = entry.issue
  issue.issue_links.each do |link|
    other = link.other_issue
    other_stopped = other.board.cycletime.started_stopped_times(other).last
    next unless other_stopped

    entry.report(
      problem_key: :items_blocked_on_closed_tickets,
      detail: "#{issue.key} thinks it's blocked by #{other.key}, " \
        "except #{other.key} is closed."
    )
  end
end

#scan_for_status_change_after_done(entry:) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/jirametrics/data_quality_report.rb', line 154

# Reports :status_changes_after_done when the issue has further status changes
# after the one that coincides with (or first follows) its stop time.
#
# @param entry [Entry] the entry being scanned; the problem is recorded through entry.report
def scan_for_status_change_after_done entry:
  finished_at = entry.stopped
  return unless finished_at

  # The first change at or after the stop time is taken as the "done" transition itself;
  # only what follows it counts as a problem.
  done_status, *later_changes = entry.issue.changes.select do |change|
    change.status? && change.time >= finished_at
  end
  return if later_changes.empty?

  board = entry.issue.board
  summary = "Completed on #{finished_at.to_date} with status #{format_status done_status, board: board}."
  follow_ups = later_changes.map do |change|
    " Changed to #{format_status change, board: board} on #{change.time.to_date}."
  end

  entry.report(problem_key: :status_changes_after_done, detail: summary + follow_ups.join)
end

#scan_for_stopped_before_started(entry:) ⇒ Object



239
240
241
242
243
244
245
246
# File 'lib/jirametrics/data_quality_report.rb', line 239

# Reports :stopped_before_started when both times are present and the stop time is the earlier one.
#
# @param entry [Entry] the entry being scanned; the problem is recorded through entry.report
def scan_for_stopped_before_started entry:
  stopped = entry.stopped
  started = entry.started
  out_of_order = stopped && started && stopped < started
  return unless out_of_order

  entry.report(
    problem_key: :stopped_before_started,
    detail: "The stopped time '#{stopped}' is before the started time '#{started}'"
  )
end

#subtask_label(subtask) ⇒ Object



281
282
283
# File 'lib/jirametrics/data_quality_report.rb', line 281

# HTML label for a subtask: its type icon, a link to the issue and the quoted summary,
# where the summary is cut to its first 51 characters.
#
# @param subtask [Issue] the subtask to describe
# @return [String] HTML fragment
def subtask_label subtask
  icon = "<img src='#{subtask.type_icon_url}' />"
  link = link_to_issue(subtask)
  quoted_summary = subtask.summary[0..50].inspect

  "#{icon} #{link} #{quoted_summary}"
end

#testable_entriesObject

Return a format that’s easier to assert against



106
107
108
109
110
111
112
113
114
115
# File 'lib/jirametrics/data_quality_report.rb', line 106

# Return a format that's easier to assert against: one [started, stopped, issue] triple per entry,
# with the times rendered as text and missing times rendered as an empty string.
#
# @return [Array<Array>] triples of (String, String, issue)
def testable_entries
  pattern = '%Y-%m-%d %H:%M:%S %z'
  @entries.map do |entry|
    started_text, stopped_text = [entry.started, entry.stopped].map do |time|
      time&.strftime(pattern) || ''
    end
    [started_text, stopped_text, entry.issue]
  end
end

#time_as_english(from_time, to_time) ⇒ Object



285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/jirametrics/data_quality_report.rb', line 285

# Describes the gap between two times in the largest unit that fits (seconds, minutes, hours or days),
# rounding down, and using the singular unit name when the amount is exactly one.
#
# @param from_time [Time, Numeric] the earlier moment
# @param to_time [Time, Numeric] the later moment
# @return [String] for example "45 seconds", "1 minute" or "3 days"
def time_as_english(from_time, to_time)
  delta = (to_time - from_time).to_i
  unit = 'day'

  # Each pair is a unit name and how many of that unit make up one of the next larger unit.
  [['second', 60], ['minute', 60], ['hour', 24]].each do |name, per_next_unit|
    if delta < per_next_unit
      unit = name
      break
    end
    delta /= per_next_unit
  end

  delta == 1 ? "1 #{unit}" : "#{delta} #{unit}s"
end