Class: RubyMaat::Analysis::Authors

Inherits:
BaseAnalysis show all
Defined in:
lib/ruby_maat/analysis/authors.rb

Overview

Authors analysis - counts distinct authors per entity. Research shows that the number of authors of a module is related to quality problems.

Instance Method Summary collapse

Instance Method Details

#analyze(dataset, options = {}) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/ruby_maat/analysis/authors.rb', line 8

# Tallies, for every entity in the dataset, the number of distinct authors
# and the number of distinct revisions, then reports them ranked.
#
# @param dataset [#to_df] object whose `to_df.to_a` yields rows that are
#   read through the "entity", "author" and "revision" keys
# @param options [Hash] `:min_revs` - entities with fewer distinct
#   revisions than this are left out (falls back to 1 when absent)
# @return [Object] the value produced by `to_csv_data` for the ranked rows
#   and the columns entity, n-authors, n-revs
def analyze(dataset, options = {})
  revision_floor = options[:min_revs] || 1

  # One bucket per entity; Sets collapse repeated authors/revisions.
  per_entity = Hash.new do |memo, key|
    memo[key] = {authors: Set.new, revisions: Set.new}
  end

  dataset.to_df.to_a.each do |record|
    bucket = per_entity[record["entity"]]
    bucket[:authors] << record["author"]
    bucket[:revisions] << record["revision"]
  end

  # Keep only entities that reach the revision threshold.
  kept = per_entity.select { |_entity, bucket| bucket[:revisions].size >= revision_floor }

  rows = kept.map do |entity, bucket|
    {
      entity: entity,
      "n-authors": bucket[:authors].size,
      "n-revs": bucket[:revisions].size
    }
  end

  # Rank by author count, then revision count, both highest first.
  # Comparing right-to-left on the [authors, revs] pair gives descending order.
  rank_keys = [:"n-authors", :"n-revs"]
  ranked = rows.sort { |left, right| right.values_at(*rank_keys) <=> left.values_at(*rank_keys) }

  to_csv_data(ranked, %i[entity n-authors n-revs])
end