Class: RubyMaat::Dataset
- Inherits:
-
Object
- Object
- RubyMaat::Dataset
- Defined in:
- lib/ruby_maat/dataset.rb
Overview
Wrapper around a Rover DataFrame that provides domain-specific operations. This replaces the Incanter datasets used in the Clojure version.
Class Method Summary collapse
Instance Method Summary collapse
-
#authors ⇒ Object
Get all authors.
-
#coupling_pairs ⇒ Object
Get coupling pairs (combinations of entities that changed together).
- #empty? ⇒ Boolean
-
#entities ⇒ Object
Get all entities (files).
-
#filter_date_range(start_date, end_date) ⇒ Object
Filter by date range.
-
#filter_min_revisions(min_revs) ⇒ Object
Filter by minimum revisions.
-
#group_by_author_sum_churn ⇒ Object
Group by author and sum churn metrics.
-
#group_by_entity_count_authors ⇒ Object
Group by entity and count distinct authors.
-
#group_by_entity_count_revisions ⇒ Object
Group by entity and count revisions.
-
#group_by_entity_sum_churn ⇒ Object
Group by entity and sum churn metrics.
-
#initialize(change_records = []) ⇒ Dataset
constructor
A new instance of Dataset.
-
#latest_date_by_entity ⇒ Object
Get latest date for each entity (for age analysis).
-
#revision_count(entity) ⇒ Object
Get revision count for an entity.
-
#shared_revisions_count(entity1, entity2) ⇒ Object
Count shared revisions between entity pairs.
- #size ⇒ Object
- #to_df ⇒ Object
-
#unique_dates ⇒ Object
Get unique dates.
Constructor Details
#initialize(change_records = []) ⇒ Dataset
Returns a new instance of Dataset.
9 10 11 |
# File 'lib/ruby_maat/dataset.rb', line 9

# Builds the underlying data frame from the given change records.
#
# @param change_records [Array] records passed to build_dataframe;
#   defaults to an empty list
def initialize(change_records = [])
  @data = build_dataframe(change_records)
end
Class Method Details
.from_changes(change_records) ⇒ Object
13 14 15 |
# File 'lib/ruby_maat/dataset.rb', line 13

# Factory shortcut: forwards the change records straight to the constructor.
#
# @param change_records [Array] records to build the dataset from
# @return [Object] a new instance created with +new+
def self.from_changes(change_records)
  new(change_records)
end
Instance Method Details
#authors ⇒ Object
Get all authors
49 50 51 52 53 |
# File 'lib/ruby_maat/dataset.rb', line 49

# Get all authors.
#
# @return [Object] the distinct values of the :author column, or an empty
#   Array when the dataset holds no data
def authors
  # Guard first: the :author column is only read when there is data.
  return [] if @data.empty?

  @data[:author].uniq
end
#coupling_pairs ⇒ Object
Get coupling pairs (combinations of entities that changed together)
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/ruby_maat/dataset.rb', line 92

# Get coupling pairs (combinations of entities that changed together).
#
# Every revision contributes each 2-element combination of the distinct
# entities it touched, so a pair is emitted once per revision in which
# both entities appear.
#
# @return [Array<Array>] list of [entity1, entity2] pairs
def coupling_pairs
  rows_by_revision = @data.to_a.group_by { |row| row["revision"] }

  rows_by_revision.each_value.flat_map do |rows|
    # An entity listed several times in one revision is counted once.
    changed_together = rows.map { |row| row["entity"] }.uniq
    changed_together.combination(2).to_a
  end
end
#empty? ⇒ Boolean
173 174 175 |
# File 'lib/ruby_maat/dataset.rb', line 173

# Whether the dataset holds no data; delegates to the underlying data frame.
#
# @return [Boolean]
def empty?
  @data.empty?
end
#entities ⇒ Object
Get all entities (files)
42 43 44 45 46 |
# File 'lib/ruby_maat/dataset.rb', line 42

# Get all entities (files).
#
# @return [Object] the distinct values of the :entity column, or an empty
#   Array when the dataset holds no data
def entities
  if @data.empty?
    []
  else
    @data[:entity].uniq
  end
end
#filter_date_range(start_date, end_date) ⇒ Object
Filter by date range
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# File 'lib/ruby_maat/dataset.rb', line 145

# Filter by date range.
#
# Rows are kept when their :date satisfies between?(start_date, end_date),
# and each kept row is rebuilt as a ChangeRecord.
#
# @param start_date [Object] lower bound passed to between?
# @param end_date [Object] upper bound passed to between?
# @return [Object] the result of Dataset.from_changes on the kept records
def filter_date_range(start_date, end_date)
  records_in_range = []

  @data.each_row do |row|
    changed_on = row[:date]
    if changed_on.between?(start_date, end_date)
      records_in_range << ChangeRecord.new(
        entity: row[:entity],
        author: row[:author],
        date: changed_on,
        revision: row[:revision],
        message: row[:message],
        loc_added: row[:loc_added],
        loc_deleted: row[:loc_deleted]
      )
    end
  end

  Dataset.from_changes(records_in_range)
end
#filter_min_revisions(min_revs) ⇒ Object
Filter by minimum revisions
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/ruby_maat/dataset.rb', line 56

# Filter by minimum revisions.
#
# Keeps only the rows of entities that appear in at least +min_revs+
# distinct revisions, and rebuilds each kept row as a ChangeRecord.
#
# @param min_revs [Integer] minimum number of distinct revisions an entity
#   must have to be kept
# @return [Object] the result of Dataset.from_changes on the kept records
def filter_min_revisions(min_revs)
  # Materialise the rows once; both passes below read the same list.
  rows = @data.to_a

  # Distinct revisions per entity; a Set ignores repeated rows of one revision.
  revisions_by_entity = Hash.new { |hash, entity| hash[entity] = Set.new }
  rows.each { |row| revisions_by_entity[row["entity"]] << row["revision"] }

  # A Set makes the membership test below constant-time per row.
  entities_to_keep = Set.new
  revisions_by_entity.each do |entity, revisions|
    entities_to_keep << entity if revisions.size >= min_revs
  end

  kept_rows = rows.select { |row| entities_to_keep.include?(row["entity"]) }
  change_records = kept_rows.map do |row|
    ChangeRecord.new(
      entity: row["entity"],
      author: row["author"],
      date: row["date"],
      revision: row["revision"],
      message: row["message"],
      loc_added: row["loc_added"],
      loc_deleted: row["loc_deleted"]
    )
  end

  Dataset.from_changes(change_records)
end
#group_by_author_sum_churn ⇒ Object
Group by author and sum churn metrics
32 33 34 |
# File 'lib/ruby_maat/dataset.rb', line 32

# Group by author and sum churn metrics.
#
# Groups the data frame on :author and asks the grouping to sum the
# :loc_added and :loc_deleted columns.
# NOTE(review): this relies on the data frame's group/sum accepting a list
# of columns — confirm against the Rover API.
#
# @return [Object] whatever the grouped sum returns
def group_by_author_sum_churn
  @data.group(:author).sum(%i[loc_added loc_deleted])
end
#group_by_entity_count_authors ⇒ Object
Group by entity and count distinct authors
22 23 24 |
# File 'lib/ruby_maat/dataset.rb', line 22

# Group by entity and count authors.
#
# Groups the data frame on :entity and counts the :author column, naming
# the result column "n_authors".
# NOTE(review): whether this count is over distinct authors depends on the
# data frame's grouped count semantics — confirm against the Rover API.
#
# @return [Object] whatever the grouped count returns
def group_by_entity_count_authors
  @data.group(:entity).count(:author, name: "n_authors")
end
#group_by_entity_count_revisions ⇒ Object
Group by entity and count revisions
27 28 29 |
# File 'lib/ruby_maat/dataset.rb', line 27

# Group by entity and count revisions.
#
# Groups the data frame on :entity and counts the :revision column, naming
# the result column "n_revs".
#
# @return [Object] whatever the grouped count returns
def group_by_entity_count_revisions
  per_entity = @data.group(:entity)
  per_entity.count(:revision, name: "n_revs")
end
#group_by_entity_sum_churn ⇒ Object
Group by entity and sum churn metrics
37 38 39 |
# File 'lib/ruby_maat/dataset.rb', line 37

# Group by entity and sum churn metrics.
#
# Groups the data frame on :entity and asks the grouping to sum the
# :loc_added and :loc_deleted columns.
#
# @return [Object] whatever the grouped sum returns
def group_by_entity_sum_churn
  churn_columns = [:loc_added, :loc_deleted]
  per_entity = @data.group(:entity)
  per_entity.sum(churn_columns)
end
#latest_date_by_entity ⇒ Object
Get latest date for each entity (for age analysis)
165 166 167 |
# File 'lib/ruby_maat/dataset.rb', line 165

# Get latest date for each entity (for age analysis).
#
# Groups the data frame on :entity and takes the maximum of :date.
#
# @return [Object] whatever the grouped max returns
def latest_date_by_entity
  per_entity = @data.group(:entity)
  per_entity.max(:date)
end
#revision_count(entity) ⇒ Object
Get revision count for an entity
131 132 133 134 135 136 137 |
# File 'lib/ruby_maat/dataset.rb', line 131

# Get revision count for an entity.
#
# @param entity [Object] entity whose rows are inspected
# @return [Integer] number of distinct revisions among the rows whose
#   "entity" equals +entity+
def revision_count(entity)
  @data.to_a
       .select { |row| row["entity"] == entity }
       .map { |row| row["revision"] }
       .uniq
       .size
end
#shared_revisions_count(entity1, entity2) ⇒ Object
Count shared revisions between entity pairs
115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/ruby_maat/dataset.rb', line 115

# Count shared revisions between an entity pair.
#
# @param entity1 [Object] first entity
# @param entity2 [Object] second entity
# @return [Integer] number of distinct revisions that contain rows for both
#   entities
def shared_revisions_count(entity1, entity2)
  entity1_revs = Set.new
  entity2_revs = Set.new

  @data.to_a.each do |row|
    entity = row["entity"]
    # Two independent checks (not if/elsif): when both arguments name the
    # same entity, its revisions must land in both sets.
    entity1_revs << row["revision"] if entity == entity1
    entity2_revs << row["revision"] if entity == entity2
  end

  (entity1_revs & entity2_revs).size
end
#size ⇒ Object
169 170 171 |
# File 'lib/ruby_maat/dataset.rb', line 169

# Size of the dataset, as reported by +count+ on the underlying data frame.
#
# @return [Object] the value of @data.count
def size
  @data.count
end
#to_df ⇒ Object
17 18 19 |
# File 'lib/ruby_maat/dataset.rb', line 17

# Exposes the underlying data frame itself (not a copy).
#
# @return [Object] the object stored in @data
def to_df
  @data
end
#unique_dates ⇒ Object
Get unique dates
140 141 142 |
# File 'lib/ruby_maat/dataset.rb', line 140

# Get unique dates.
#
# @return [Object] the distinct values of the :date column in sorted order,
#   or an empty Array when the dataset holds no data
def unique_dates
  # Same empty-dataset guard as the other column accessors (entities,
  # authors): the :date column is only read when there is data.
  return [] if @data.empty?

  @data[:date].uniq.sort
end