Class: RubyMaat::Analysis::CommitMessages

Inherits:
BaseAnalysis show all
Defined in:
lib/ruby_maat/analysis/commit_messages.rb

Overview

Commit messages analysis - word frequency analysis of commit messages

Instance Method Summary collapse

Instance Method Details

#analyze(dataset, options = {}) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/ruby_maat/analysis/commit_messages.rb', line 7

# Builds a word-frequency table from the commit messages in +dataset+.
#
# Messages are lower-cased and split on runs of non-alphanumeric characters.
# Tokens shorter than three characters, or listed by +stop_words+, are dropped.
# The remaining words are counted and ordered by descending frequency.
#
# @param dataset [#to_df] source whose +to_df+ result is indexed with
#   +:message+; nil entries are discarded
#   (NOTE(review): assumes the remaining entries are Strings - confirm against caller)
# @param options [Hash] optional settings
# @option options [String, Regexp] :expression_to_match when present, only
#   messages matching this pattern (case-insensitively) are analysed
# @return [Object] whatever +to_csv_data+ returns for the rows and the
#   +[:word, :frequency]+ column list
# @raise [RegexpError] if +:expression_to_match+ is not a valid pattern
def analyze(dataset, options = {})
  expression = options[:expression_to_match]

  # Extract commit messages, ignoring commits without one
  messages = dataset.to_df[:message].compact

  # Narrow to messages matching the caller-supplied pattern, if any
  if expression
    regex = Regexp.new(expression, Regexp::IGNORECASE)
    messages = messages.grep(regex)
  end

  # Look the stop-word list up once instead of once per token
  ignored_words = stop_words
  word_frequencies = Hash.new(0)

  messages.each do |message|
    # Simple tokenization: split on anything that is not a letter or digit
    words = message.downcase.split(/[^a-zA-Z0-9]+/).reject(&:empty?)

    # Filter out common stop words and very short words
    words = words.reject { |word| word.length < 3 || ignored_words.include?(word) }

    words.each { |word| word_frequencies[word] += 1 }
  end

  # Convert to results format
  results = word_frequencies.map do |word, frequency|
    {
      word: word,
      frequency: frequency
    }
  end

  # Sort by frequency descending
  results.sort_by! { |r| -r[:frequency] }

  # %i[...] is the symbol-array literal; without the leading % this line
  # would be parsed as method calls and raise NameError.
  to_csv_data(results, %i[word frequency])
end