Module: StringMagic::Core::Analysis

Included in:
String, StringMagic
Defined in:
lib/string_magic/core/analysis.rb

Instance Method Summary collapse

Instance Method Details

#extract_dates ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/string_magic/core/analysis.rb', line 44

# Collects date-like substrings from the receiver.
#
# Four textual shapes are recognised: numeric day/month/year with `-` or `/`
# separators, numeric year-first, "Month D, YYYY" and "D Month YYYY"
# (month names are matched case-insensitively by their three-letter prefix).
# No calendar validation is performed; this is purely pattern matching.
#
# @return [Array<String>] unique matches, ordered pattern by pattern;
#   an empty array when the receiver is empty
def extract_dates
  return [] if empty?

  numeric_short_year_last = %r{\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b}
  numeric_year_first      = %r{\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b}
  month_name_first        = /\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},?\s+\d{4}\b/i
  day_first_month_name    = /\b\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{4}\b/i

  found = []
  [numeric_short_year_last, numeric_year_first, month_name_first, day_first_month_name].each do |pattern|
    found.concat(scan(pattern))
  end
  found.uniq
end

#extract_emails ⇒ Object



23
24
25
26
# File 'lib/string_magic/core/analysis.rb', line 23

# Finds e-mail-address-shaped substrings in the receiver.
#
# @return [Array<String>] unique addresses in order of first appearance;
#   an empty array when the receiver is empty
def extract_emails
  if empty?
    []
  else
    email_pattern = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/
    addresses = scan(email_pattern)
    addresses.uniq
  end
end

#extract_entities ⇒ Object


Entity extraction




10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/string_magic/core/analysis.rb', line 10

# Runs every entity extractor over the receiver and gathers the results.
#
# For an empty receiver the result of `default_entities_hash` (defined
# elsewhere in the module) is returned instead.
#
# @return [Hash{Symbol => Object}] keys :emails, :urls, :phone_numbers,
#   :dates, :hashtags and :mentions, each holding that extractor's result
def extract_entities
  return default_entities_hash if empty?

  # Result key => name of the extractor method that fills it.
  extractors = {
    emails:        :extract_emails,
    urls:          :extract_urls,
    phone_numbers: :extract_phones,
    dates:         :extract_dates,
    hashtags:      :extract_hashtags,
    mentions:      :extract_mentions
  }

  extractors.transform_values { |extractor| send(extractor) }
end

#extract_hashtags ⇒ Object



57
58
59
60
# File 'lib/string_magic/core/analysis.rb', line 57

# Lists the hashtags in the receiver, without the leading `#`.
#
# A hashtag is any `#` immediately followed by one or more `\w` characters,
# wherever the `#` occurs in the text.
#
# @return [Array<String>] unique tag names in order of first appearance;
#   an empty array when the receiver is empty
def extract_hashtags
  return [] if empty?

  # Each scan result is a one-element capture array; keep just the tag name.
  tags = scan(/#(\w+)/).map(&:first)
  tags.uniq
end

#extract_mentions ⇒ Object



62
63
64
65
# File 'lib/string_magic/core/analysis.rb', line 62

# Lists the @-mentions in the receiver, without the leading `@`.
#
# The `@` must sit at the start of a line or directly after whitespace, so
# the domain part of an e-mail address such as `a@b.com` is not reported.
#
# @return [Array<String>] unique handles in order of first appearance;
#   an empty array when the receiver is empty
def extract_mentions
  return [] if empty?

  handles = []
  scan(/(?:^|\s)@([A-Za-z0-9_]+)/) { |(handle)| handles << handle }
  handles.uniq
end

#extract_phones ⇒ Object



38
39
40
41
42
# File 'lib/string_magic/core/analysis.rb', line 38

# Finds phone-number-shaped substrings in the receiver.
#
# The accepted shape is an optional `+` country code (1-3 digits) followed by
# a 3-3-4 digit number whose groups may be separated by `-`, `.` or
# whitespace, with optional parentheses around the first group.
#
# @return [Array<String>] unique matches in order of first appearance;
#   an empty array when the receiver is empty
def extract_phones
  return [] if empty?

  # The digit guards on BOTH sides keep the pattern from carving a
  # ten-digit "phone number" out of the middle or tail of a longer digit run
  # (order ids, card numbers, ...). Previously only the trailing side was
  # guarded, so "12345678901234" produced "5678901234".
  phone_re = /(?<!\d)(?:\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)/
  scan(phone_re).uniq
end

#extract_urls ⇒ Object



28
29
30
31
32
33
34
35
36
# File 'lib/string_magic/core/analysis.rb', line 28

# Finds URLs in the receiver.
#
# A candidate starts with `http://`, `https://` or `www.` and runs up to the
# next whitespace, angle bracket or quote character. Sentence punctuation
# that was swept up at the end of the candidate is then trimmed off.
#
# @return [Array<String>] unique URLs in order of first appearance;
#   an empty array when the receiver is empty
def extract_urls
  return [] if empty?

  # initial capture
  candidates = scan(%r{https?://[^\s<>"']+|www\.[^\s<>"']+})

  candidates.map do |candidate|
    url = candidate
    # Peel trailing punctuation until nothing more comes off. A closing
    # parenthesis is removed only when it has no matching "(" inside the URL,
    # so "(see https://x.y/z)" loses its ")" while
    # "https://en.wikipedia.org/wiki/Ruby_(programming_language)" stays intact.
    loop do
      trimmed = url.sub(/[.,;:!?+]+\z/, '')
      trimmed = trimmed.chop if trimmed.end_with?(')') && trimmed.count(')') > trimmed.count('(')
      break if trimmed == url

      url = trimmed
    end
    url
  end.uniq
end

#readability_score ⇒ Object


Text statistics




71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/string_magic/core/analysis.rb', line 71

# Estimates how hard the receiver is to read.
#
# The score is
#   0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59
# rounded to one decimal and floored at zero. The coefficients are those of
# the Flesch-Kincaid grade-level formula. Sentences are the non-blank pieces
# between runs of `.`, `!` or `?`; syllables come from `calculate_syllables`,
# which is defined elsewhere (presumably a per-word syllable count).
#
# @return [Float] the score, always a Float; 0.0 when the receiver is empty
#   or contains no sentences, words or syllables
def readability_score
  return 0.0 if empty?

  sentences = split(/[.!?]+/).map(&:strip).reject(&:empty?)
  return 0.0 if sentences.empty?

  words = scan(/\b[\p{L}\p{N}'-]+\b/)
  return 0.0 if words.empty?

  syllables = words.sum { |w| calculate_syllables(w) }
  return 0.0 if syllables.zero?

  score = 0.39 * (words.size.to_f / sentences.size) +
          11.8 * (syllables.to_f / words.size) - 15.59

  # Floor with a Float bound: clamping against the Integer 0 handed back an
  # Integer for negative scores, unlike every other return path.
  [score.round(1), 0.0].max
end

#sentiment_indicators ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/string_magic/core/analysis.rb', line 94

# Gives a crude sentiment split based on two fixed word lists.
#
# The receiver is lower-cased and tokenised; every token found in the
# positive or negative list counts once per occurrence. When at least one
# such token exists, :positive and :negative are each list's share of the
# hits (rounded to two decimals) and :neutral is 0. Otherwise the text is
# reported as fully neutral.
#
# @return [Hash{Symbol => Numeric}] keys :positive, :negative and :neutral
def sentiment_indicators
  nothing_detected = { positive: 0, negative: 0, neutral: 1 }
  return nothing_detected if empty?

  positive_words = %w[good great excellent amazing wonderful fantastic happy joy love like best awesome]
  negative_words = %w[bad terrible awful horrible sad hate dislike worst annoying frustrating]

  # Tally the tokens once, then look each lexicon entry up in the tally.
  occurrences = downcase.scan(/\b[\p{L}\p{N}'-]+\b/).tally
  positive_hits = positive_words.sum { |word| occurrences.fetch(word, 0) }
  negative_hits = negative_words.sum { |word| occurrences.fetch(word, 0) }
  hits = positive_hits + negative_hits

  return nothing_detected if hits.zero?

  {
    positive: (positive_hits.to_f / hits).round(2),
    negative: (negative_hits.to_f / hits).round(2),
    neutral: 0
  }
end

#word_frequency ⇒ Object



89
90
91
92
# File 'lib/string_magic/core/analysis.rb', line 89

# Counts how often each word occurs in the receiver, ignoring case.
#
# Words are runs of letters, digits, apostrophes and hyphens delimited by
# word boundaries; the text is lower-cased before counting.
#
# @return [Hash{String => Integer}] word => occurrence count, keyed in order
#   of first appearance; an empty hash when the receiver is empty
def word_frequency
  return {} if empty?

  tokens = downcase.scan(/\b[\p{L}\p{N}'-]+\b/)
  tokens.group_by(&:itself).transform_values(&:size)
end