Module: StringMagic::Core::Analysis
- Included in:
- String, StringMagic
- Defined in:
- lib/string_magic/core/analysis.rb
Instance Method Summary collapse
- #extract_dates ⇒ Object
- #extract_emails ⇒ Object
-
#extract_entities ⇒ Object
—————————————————————— Entity extraction ——————————————————————.
- #extract_hashtags ⇒ Object
- #extract_mentions ⇒ Object
- #extract_phones ⇒ Object
- #extract_urls ⇒ Object
-
#readability_score ⇒ Object
—————————————————————— Text statistics ——————————————————————.
- #sentiment_indicators ⇒ Object
- #word_frequency ⇒ Object
Instance Method Details
#extract_dates ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/string_magic/core/analysis.rb', line 44

# Collects date-like substrings from the receiver.
#
# Four textual shapes are recognised (see the inline examples below). Matches
# are gathered pattern by pattern, so the result is ordered by pattern first
# and by position in the text second, with duplicates removed.
#
# @return [Array<String>] unique matched substrings; [] for an empty receiver
def extract_dates
  return [] if empty?

  patterns = [
    %r{\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b}, # 01/31/2025 or 31-01-25
    %r{\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b}, # 2025-01-31
    # Month name (abbreviated or full) followed by day and year: "Jan 5, 2024"
    /\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},?\s+\d{4}\b/i,
    # Day followed by month name and year: "5 March 2023"
    /\b\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{4}\b/i
  ]

  # None of the patterns has a capturing group, so scan yields plain strings.
  patterns.flat_map { |re| scan(re) }.uniq
end
#extract_emails ⇒ Object
23 24 25 26 |
# File 'lib/string_magic/core/analysis.rb', line 23

# Collects e-mail-address-like substrings from the receiver.
#
# @return [Array<String>] unique addresses in order of first appearance;
#   [] for an empty receiver
def extract_emails
  return [] if empty?

  # local-part @ domain . TLD (TLD is at least two ASCII letters)
  email_re = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/
  scan(email_re).uniq
end
#extract_entities ⇒ Object
Entity extraction
10 11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/string_magic/core/analysis.rb', line 10

# Runs every individual extractor and bundles the results into one hash.
#
# For an empty receiver the result of +default_entities_hash+ is returned
# instead (that helper is defined outside this section; presumably it carries
# the same keys with empty values -- confirm against its definition).
#
# @return [Hash{Symbol => Array<String>}] keys :emails, :urls,
#   :phone_numbers, :dates, :hashtags and :mentions
def extract_entities
  return default_entities_hash if empty?

  {
    emails: extract_emails,
    urls: extract_urls,
    phone_numbers: extract_phones,
    dates: extract_dates,
    hashtags: extract_hashtags,
    mentions: extract_mentions
  }
end
#extract_hashtags ⇒ Object
57 58 59 60 |
# File 'lib/string_magic/core/analysis.rb', line 57

# Collects hashtags from the receiver.
#
# A hashtag is a '#' immediately followed by one or more word characters.
# The leading '#' is not part of the returned values.
#
# @return [Array<String>] unique tag names in order of first appearance;
#   [] for an empty receiver
def extract_hashtags
  return [] if empty?

  # The single capture group makes scan return one-element arrays; flatten them.
  scan(/#(\w+)/).flatten.uniq
end
#extract_mentions ⇒ Object
62 63 64 65 |
# File 'lib/string_magic/core/analysis.rb', line 62

# Collects @mentions from the receiver.
#
# A mention is an '@' at the start of a line or right after whitespace,
# followed by ASCII letters, digits or underscores. Requiring that position
# keeps the '@' inside e-mail addresses from being picked up. The leading '@'
# is not part of the returned values.
#
# @return [Array<String>] unique handles in order of first appearance;
#   [] for an empty receiver
def extract_mentions
  return [] if empty?

  # Only the handle is captured, so scan yields one-element arrays.
  scan(/(?:^|\s)@([A-Za-z0-9_]+)/).flatten.uniq
end
#extract_phones ⇒ Object
38 39 40 41 42 |
# File 'lib/string_magic/core/analysis.rb', line 38

# Collects phone-number-like substrings from the receiver.
#
# The pattern accepts an optional "+<1-3 digits>" prefix, then groups of
# 3-3-4 digits. The first group may be wrapped in parentheses and groups may
# be separated by '-', '.' or whitespace. A match must not be followed by
# another digit.
#
# @return [Array<String>] unique matches exactly as written in the text;
#   [] for an empty receiver
def extract_phones
  return [] if empty?

  phone_re = /(?:\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)/
  # No capturing groups, so scan yields the full matched strings.
  scan(phone_re).uniq
end
#extract_urls ⇒ Object
28 29 30 31 32 33 34 35 36 |
# File 'lib/string_magic/core/analysis.rb', line 28

# Collects URLs (http://, https:// or bare www.) from the receiver.
#
# Sentence punctuation that trails a URL is trimmed from the match. A closing
# parenthesis is trimmed only when it has no matching '(' inside the URL, so
# links such as ".../wiki/Ruby_(programming_language)" stay intact while
# "(see https://example.com/foo)" still yields "https://example.com/foo".
#
# @return [Array<String>] unique URLs in order of first appearance;
#   [] for an empty receiver
def extract_urls
  return [] if empty?

  # initial capture: everything up to whitespace, angle brackets or quotes
  candidates = scan(%r{https?://[^\s<>"']+|www\.[^\s<>"']+})

  trimmed = candidates.map do |url|
    until url.empty?
      if url.end_with?(')')
        # A ')' balanced by an earlier '(' belongs to the URL itself.
        break if url.count('(') >= url.count(')')
      elsif !url.match?(/[.,;:!?+]\z/)
        # '+' is in the strip set because the original implementation
        # stripped it as well.
        break
      end
      url = url.chop
    end
    url
  end

  trimmed.uniq
end
#readability_score ⇒ Object
Text statistics
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/string_magic/core/analysis.rb', line 71

# Computes a grade-level readability score for the receiver using the
# Flesch-Kincaid formula:
#
#   0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59
#
# Sentences are the non-blank pieces between runs of '.', '!' or '?'.
# Syllables are counted per word by +calculate_syllables+, which is defined
# outside this section.
#
# @return [Float] the score rounded to one decimal and floored at 0.0;
#   0.0 when the text has no sentences, words or syllables
def readability_score
  return 0.0 if empty?

  sentences = split(/[.!?]+/).map(&:strip).reject(&:empty?)
  return 0.0 if sentences.empty?

  words = scan(/\b[\p{L}\p{N}'-]+\b/)
  return 0.0 if words.empty?

  syllables = words.sum { |w| calculate_syllables(w) }
  return 0.0 if syllables.zero?

  score = 0.39 * (words.size.to_f / sentences.size) +
          11.8 * (syllables.to_f / words.size) - 15.59

  # Floor with a Float so negative scores yield 0.0 (not Integer 0), matching
  # the type returned by every early exit above.
  [score.round(1), 0.0].max
end
#sentiment_indicators ⇒ Object
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/string_magic/core/analysis.rb', line 94

# Gives a rough positive/negative split based on two small fixed word lists.
#
# Words are matched case-insensitively. When at least one listed word occurs,
# :positive and :negative hold each side's share of all listed-word hits
# (rounded to two decimals) and :neutral is 0. Otherwise the text is reported
# as fully neutral.
#
# @return [Hash{Symbol => Numeric}] keys :positive, :negative and :neutral
def sentiment_indicators
  neutral_result = { positive: 0, negative: 0, neutral: 1 }
  return neutral_result if empty?

  positive_words = %w[good great excellent amazing wonderful fantastic happy joy love like best awesome]
  negative_words = %w[bad terrible awful horrible sad hate dislike worst annoying frustrating]

  words = downcase.scan(/\b[\p{L}\p{N}'-]+\b/)
  pos = words.count { |w| positive_words.include?(w) }
  neg = words.count { |w| negative_words.include?(w) }
  total = pos + neg
  return neutral_result if total.zero?

  {
    positive: (pos.to_f / total).round(2),
    negative: (neg.to_f / total).round(2),
    neutral: 0
  }
end
#word_frequency ⇒ Object
89 90 91 92 |
# File 'lib/string_magic/core/analysis.rb', line 89

# Counts how often each word occurs in the receiver, ignoring case.
#
# A word is a run of letters, digits, apostrophes or hyphens delimited by
# word boundaries.
#
# @return [Hash{String => Integer}] lower-cased word => occurrence count, in
#   order of first appearance; {} for an empty receiver
def word_frequency
  return {} if empty?

  words = downcase.scan(/\b[\p{L}\p{N}'-]+\b/)
  words.tally
end