Class: ReplacerBot::SeenTweets

Inherits:
Object
  • Object
show all
Defined in:
lib/replacer_bot/seen_tweets.rb

Class Method Summary collapse

Class Method Details

.clean_urls(string) ⇒ Object



45
46
47
# File 'lib/replacer_bot/seen_tweets.rb', line 45

# Replaces every http/https URL in +string+ with the placeholder '__URL__'.
#
# A URL runs from its scheme up to the next whitespace character, so a link
# that is followed by a newline or tab does not swallow the text after it.
#
# @param string [String] text that may contain URLs
# @return [String] a new string with each URL replaced by '__URL__'
def self.clean_urls(string)
  string.gsub(%r{https?://\S*}, '__URL__')
end

.hashtag_nuker(string:, other_end: false) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/replacer_bot/seen_tweets.rb', line 49

# Strips the run of hashtag tokens from one end of +string+.
#
# The string is split on whitespace and tokens are dropped from the chosen end
# for as long as ReplacerBot.is_hashtag accepts them. Hashtags that appear
# after the first non-hashtag token (counting from that end) are kept. The
# surviving tokens are re-joined with single spaces.
#
# @param string [String] text to trim
# @param other_end [Boolean] false trims the start, true trims the end
# @return [String] the remaining tokens joined by single spaces
def self.hashtag_nuker(string:, other_end: false)
  words = string.split(' ')
  # Reversing lets the same "drop from the front" logic trim the tail.
  words.reverse! if other_end

  kept = words.drop_while { |token| ReplacerBot.is_hashtag(token) }

  kept.reverse! if other_end
  kept.join(' ')
end

.nuke_hashtags(string) ⇒ Object



70
71
72
# File 'lib/replacer_bot/seen_tweets.rb', line 70

# Removes hashtag runs from both ends of +string+ by applying hashtag_nuker
# twice: first to the trailing end, then to the leading end of that result.
#
# @param string [String] text to trim
# @return [Object] whatever the second hashtag_nuker call returns
def self.nuke_hashtags(string)
  tail_trimmed = hashtag_nuker(string: string, other_end: true)
  hashtag_nuker(string: tail_trimmed)
end

.retrieve ⇒ Object



37
38
39
40
41
42
43
# File 'lib/replacer_bot/seen_tweets.rb', line 37

# Loads the persisted collection of seen tweets.
#
# Reads the file named by Config.instance.config.seen_tweets and unmarshals
# its contents. If that file does not exist, an empty Set is returned.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so this is
# only safe while the file is written exclusively by this program — confirm.
#
# @return [Object] the unmarshalled object, or an empty Set when the file is
#   missing
def self.retrieve
  # Block form closes the handle; binary mode because Marshal data is not text.
  File.open(Config.instance.config.seen_tweets, 'rb') { |file| Marshal.load(file) }
rescue Errno::ENOENT
  Set.new
end

.sanitise(tweet) ⇒ Object



74
75
76
# File 'lib/replacer_bot/seen_tweets.rb', line 74

# Normalises a tweet for comparison: passes it through clean_urls, then
# passes that result through nuke_hashtags.
#
# @param tweet [String] raw tweet text
# @return [Object] the result of nuke_hashtags
def self.sanitise(tweet)
  without_urls = clean_urls(tweet)
  nuke_hashtags(without_urls)
end

.save(set) ⇒ Object



88
89
90
91
92
# File 'lib/replacer_bot/seen_tweets.rb', line 88

# Persists the seen-tweets collection.
#
# The set is first passed through unshift, and the result is marshalled into
# the file named by Config.instance.config.seen_tweets, replacing any
# previous contents.
#
# @param set [Set] tweets to persist
# @return [Object] the value of the File.open block (the result of Marshal.dump)
def self.save(set)
  # Binary mode: Marshal output is a byte stream, not text.
  File.open(Config.instance.config.seen_tweets, 'wb') do |file|
    Marshal.dump(unshift(set), file)
  end
end

.similar(tweet, other_tweet, weighting: Config.instance.config.similarity_weighting) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/replacer_bot/seen_tweets.rb', line 23

# Reports whether +tweet+ shares a run of +weighting+ consecutive words with
# +other_tweet+.
#
# +tweet+ is split on whitespace and every window of +weighting+ adjacent
# words is joined with spaces, downcased and sanitised. The result is true as
# soon as one window occurs as a substring of the downcased, sanitised
# +other_tweet+.
#
# NOTE(review): a window that sanitise reduces to '' is a substring of any
# text and therefore counts as a match — confirm this is intended.
#
# @param tweet [String] candidate tweet
# @param other_tweet [String] tweet to compare against
# @param weighting [Integer] number of consecutive words per window; defaults
#   to Config.instance.config.similarity_weighting
# @return [Boolean] false when +tweet+ has fewer than +weighting+ words or no
#   window matches
def self.similar(tweet, other_tweet, weighting: Config.instance.config.similarity_weighting)
  tweet_words = tweet.split(' ')
  return false if tweet_words.count < weighting

  # Loop-invariant: sanitise the comparison text once, not once per window.
  haystack = sanitise(other_tweet.downcase)

  (0..tweet_words.count - weighting).any? do |start|
    needle = sanitise(tweet_words[start, weighting].join(' ').downcase)
    haystack.include?(needle)
  end
end

.similar_to_archive(tweet, archive) ⇒ Object



13
14
15
16
17
18
19
20
21
# File 'lib/replacer_bot/seen_tweets.rb', line 13

# Reports whether +tweet+ is similar (per .similar) to any entry of +archive+.
#
# Stops at the first archived tweet that matches.
#
# @param tweet [String] candidate tweet
# @param archive [Enumerable<String>] previously seen tweets
# @return [Boolean] true if at least one archived tweet is similar
def self.similar_to_archive(tweet, archive)
  archive.any? { |archived_tweet| similar(tweet, archived_tweet) }
end

.unshift(set) ⇒ Object



78
79
80
81
82
83
84
85
86
# File 'lib/replacer_bot/seen_tweets.rb', line 78

# Caps the collection at Config.instance.config.max_seen_tweets entries.
#
# Keeps the last max_seen_tweets elements in the set's iteration order and
# discards the rest. A limit of 0 yields an empty Set.
#
# @param set [Set] collection to trim (not modified)
# @return [Set] a new Set holding at most max_seen_tweets elements
def self.unshift(set)
  max_size = Config.instance.config.max_seen_tweets
  # Array#last(n) returns the whole array when n exceeds its size and [] for
  # n == 0, whereas the slice a[-0..-1] would return everything.
  Set.new(set.to_a.last(max_size))
end

.validate(tweet) ⇒ Object



3
4
5
6
7
8
9
10
11
# File 'lib/replacer_bot/seen_tweets.rb', line 3

# Decides whether +tweet+ is new, and records it as seen either way.
#
# The tweet is valid when its sanitised form is not already in the archive
# and the raw tweet is not similar to any archived entry. The similarity
# check is skipped if the sanitised form is already present. The sanitised
# form is then added to the archive, which is saved.
#
# @param tweet [String] raw tweet text
# @return [Boolean] true when the tweet has not been seen before
def self.validate(tweet)
  seen = retrieve
  cleaned = sanitise(tweet)

  fresh = !(seen.include?(cleaned) || similar_to_archive(tweet, seen))

  seen.add(cleaned)
  save(seen)

  fresh
end