Class: Birdwatcher::Modules::Statuses::WordCloud

Inherits:
Birdwatcher::Module show all
Defined in:
lib/birdwatcher/modules/statuses/word_cloud.rb

Constant Summary collapse

DEFAULT_EXCLUDED_WORDS =
%w(rt via oh)

Constants inherited from Birdwatcher::Module

Birdwatcher::Module::MODULE_PATH

Constants included from Concerns::Concurrency

Concerns::Concurrency::DEFAULT_THREAD_POOL_SIZE

Constants included from Concerns::Core

Concerns::Core::DATA_DIRECTORY

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Birdwatcher::Module

_file_path, _file_path=, descendants, #execute, inherited, meta, meta=, module_by_path, module_paths, modules, path

Methods included from Concerns::Concurrency

included, #thread_pool

Methods included from Concerns::Persistence

included, #save_status, #save_user

Methods included from Concerns::Presentation

included, #make_status_summary_output, #make_url_summary_output, #make_user_details_output, #make_user_summary_output, #output_status_summary, #output_user_details, #output_user_summary, #page_text

Methods included from Concerns::Outputting

#confirm, #error, #fatal, included, #info, #line_separator, #newline, #output, #output_formatted, #task, #warn

Methods included from Concerns::Util

#escape_html, #excerpt, included, #parse_time, #pluralize, #strip_control_characters, #strip_html, #suppress_output, #suppress_warnings, #time_ago_in_words, #unescape_html

Methods included from Concerns::Core

#console, #current_workspace, #current_workspace=, #database, included, #klout_client, #read_data_file, #twitter_client

Class Method Details

.info ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/birdwatcher/modules/statuses/word_cloud.rb', line 100

# Returns the multi-line description of the Word Cloud module.
#
# The text is a `<<-INFO` heredoc, so its lines are returned exactly as
# written (no indentation is stripped). The phrase 'show options' is
# interpolated after calling #bold on it.
def self.info
<<-INFO
The Word Cloud module can generate a classic weighted word cloud from words used
in statuses across all or specific users and between different times.

The module is heavily configurable; have a look at the options with #{'show options'.bold}

Please note that configuring the module with a long timespan might result in a
very long execution time when the word cloud image is generated.

The generated image will be in PNG format.
INFO
end

Instance Method Details

#run ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/birdwatcher/modules/statuses/word_cloud.rb', line 114

# Generates a weighted word cloud image from statuses in the current workspace.
#
# Statuses are optionally limited to the screen names in the USERS option and
# to the SINCE..BEFORE date range (falling back to seven days ago through
# today when those options are unset). Words from each status text -- and,
# when INCLUDE_PAGE_TITLES is set, from the titles of its URLs -- are counted,
# filtered by MIN_WORD_COUNT, ranked by frequency, truncated to WORD_CAP and
# handed to MagicCloud. The resulting blob is written to the DEST path.
#
# Returns false when no statuses match; otherwise the result of the closing
# info call.
def run
  user_filter = option_setting("USERS")
  scope =
    if user_filter
      screen_names = user_filter.split(" ").map(&:strip)
      owner_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
      current_workspace.statuses_dataset.where("user_id IN ?", owner_ids)
    else
      current_workspace.statuses_dataset
    end

  # Both bounds are compared as calendar days, so they are reduced to dates here.
  day_format = "%Y-%m-%d"
  since_option = option_setting("SINCE")
  first_day = (since_option ? parse_time(since_option) : Date.today - 7).strftime(day_format)
  before_option = option_setting("BEFORE")
  last_day = (before_option ? parse_time(before_option) : Time.now).strftime(day_format)

  matched = scope.where("DATE(posted_at) >= DATE(?) AND DATE(posted_at) <= DATE(?)", first_day, last_day).all
  if matched.count.zero?
    error("There are no statuses to process")
    return false
  end

  prepare_exclusion_list
  frequencies = Hash.new(0)
  ranked = []

  # Shared by status texts and page titles: bump the counter of every word
  # that is not on the exclusion list.
  count_words = lambda do |text|
    split_into_words(text).each do |token|
      frequencies[token] += 1 unless exclude_word?(token)
    end
  end

  task("Processing #{matched.count.to_s.bold} statuses...") do
    matched.each do |status|
      count_words.call(status.text)
      next unless option_setting("INCLUDE_PAGE_TITLES")

      page_titles = status.urls_dataset
                          .where("title IS NOT NULL")
                          .where("final_url NOT LIKE 'https://twitter.com/%'")
                          .map(&:title)
      page_titles.each { |page_title| count_words.call(page_title) }
    end

    minimum = option_setting("MIN_WORD_COUNT")
    if minimum
      threshold = minimum.to_i
      frequencies.delete_if { |_token, hits| hits < threshold }
    end

    # Most frequent words first.
    ranked = frequencies.sort_by { |_token, hits| hits }.reverse
    cap = option_setting("WORD_CAP")
    ranked = ranked.first(cap.to_i) if cap
  end

  task("Generating word cloud, patience please...") do
    colours = option_setting("PALETTE").split(" ").map(&:strip)
    width   = option_setting("IMAGE_WIDTH").to_i
    height  = option_setting("IMAGE_HEIGHT").to_i
    canvas  = MagicCloud::Cloud.new(ranked, rotate: :none, palette: colours).draw(width, height)
    png     = canvas.to_blob { self.format = "png" }
    File.open(option_setting("DEST"), "wb") { |io| io.write(png) }
  end

  info("Word cloud written to #{option_setting('DEST').bold}")
end