Class: Birdwatcher::Modules::Statuses::WordCloud
- Inherits:
-
Birdwatcher::Module
- Object
- Birdwatcher::Module
- Birdwatcher::Modules::Statuses::WordCloud
- Defined in:
- lib/birdwatcher/modules/statuses/word_cloud.rb
Constant Summary collapse
- DEFAULT_EXCLUDED_WORDS =
%w(rt via oh)
Constants inherited from Birdwatcher::Module
Birdwatcher::Module::MODULE_PATH
Constants included from Concerns::Concurrency
Concerns::Concurrency::DEFAULT_THREAD_POOL_SIZE
Constants included from Concerns::Core
Concerns::Core::DATA_DIRECTORY
Class Method Summary collapse
Instance Method Summary collapse
Methods inherited from Birdwatcher::Module
_file_path, _file_path=, descendants, #execute, inherited, meta, meta=, module_by_path, module_paths, modules, path
Methods included from Concerns::Concurrency
Methods included from Concerns::Persistence
included, #save_status, #save_user
Methods included from Concerns::Presentation
included, #make_status_summary_output, #make_url_summary_output, #make_user_details_output, #make_user_summary_output, #output_status_summary, #output_user_details, #output_user_summary, #page_text
Methods included from Concerns::Outputting
#confirm, #error, #fatal, included, #info, #line_separator, #newline, #output, #output_formatted, #task, #warn
Methods included from Concerns::Util
#escape_html, #excerpt, included, #parse_time, #pluralize, #strip_control_characters, #strip_html, #suppress_output, #suppress_warnings, #time_ago_in_words, #unescape_html
Methods included from Concerns::Core
#console, #current_workspace, #current_workspace=, #database, included, #klout_client, #read_data_file, #twitter_client
Class Method Details
.info ⇒ Object
100 101 102 103 104 105 106 107 108 109 110 111 112 |
# Returns the module's descriptive text as a single heredoc string (the heredoc
# is the method's last expression). The phrase 'show options' is interpolated
# after calling `.bold` on it.
# NOTE(review): this listing is whitespace-collapsed; the heredoc's original
# line breaks cannot be recovered from here, so the code is left untouched.
# File 'lib/birdwatcher/modules/statuses/word_cloud.rb', line 100 def self.info <<-INFO The Word Cloud module can generate a classic weighted word cloud from words used in statuses across all or specific users and between different times. The module is heavily configurable; have a look at the options with #{'show options'.bold} Please note that configuring the module with a long timespan might result in a very long execution time when the word cloud image is generated. The generated image will be in PNG format. INFO end |
Instance Method Details
#run ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'lib/birdwatcher/modules/statuses/word_cloud.rb', line 114

# Generates a weighted word cloud image from statuses in the current workspace.
#
# Steps, in order:
# 1. Select statuses, restricted to the screen names in the USERS option when
#    it is set, and to the SINCE..BEFORE date range (defaults: seven days ago
#    and today).
# 2. Count every non-excluded word in each status text and, when
#    INCLUDE_PAGE_TITLES is set, in the titles of the status' URLs that do not
#    point at twitter.com.
# 3. Drop words seen fewer than MIN_WORD_COUNT times, order the rest by
#    descending count and keep at most WORD_CAP of them (both optional).
# 4. Render the cloud with MagicCloud using PALETTE, IMAGE_WIDTH and
#    IMAGE_HEIGHT, and write it as PNG to the DEST path.
#
# NOTE(review): prepare_exclusion_list, split_into_words and exclude_word? are
# defined outside this listing; their exact behavior is assumed from usage.
#
# @return [false] when no statuses match the filters
# @return [Object] otherwise, the return value of the final +info+ call
def run
  statuses =
    if option_setting("USERS")
      screen_names = option_setting("USERS").split(" ").map(&:strip)
      user_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
      current_workspace.statuses_dataset.where("user_id IN ?", user_ids)
    else
      current_workspace.statuses_dataset
    end

  date_format = "%Y-%m-%d"
  since =
    if option_setting("SINCE")
      parse_time(option_setting("SINCE")).strftime(date_format)
    else
      (Date.today - 7).strftime(date_format)
    end
  before =
    if option_setting("BEFORE")
      parse_time(option_setting("BEFORE")).strftime(date_format)
    else
      Time.now.strftime(date_format)
    end

  statuses = statuses.where("DATE(posted_at) >= DATE(?) AND DATE(posted_at) <= DATE(?)", since, before).all
  if statuses.count.zero?
    error("There are no statuses to process")
    return false
  end

  prepare_exclusion_list
  words = Hash.new(0)
  sorted_words = []

  # Shared tally step for status texts and page titles, so both sources are
  # counted by exactly the same rules.
  count_words = lambda do |text|
    split_into_words(text).each do |word|
      words[word] += 1 unless exclude_word?(word)
    end
  end

  task("Processing #{statuses.count.to_s.bold} statuses...") do
    # The option does not change while iterating; look it up once.
    include_page_titles = option_setting("INCLUDE_PAGE_TITLES")

    statuses.each do |status|
      count_words.call(status.text)
      next unless include_page_titles

      status.urls_dataset
            .where("title IS NOT NULL")
            .where("final_url NOT LIKE 'https://twitter.com/%'")
            .map(&:title)
            .each(&count_words)
    end

    if option_setting("MIN_WORD_COUNT")
      # Convert the threshold once instead of once per counted word.
      min_word_count = option_setting("MIN_WORD_COUNT").to_i
      words.delete_if { |_word, count| count < min_word_count }
    end

    # sort_by + reverse (rather than sorting on a negated count) keeps the
    # existing ordering of words that share the same count.
    sorted_words = words.sort_by { |_word, count| count }.reverse
    if option_setting("WORD_CAP")
      sorted_words = sorted_words.take(option_setting("WORD_CAP").to_i)
    end
  end

  task("Generating word cloud, patience please...") do
    cloud = MagicCloud::Cloud.new(
      sorted_words,
      :rotate => :none,
      :palette => option_setting("PALETTE").split(" ").map(&:strip)
    ).draw(option_setting("IMAGE_WIDTH").to_i, option_setting("IMAGE_HEIGHT").to_i).to_blob { self.format = "png" }

    # Binary mode: the blob is raw PNG data.
    File.binwrite(option_setting("DEST"), cloud)
  end

  info("Word cloud written to #{option_setting('DEST').bold}")
end