Module: CollectTwitterMedia

Extended by:
CollectTwitterMedia
Includes:
FileOperation
Included in:
CollectTwitterMedia
Defined in:
lib/collect_twitter_media.rb,
lib/collect_twitter_media/version.rb

Constant Summary collapse

VERSION =
"1.1.4"

Instance Method Summary collapse

Methods included from FileOperation

#basename_of_image_file, #make_directory_if_not_exist, #remove_image, #to_pathname

Instance Method Details

#access_token(value) ⇒ Object



32
33
34
# File 'lib/collect_twitter_media.rb', line 32

# Stores the access token that twitter_client later passes to the
# Twitter client configuration.
#
# @param value [Object] the access token; stored in @access_token unchanged
# @return [Object] the value that was assigned
def access_token(value)
  @access_token = value
end

#access_token_secret(value) ⇒ Object



36
37
38
# File 'lib/collect_twitter_media.rb', line 36

# Stores the access token secret that twitter_client later passes to the
# Twitter client configuration.
#
# @param value [Object] the access token secret; stored in
#   @access_token_secret unchanged
# @return [Object] the value that was assigned
def access_token_secret(value)
  @access_token_secret = value
end

#append_csv_row(csv_filename, tweet) ⇒ Object

HACK: the call to ‘media_uri_and_filename’ is duplicated in the ‘save_image_file’ method



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/collect_twitter_media.rb', line 169

# Appends one CSV row per photo attached to +tweet+.
#
# Column order matches the header written by create_csv_file:
# tweet_id, screen_name, original_filename, save_filename, uri, created_at.
# The CSV file is opened once per tweet (not once per row) and is not
# touched at all when the tweet has no photos.
#
# HACK: the media_uri_and_filename call is duplicated in save_image_file.
#
# @param csv_filename [String] path of the CSV file to append to
# @param tweet [Object] tweet passed through to media_uri_and_filename
# @return [Array<Hash>] the media entries returned by media_uri_and_filename
def append_csv_row(csv_filename, tweet)
  media_entries = media_uri_and_filename(tweet)
  return media_entries if media_entries.empty?

  CSV.open(csv_filename, 'a') do |csv_file|
    media_entries.each do |media_data|
      csv_file << [
        media_data['tweet_id'],
        media_data['screen_name'],
        media_data['media_filename'],
        # Same name save_image_file uses for the downloaded file.
        "@#{media_data['screen_name']}_#{media_data['tweet_id']}_#{media_data['media_filename']}",
        media_data['media_original_uri'],
        media_data['created_at'],
      ]
    end
  end

  media_entries
end

#collect_tweets(until_tweet_id = '', count = 200) ⇒ Object

Returns tweets whose ID is EQUAL TO OR LESS THAN ‘until_tweet_id’



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/collect_twitter_media.rb', line 50

# Fetches one page of tweets by calling @client.home_timeline.
#
# Retweets are included and tweets are requested in 'extended' mode. The
# page is bounded with max_id only when +until_tweet_id+ is an Integer; any
# other value (such as the default '') means "no upper bound".
#
# On any StandardError the error is printed to stdout and the process exits
# with status 1.
#
# @param until_tweet_id [Integer, Object] upper bound (inclusive) for tweet
#   IDs; ignored unless it is an Integer
# @param count [Integer] number of tweets to request
# @return [Object] whatever @client.home_timeline returns
def collect_tweets(until_tweet_id='', count=200)
  options = { count: count, include_rts: true, tweet_mode: 'extended' }
  options[:max_id] = until_tweet_id if until_tweet_id.is_a?(Integer)

  @client.home_timeline(options)
rescue => e
  puts e
  exit(1)
end

#collect_tweets_with_loop(loop_count = 1, tweet_count = 200, start_tweet_id = '') ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/collect_twitter_media.rb', line 63

# Fetches up to +loop_count+ consecutive pages of tweets, walking backwards.
#
# After each page the next request is bounded by (smallest tweet ID of the
# page - 1), so pages do not overlap. Paging stops early as soon as a page
# comes back empty.
#
# @param loop_count [Integer] maximum number of pages to request
# @param tweet_count [Integer] number of tweets to request per page
# @param start_tweet_id [Integer, Object] upper bound for the first page,
#   passed to collect_tweets as-is
# @return [Array] all collected tweets in the order they were fetched
def collect_tweets_with_loop(loop_count=1, tweet_count=200, start_tweet_id='')
  pages  = []
  cursor = start_tweet_id

  loop_count.times do
    page = collect_tweets(cursor, tweet_count)
    break if page.empty?

    pages << page
    # Step just below the oldest tweet seen so far.
    cursor = min_tweet_id(tweet_id_collection(page)) - 1
  end

  pages.flatten
end

#consumer_key(value) ⇒ Object

HACK: TOO LONG…(can I use block?)



24
25
26
# File 'lib/collect_twitter_media.rb', line 24

# Stores the consumer key that twitter_client later passes to the
# Twitter client configuration.
#
# @param value [Object] the consumer key; stored in @consumer_key unchanged
# @return [Object] the value that was assigned
def consumer_key(value)
  @consumer_key = value
end

#consumer_secret(value) ⇒ Object



28
29
30
# File 'lib/collect_twitter_media.rb', line 28

# Stores the consumer secret that twitter_client later passes to the
# Twitter client configuration.
#
# @param value [Object] the consumer secret; stored in @consumer_secret
#   unchanged
# @return [Object] the value that was assigned
def consumer_secret(value)
  @consumer_secret = value
end

#create_csv_file(save_directory, base_filename = 'image_from_twitter') ⇒ Object



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/collect_twitter_media.rb', line 150

# Creates a new, timestamped CSV file containing only the header row.
#
# The file is named "<base_filename>_<YYYYmmdd_HHMMSS>.csv" inside
# +save_directory+ and is opened in 'w' mode, so an existing file with the
# same name is overwritten.
#
# @param save_directory [#to_s] directory the file is created in
# @param base_filename [String] prefix of the generated file name
# @return [String] path of the created CSV file
def create_csv_file(save_directory, base_filename='image_from_twitter')
  timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
  csv_path  = "#{save_directory}/#{base_filename}_#{timestamp}.csv"
  columns   = %w[tweet_id screen_name original_filename save_filename uri created_at]

  CSV.open(csv_path, 'w') { |csv| csv << columns }

  csv_path
end

#max_tweet_id(tweet_id_collection) ⇒ Object



90
91
92
# File 'lib/collect_twitter_media.rb', line 90

# Returns the largest tweet ID in the collection.
#
# @param tweet_id_collection [#max] collection of tweet IDs (an Array when
#   produced by tweet_id_collection)
# @return [Object, nil] result of calling +max+ on the collection; nil for
#   an empty Array
def max_tweet_id(tweet_id_collection)
  tweet_id_collection.max
end

#media_filename(media_uri) ⇒ Object



110
111
112
# File 'lib/collect_twitter_media.rb', line 110

# Extracts the file name (with extension) from a pbs.twimg.com media URI.
#
# Everything after "https://pbs.twimg.com/media/" up to the end of the
# string is returned. The pattern is not anchored at the start.
#
# @param media_uri [String] media URI
# @return [String] the part of the URI after ".../media/"
# @raise [NoMethodError] when the URI does not match the pattern
def media_filename(media_uri)
  matched = media_uri.match(/https:\/\/pbs\.twimg\.com\/media\/(.*)\z/)
  matched[1] # with extension
end

#media_original_uri(media_uri) ⇒ Object



106
107
108
# File 'lib/collect_twitter_media.rb', line 106

# Builds the ":orig" variant of a media URI by appending ":orig" to it.
#
# NOTE(review): presumably ":orig" selects the original-size image on
# Twitter's image host - confirm against the Twitter media documentation.
#
# @param media_uri [#to_s] media URI
# @return [String] the URI followed by ":orig"
def media_original_uri(media_uri)
  "#{media_uri}:orig"
end

#media_uri_and_filename(tweet) ⇒ Object

If a tweet has several attached image files, all of them are included.



135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/collect_twitter_media.rb', line 135

# Builds one metadata Hash per photo attached to +tweet+.
#
# If the tweet has several attached images, every one of them gets an entry.
# Each Hash has the string keys 'tweet_id', 'media_original_uri',
# 'media_filename', 'screen_name' and 'created_at'.
#
# @param tweet [Object] tweet responding to id, attrs and created_at, and
#   accepted by media_uris
# @return [Array<Hash>] one entry per photo URI; empty when there is none
def media_uri_and_filename(tweet)
  media_uris(tweet).map do |media_uri|
    {
      'tweet_id'           => tweet.id,
      'media_original_uri' => media_original_uri(media_uri),
      'media_filename'     => media_filename(media_uri),
      'screen_name'        => tweet.attrs[:user][:screen_name],
      'created_at'         => tweet.created_at,
    }
  end
end

#media_uris(tweet) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
# File 'lib/collect_twitter_media.rb', line 94

# Collects the HTTPS URIs of the photos attached to +tweet+.
#
# Only media whose class is exactly Twitter::Media::Photo are included;
# other media objects are skipped. +tweet.media+ is not read when
# +tweet.media?+ is false.
#
# @param tweet [Object] tweet responding to media? and media
# @return [Array<String>] photo URIs as strings; empty when there is none
def media_uris(tweet)
  return [] unless tweet.media?

  photos = tweet.media.select { |media| media.instance_of?(Twitter::Media::Photo) }
  photos.map { |photo| photo.media_url_https.to_s }
end

#media_uris_and_filenames(tweets) ⇒ Object

Deprecated: this method does not work correctly when a tweet is a retweet.



187
188
189
190
191
192
193
# File 'lib/collect_twitter_media.rb', line 187

# Builds the media metadata entries for every tweet in +tweets+ as one
# flat list.
#
# @deprecated Does not work correctly when a tweet is a retweet.
#
# @param tweets [Enumerable] tweets accepted by media_uri_and_filename
# @return [Array] flattened results of media_uri_and_filename for each tweet
def media_uris_and_filenames(tweets)
  tweets.map { |tweet| media_uri_and_filename(tweet) }.flatten
end

#min_tweet_id(tweet_id_collection) ⇒ Object



86
87
88
# File 'lib/collect_twitter_media.rb', line 86

# Returns the smallest tweet ID in the collection.
#
# collect_tweets_with_loop subtracts 1 from this value to bound the next
# page, so it is only called there with a non-empty collection.
#
# @param tweet_id_collection [#min] collection of tweet IDs (an Array when
#   produced by tweet_id_collection)
# @return [Object, nil] result of calling +min+ on the collection; nil for
#   an empty Array
def min_tweet_id(tweet_id_collection)
  tweet_id_collection.min
end

#original_tweet(tweet) ⇒ Object



114
115
116
117
118
119
120
# File 'lib/collect_twitter_media.rb', line 114

# Resolves a retweet to the tweet it retweets.
#
# A tweet that is not a retweet is returned unchanged. For a retweet, the
# retweeted status is fetched again through @client in 'extended' mode,
# using the ID found in the tweet's raw attributes.
#
# @param tweet [Object] tweet responding to retweet? and attrs
# @return [Object] +tweet+ itself, or the result of @client.status
def original_tweet(tweet)
  return tweet unless tweet.retweet?

  retweeted_id = tweet.attrs[:retweeted_status][:id]
  # HACK: this extra API request is sometimes slow to respond
  @client.status(retweeted_id, tweet_mode: "extended")
end

#save(directory, tweet_count = 200, loop_count = 1, start_tweet_id = '') ⇒ Object

HACK: too many arguments



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/collect_twitter_media.rb', line 10

# Collects tweets, downloads their photos into +directory+ and writes a CSV
# index of the downloaded files.
#
# Steps: build the Twitter client, collect the tweets, make sure the target
# directory exists, create the CSV file, then process each tweet.
#
# +tweet_count+ and +start_tweet_id+ are forwarded to
# collect_tweets_with_loop; previously they were accepted but ignored, so
# the defaults (200, '') were always used.
#
# @param directory [Object] target directory, passed to
#   make_directory_if_not_exist
# @param tweet_count [Integer] number of tweets to request per page
# @param loop_count [Integer] maximum number of pages to request
# @param start_tweet_id [Integer, Object] upper bound for the first page
# @return [Array] the collected tweets
def save(directory, tweet_count=200, loop_count=1, start_tweet_id='') # HACK: too many arguments
  twitter_client
  tweet_collection = collect_tweets_with_loop(loop_count, tweet_count, start_tweet_id)
  save_directory   = make_directory_if_not_exist(directory)
  csv_filename     = create_csv_file(save_directory)

  tweet_collection.each do |tweet|
    # A retweet does NOT carry the correct data, so use the original tweet.
    source_tweet = original_tweet(tweet)
    save_image_file(save_directory, source_tweet)
    append_csv_row(csv_filename, source_tweet)
  end
end

#save_image_file(save_directory, tweet) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
# File 'lib/collect_twitter_media.rb', line 122

# Downloads every photo attached to +tweet+ into +save_directory+ by running
# wget once per photo.
#
# Each file is saved as "@<screen_name>_<tweet_id>_<media_filename>". wget
# is started with an argument array, so no shell is involved: directory
# names containing spaces work, and shell metacharacters in API-supplied
# values cannot inject commands. As before, wget's stdout is read and
# discarded and its exit status is not checked.
#
# @param save_directory [#to_s] directory the images are written to
# @param tweet [Object] tweet passed through to media_uri_and_filename
# @return [Array<Hash>] the media entries returned by media_uri_and_filename
def save_image_file(save_directory, tweet)
  media_uri_and_filename(tweet).each do |media_data|
    destination = "#{save_directory}/@#{media_data['screen_name']}_#{media_data['tweet_id']}_#{media_data['media_filename']}"

    # Array form of IO.popen executes wget directly, without a shell.
    IO.popen(['wget', '-q', media_data['media_original_uri'], '-O', destination], &:read)
  end
end

#tweet_id_collection(tweets) ⇒ Object



78
79
80
81
82
83
84
# File 'lib/collect_twitter_media.rb', line 78

# Collects the IDs of the given tweets, in the same order.
#
# @param tweets [Enumerable] objects responding to +id+
# @return [Array] one ID per tweet; empty for an empty collection
def tweet_id_collection(tweets)
  tweets.map(&:id)
end

#twitter_client ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/collect_twitter_media.rb', line 40

# Builds a Twitter::REST::Client from the stored credentials and keeps it in
# @client, where collect_tweets and original_tweet read it.
#
# The credentials come from @consumer_key, @consumer_secret, @access_token
# and @access_token_secret. Nothing here checks that they were set first.
#
# @return [Twitter::REST::Client] the newly created client
def twitter_client
  @client = Twitter::REST::Client.new do |config|
    config.consumer_key        = @consumer_key
    config.consumer_secret     = @consumer_secret
    config.access_token        = @access_token
    config.access_token_secret = @access_token_secret
  end
end

#via_client(tweet) ⇒ Object

not used



196
197
198
199
# File 'lib/collect_twitter_media.rb', line 196

# Extracts the client name from a tweet's +source+ HTML anchor.
#
# Example source:
#   "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>"
# gives "Twitter Web Client". Not used elsewhere in this module.
#
# @param tweet [Object] tweet responding to +source+
# @return [String] the text inside the anchor element
# @raise [NoMethodError] when the source is not a single <a href=...> anchor
def via_client(tweet)
  anchor = tweet.source.match(/\A<a href=".*>(.*)<\/a>\z/)
  anchor[1]
end