Class: TwitterParser

Inherits:
Object
  • Object
show all
Includes:
Twitter::TwitterText::Extractor
Defined in:
lib/twitter_parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(tweet) ⇒ TwitterParser

Returns a new instance of TwitterParser.



7
8
9
# File 'lib/twitter_parser.rb', line 7

# Builds a parser around a single tweet's markup.
#
# The markup is handed to Nokogiri::HTML.parse and the resulting document is
# kept in @tweet, which every getter in this class queries.
#
# @param tweet markup for one tweet (presumably an HTML string -- confirm
#   against the caller)
def initialize(tweet)
  parsed_markup = Nokogiri::HTML.parse(tweet)
  @tweet = parsed_markup
end

Instance Method Details

#get_conversation_id ⇒ Object



95
96
97
# File 'lib/twitter_parser.rb', line 95

# Reads the "data-conversation-id" attribute from the first element matching
# ".tweet".
#
# Assumes at least one ".tweet" element exists; with none, the lookup is
# attempted on nil and raises NoMethodError.
#
# @return the attribute value, or nil when the attribute is absent
def get_conversation_id
  tweet_node = @tweet.css(".tweet").first
  tweet_node["data-conversation-id"]
end

#get_favorite_count ⇒ Object



91
92
93
# File 'lib/twitter_parser.rb', line 91

# Reads the favorite counter for the tweet.
#
# Takes the first element matching ".ProfileTweet-action--favorite", then the
# first "span" inside it, and returns that span's 'data-tweet-stat-count'
# attribute unchanged (no numeric conversion is done here).
def get_favorite_count
  favorite_action = @tweet.css(".ProfileTweet-action--favorite").first
  counter_span = favorite_action.css("span").first
  counter_span['data-tweet-stat-count']
end

#get_fullname ⇒ Object



61
62
63
# File 'lib/twitter_parser.rb', line 61

# Returns the text of everything matching ".fullname" in the tweet.
#
# The text is taken from the whole match set, not only its first element.
def get_fullname
  fullname_nodes = @tweet.css(".fullname")
  fullname_nodes.text
end

#get_hashtags ⇒ Object

Get hashtags in the tweet



52
53
54
55
# File 'lib/twitter_parser.rb', line 52

# Get hashtags in the tweet.
#
# Passes the result of get_tweet_text to extract_hashtags and returns
# whatever that helper produces.
def get_hashtags
  extract_hashtags(get_tweet_text)
end

#get_is_reply_to ⇒ Object



99
100
101
# File 'lib/twitter_parser.rb', line 99

# Reads the "data-is-reply-to" attribute from the first element matching
# ".tweet".
#
# The raw attribute value is returned; nil when the attribute is absent.
def get_is_reply_to
  tweet_node = @tweet.css(".tweet").first
  tweet_node["data-is-reply-to"]
end

#get_mentioned_urls ⇒ Object

Get URLs in the tweet



46
47
48
49
# File 'lib/twitter_parser.rb', line 46

# Get URLs in the tweet.
#
# Passes the result of get_tweet_text to extract_urls and returns whatever
# that helper produces.
def get_mentioned_urls
  extract_urls(get_tweet_text)
end

#get_mentions ⇒ Object

Get account names and uids that are mentioned



120
121
122
123
124
125
126
127
128
129
# File 'lib/twitter_parser.rb', line 120

# Get account names and uids that are mentioned.
#
# Looks at every element matching ".twitter-atreply". For each one the name
# is the text of its "b" children and the uid is its
# 'data-mentioned-user-id' attribute.
#
# @return [Array] a two-element array: [names, uids] when at least one
#   mention exists, otherwise [nil, nil]
def get_mentions
  atreply_links = @tweet.css(".twitter-atreply")
  return nil, nil if atreply_links.empty?

  names = atreply_links.map { |link| link.css("b").text }
  uids = atreply_links.map { |link| link['data-mentioned-user-id'] }
  [names, uids]
end

#get_profile_pic ⇒ Object

Get URL to the profile pic



41
42
43
# File 'lib/twitter_parser.rb', line 41

# Get URL to the profile pic.
#
# Returns the 'src' attribute of the first element matching "img.avatar".
def get_profile_pic
  avatar_image = @tweet.css("img.avatar").first
  avatar_image['src']
end

#get_reply_count ⇒ Object



103
104
105
# File 'lib/twitter_parser.rb', line 103

# Reads the reply counter for the tweet.
#
# Takes the first element matching ".ProfileTweet-action--reply", then the
# first "span" inside it, and returns that span's 'data-tweet-stat-count'
# attribute unchanged (no numeric conversion is done here).
def get_reply_count
  reply_action = @tweet.css(".ProfileTweet-action--reply").first
  counter_span = reply_action.css("span").first
  counter_span['data-tweet-stat-count']
end

#get_reply_to_user ⇒ Object

The user of the tweet that is being replied to (if any)



108
109
110
111
112
113
114
115
116
117
# File 'lib/twitter_parser.rb', line 108

# The user of the tweet that is being replied to (if any).
#
# Finds the first "span" whose text contains "In reply" and reads the first
# "a" inside it. The username is that link's 'href' with every "/" removed;
# the uid is its 'data-user-id' attribute.
#
# @return [Array] a two-element array: [username, uid], or [nil, nil] when no
#   such span exists
def get_reply_to_user
  reply_span = @tweet.css("span").find { |span| span.text.include?("In reply") }
  return nil, nil unless reply_span

  profile_link = reply_span.css("a").first
  [profile_link['href'].delete("/"), profile_link['data-user-id']]
end

#get_retweet_count ⇒ Object



87
88
89
# File 'lib/twitter_parser.rb', line 87

# Reads the retweet counter for the tweet.
#
# Takes the first element matching ".ProfileTweet-action--retweet", then the
# first "span" inside it, and returns that span's 'data-tweet-stat-count'
# attribute unchanged (no numeric conversion is done here).
def get_retweet_count
  retweet_action = @tweet.css(".ProfileTweet-action--retweet").first
  counter_span = retweet_action.css("span").first
  counter_span['data-tweet-stat-count']
end

#get_tweet_id ⇒ Object



79
80
81
# File 'lib/twitter_parser.rb', line 79

# Reads the "data-tweet-id" attribute from the first element matching
# ".tweet".
#
# The raw attribute value is returned; nil when the attribute is absent.
def get_tweet_id
  tweet_node = @tweet.css(".tweet").first
  tweet_node["data-tweet-id"]
end


83
84
85
# File 'lib/twitter_parser.rb', line 83

# Builds the absolute link to the tweet.
#
# Appends the "data-permalink-path" attribute of the first ".tweet" element
# to "https://twitter.com". The concatenation uses String#+, so a missing
# attribute (nil) raises TypeError instead of yielding a partial URL.
def get_tweet_link
  permalink_path = @tweet.css(".tweet").first["data-permalink-path"]
  "https://twitter.com" + permalink_path
end

#get_tweet_text ⇒ Object

Get the tweet text



70
71
72
# File 'lib/twitter_parser.rb', line 70

# Get the tweet text.
#
# Returns the text of everything matching ".js-tweet-text-container" with
# leading and trailing whitespace removed.
def get_tweet_text
  text_container = @tweet.css(".js-tweet-text-container")
  text_container.text.strip
end

#get_tweet_time ⇒ Object

Get the time of the tweet



75
76
77
# File 'lib/twitter_parser.rb', line 75

# Get the time of the tweet.
#
# Parses the "title" attribute of the first ".tweet-timestamp" element with
# DateTime.parse and formats the result as '%d %b %Y %H:%M:%S'
# (for example "03 Jan 2018 19:51:07").
#
# @return [String] the formatted timestamp
def get_tweet_time
  timestamp_title = @tweet.css(".tweet-timestamp").first["title"]
  DateTime.parse(timestamp_title).strftime('%d %b %Y %H:%M:%S')
end

#get_user_id ⇒ Object



65
66
67
# File 'lib/twitter_parser.rb', line 65

# Reads the author's user id.
#
# Narrows the ".js-user-profile-link" matches to ".account-group", takes the
# first result, and returns its "data-user-id" attribute.
def get_user_id
  profile_links = @tweet.css(".js-user-profile-link")
  account_group = profile_links.css(".account-group").first
  account_group["data-user-id"]
end

#get_username ⇒ Object



57
58
59
# File 'lib/twitter_parser.rb', line 57

# Reads the "data-screen-name" attribute from the first element matching
# ".tweet".
#
# The raw attribute value is returned; nil when the attribute is absent.
def get_username
  tweet_node = @tweet.css(".tweet").first
  tweet_node["data-screen-name"]
end

#parse_tweet ⇒ Object

Parse the individual tweet



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/twitter_parser.rb', line 12

# Parse the individual tweet.
#
# Collects the result of every getter into one hash. get_reply_to_user,
# get_mentions and get_tweet_time each feed two keys, so each is called once
# and its result reused rather than querying the document twice.
#
# @return [Hash, nil] the tweet's fields keyed by symbol, or nil when the
#   parsed document has no text at all
def parse_tweet
  return if @tweet.text.empty?

  # Both helpers return a two-element array; unpack each a single time.
  reply_to_user, reply_to_uid = get_reply_to_user
  mention_names, mention_uids = get_mentions
  tweet_time = get_tweet_time

  {
    tweet_text: get_tweet_text,
    username: get_username,
    fullname: get_fullname,
    user_id: get_user_id,
    profile_pic: get_profile_pic,
    hashtags: get_hashtags,
    mentioned_urls: get_mentioned_urls,
    conversation_id: get_conversation_id,
    is_reply_to: get_is_reply_to,
    reply_to_user: reply_to_user,
    reply_to_uid: reply_to_uid,
    tweet_id: get_tweet_id,
    tweet_time: tweet_time,
    tweet_link: get_tweet_link,
    retweet_count: get_retweet_count,
    favorite_count: get_favorite_count,
    reply_count: get_reply_count,
    mention_names: mention_names,
    mention_uids: mention_uids,
    time_collected: Time.now,
    # Same formatted timestamp as :tweet_time, stored under a second key.
    date_searchable: tweet_time
  }
end