Class: TwitterCrawler

Inherits:
Object
  • Object
show all
Defined in:
lib/twittercrawler.rb

Instance Method Summary collapse

Constructor Details

#initialize(search_term, operator, requests) ⇒ TwitterCrawler

Returns a new instance of TwitterCrawler.



9
10
11
12
13
14
# File 'lib/twittercrawler.rb', line 9

# Set up a crawler for one search.
#
# search_term - base text of the search query
# operator    - extra query text appended after the search term; may be nil
# requests    - object used to load pages and reach the browser
#
# Parsed tweets are accumulated in @output, which starts out empty.
def initialize(search_term, operator, requests)
  @output = []
  @requests = requests
  @operator = operator
  @search_term = search_term
end

Instance Method Details

#crawl ⇒ Object



25
26
27
28
29
30
# File 'lib/twittercrawler.rb', line 25

# Run the whole crawl: open the search page for the generated query,
# scroll until no more tweets load, then parse the tweets into @output.
#
# The browsers are closed in an ensure clause so they are released even
# when loading, scrolling or parsing raises; the error still propagates.
#
# Returns the result of @requests.close_all_browsers.
def crawl
  begin
    @requests.get_page("https://twitter.com/search?f=tweets&q=#{gen_query}")
    scroll_down(0)
    get_tweets
  ensure
    close_result = @requests.close_all_browsers
  end
  close_result
end

#gen_json ⇒ Object

Generate JSON for output



61
62
63
# File 'lib/twittercrawler.rb', line 61

# Serialize everything collected in @output as pretty-printed JSON.
#
# Returns the JSON text as a String.
def gen_json
  collected = @output
  JSON.pretty_generate(collected)
end

#gen_query ⇒ Object

Generate advanced query



17
18
19
20
21
22
23
# File 'lib/twittercrawler.rb', line 17

# Build the URL-encoded value for the search page's q= parameter.
#
# The search term is followed by the operator (separated by one space)
# when an operator is present and non-empty.
#
# URI.encode is not used: it was removed in Ruby 3.0 and it left
# characters such as "#", "&" and "+" unescaped, which breaks the query
# string. URI.encode_www_form_component escapes them; its "+" for spaces
# is rewritten to "%20" to keep the space encoding URI.encode produced.
#
# Returns the encoded query as a String.
def gen_query
  query = @search_term.to_s
  query = "#{query} #{@operator}" if @operator && !@operator.to_s.empty?
  # A literal "+" is already escaped as %2B, so any remaining "+" is a space.
  URI.encode_www_form_component(query).gsub("+", "%20")
end

#get_tweets ⇒ Object

Get the tweets on the page



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/twittercrawler.rb', line 33

# Collect every element with class "tweet" from the most recent browser,
# parse each one's inner HTML with TwitterParser, and append the parsed
# result to @output.
#
# Returns the list of tweet elements that were found.
def get_tweets
  driver = @requests.get_most_recent_browser[1].first

  driver.find_elements(class: "tweet").each do |element|
    markup = element.attribute("innerHTML")
    @output << TwitterParser.new(markup).parse_tweet
  end
end

#scroll_down(last_tweet_num) ⇒ Object

Scroll down to the bottom



46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/twittercrawler.rb', line 46

# Keep scrolling the results page until the number of "tweet" elements
# stops growing.
#
# last_tweet_num - tweet count seen before this call (crawl passes 0)
#
# Each pass scrolls the second-to-last tweet into view (the only tweet
# when there is just one), waits a second, then recounts. Scrolling stops
# once the count is no longer greater than the previous count.
#
# Written as a loop rather than self-recursion so the stack does not grow
# with the number of scrolls, and a page with no tweets returns instead of
# failing on a nil element.
#
# Returns nil.
def scroll_down(last_tweet_num)
  loop do
    browser = @requests.get_most_recent_browser[1].first
    tweets = browser.find_elements(class: "tweet")
    break if tweets.empty?

    # Scroll down to the tweet just before the last one
    (tweets[-2] || tweets.last).location_once_scrolled_into_view

    # Wait, then check whether the scroll loaded more tweets
    sleep(1)
    tweet_count = browser.find_elements(class: "tweet").length
    break unless tweet_count > last_tweet_num

    last_tweet_num = tweet_count
  end
end