Class: TwitterCrawler
- Inherits: Object
- Inheritance hierarchy: Object → TwitterCrawler
- Defined in:
- lib/twittercrawler.rb
Instance Method Summary
- #crawl ⇒ Object
-
#gen_json ⇒ Object
Generate JSON for output.
-
#gen_query ⇒ Object
Generate advanced query.
-
#get_tweets ⇒ Object
Get the tweets on the page.
-
#initialize(search_term, operator, requests) ⇒ TwitterCrawler
constructor
A new instance of TwitterCrawler.
-
#scroll_down(last_tweet_num) ⇒ Object
Scroll down to the bottom.
Constructor Details
#initialize(search_term, operator, requests) ⇒ TwitterCrawler
Returns a new instance of TwitterCrawler.
9 10 11 12 13 14 |
# File 'lib/twittercrawler.rb', line 9
#
# Builds a crawler for one search.
#
# @param search_term [String] text to search for
# @param operator [String, nil] extra search operator text appended to the
#   term by #gen_query; skipped when nil
# @param requests [Object] browser manager; the other methods of this class
#   call get_page, get_most_recent_browser and close_all_browsers on it
def initialize(search_term, operator, requests)
  @search_term, @operator, @requests = search_term, operator, requests
  # Parsed tweets accumulate here until they are serialized.
  @output = []
end
Instance Method Details
#crawl ⇒ Object
25 26 27 28 29 30 |
# File 'lib/twittercrawler.rb', line 25
#
# Runs the whole crawl: opens the search page for the generated query,
# scrolls until no more tweets load, collects the tweets, and closes the
# browsers.
#
# The browsers are closed even when loading, scrolling, or parsing raises,
# so a failed crawl does not leave them open. The error still propagates.
#
# @return [Object] whatever @requests.close_all_browsers returns
def crawl
  begin
    @requests.get_page("https://twitter.com/search?f=tweets&q=#{gen_query}")
    scroll_down(0)
    get_tweets
  ensure
    # Captured so a successful crawl keeps returning the close result.
    closed = @requests.close_all_browsers
  end
  closed
end
#gen_json ⇒ Object
Generate JSON for output
61 62 63 |
# File 'lib/twittercrawler.rb', line 61
#
# Serializes everything collected so far.
#
# @return [String] pretty-printed JSON rendering of @output
def gen_json
  JSON.pretty_generate(@output)
end
#gen_query ⇒ Object
Generate advanced query
17 18 19 20 21 22 23 |
# File 'lib/twittercrawler.rb', line 17
#
# Builds the URL-encoded value for the search URL's `q=` parameter.
#
# The search term and the optional operator are joined with a single space.
# A nil or empty part is skipped, so no stray separator is produced.
#
# URI.encode was removed in Ruby 3.0 and left `&`, `=` and `+` unescaped,
# which let such characters break out of the query parameter.
# URI.encode_www_form_component escapes them; spaces become `+`.
#
# @return [String] form-encoded query text
def gen_query
  parts = [@search_term, @operator].select { |part| part && !part.to_s.empty? }
  URI.encode_www_form_component(parts.join(" "))
end
#get_tweets ⇒ Object
Get the tweets on the page
33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/twittercrawler.rb', line 33
#
# Collects every tweet on the current page.
#
# Finds all elements with class "tweet" in the most recent browser, passes
# each element's innerHTML to TwitterParser, and appends the parsed result
# to @output.
#
# @return [Object] the element collection returned by find_elements
def get_tweets
  page = @requests.get_most_recent_browser[1].first
  page.find_elements(class: "tweet").each do |element|
    markup = element.attribute("innerHTML")
    @output << TwitterParser.new(markup).parse_tweet
  end
end
#scroll_down(last_tweet_num) ⇒ Object
Scroll down to the bottom
46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/twittercrawler.rb', line 46
#
# Scrolls the results page until no new tweets appear.
#
# Each pass scrolls the second-to-last tweet into view (or the only tweet
# when there is just one), waits a second, and recounts the tweets. It
# stops once the count no longer exceeds the count from the previous pass.
#
# A page with no tweets returns immediately instead of raising
# NoMethodError on a nil element. The loop is iterative, so a long timeline
# cannot exhaust the stack.
#
# @param last_tweet_num [Integer] tweet count seen on the previous pass
#   (0 for the first call)
# @return [nil]
def scroll_down(last_tweet_num)
  loop do
    browser = @requests.get_most_recent_browser[1].first
    tweets = browser.find_elements(class: "tweet")
    break if tweets.empty?

    # Scroll down to the last tweets so the page loads more of them.
    (tweets[-2] || tweets.last).location_once_scrolled_into_view

    # Wait before recounting to see whether another pass is needed.
    sleep(1)
    tweet_count = browser.find_elements(class: "tweet").length
    break unless tweet_count > last_tweet_num

    last_tweet_num = tweet_count
  end
  nil
end