Class: Pork::Search
- Inherits:
-
Object
- Object
- Pork::Search
- Defined in:
- lib/pork_sandwich/search.rb
Instance Attribute Summary collapse
-
#current_count ⇒ Object
readonly
Returns the value of attribute current_count.
-
#db_ids_created ⇒ Object
readonly
Returns the value of attribute db_ids_created.
-
#desired_count ⇒ Object
Returns the value of attribute desired_count.
-
#from_user ⇒ Object
Returns the value of attribute from_user.
-
#pulls_per_hour ⇒ Object
Returns the value of attribute pulls_per_hour.
-
#query ⇒ Object
readonly
Returns the value of attribute query.
-
#since_id ⇒ Object
Returns the value of attribute since_id.
Instance Method Summary collapse
- #historical_pull ⇒ Object
-
#initialize(query, options = {}) ⇒ Search
constructor
A new instance of Search.
- #manage_pull_rate(time_at_start) ⇒ Object
- #reached_desired_count? ⇒ Boolean
- #reached_since_id?(id) ⇒ Boolean
- #reduce_pull_rate ⇒ Object
Constructor Details
#initialize(query, options = {}) ⇒ Search
Returns a new instance of Search.
6 7 8 9 10 11 12 13 14 15 |
# File 'lib/pork_sandwich/search.rb', line 6
#
# Sets up a search for +query+ and its pull settings.
#
# @param query [Object] search query, stored unchanged in @query
# @param options [Hash] optional settings, looked up by symbol key:
#   :desired_count, :since_id, :from_user, :collect_users, :pulls_per_hour
# @return [Search] a new instance of Search
def initialize(query, options = {})
  @query = query
  @desired_count = options[:desired_count] # if nil, will pull as far back as the Search API allows
  @current_count = 0
  @since_id = options[:since_id]
  @from_user = options[:from_user]
  @db_ids_created = []
  @collect_users = options[:collect_users]
  # Falls back to 1500 whenever the option is missing (or nil/false).
  @pulls_per_hour = options[:pulls_per_hour] || 1500
end
Instance Attribute Details
#current_count ⇒ Object (readonly)
Returns the value of attribute current_count.
4 5 6 |
# File 'lib/pork_sandwich/search.rb', line 4
#
# Read-only accessor.
#
# @return [Object] the value held in @current_count
def current_count
  @current_count
end
#db_ids_created ⇒ Object (readonly)
Returns the value of attribute db_ids_created.
4 5 6 |
# File 'lib/pork_sandwich/search.rb', line 4
#
# Read-only accessor.
#
# @return [Object] the value held in @db_ids_created
def db_ids_created
  @db_ids_created
end
#desired_count ⇒ Object
Returns the value of attribute desired_count.
3 4 5 |
# File 'lib/pork_sandwich/search.rb', line 3
#
# Accessor for the desired_count attribute.
#
# @return [Object] the value held in @desired_count
def desired_count
  @desired_count
end
#from_user ⇒ Object
Returns the value of attribute from_user.
3 4 5 |
# File 'lib/pork_sandwich/search.rb', line 3
#
# Accessor for the from_user attribute.
#
# @return [Object] the value held in @from_user
def from_user
  @from_user
end
#pulls_per_hour ⇒ Object
Returns the value of attribute pulls_per_hour.
3 4 5 |
# File 'lib/pork_sandwich/search.rb', line 3
#
# Accessor for the pulls_per_hour attribute.
#
# @return [Object] the value held in @pulls_per_hour
def pulls_per_hour
  @pulls_per_hour
end
#query ⇒ Object (readonly)
Returns the value of attribute query.
4 5 6 |
# File 'lib/pork_sandwich/search.rb', line 4
#
# Read-only accessor.
#
# @return [Object] the value held in @query
def query
  @query
end
#since_id ⇒ Object
Returns the value of attribute since_id.
3 4 5 |
# File 'lib/pork_sandwich/search.rb', line 3
#
# Accessor for the since_id attribute.
#
# @return [Object] the value held in @since_id
def since_id
  @since_id
end
Instance Method Details
#historical_pull ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/pork_sandwich/search.rb', line 17
#
# Pages backwards through search results for @query, 100 per page, saving
# each tweet through $SAVER and recording the saved record's id in
# @db_ids_created. After each page, :max_id is set to the id of the last
# tweet on that page so the next fetch continues from there.
#
# Paging stops when the desired count is reached, when a tweet id at or
# below @since_id is seen, when the last id stops changing between pages,
# or when a page comes back with no results.
#
# Every rescued error is logged (when $PORK_LOG is set) and the pull loop
# is retried; @search_params is built before the begin block, so :max_id
# survives a retry.
# NOTE(review): retries are unbounded, and the Twitter::NotFound handler
# retries without any pause or change to the request — confirm that a
# persistent NotFound cannot spin here.
#
# @return [true] once paging finishes
def historical_pull
  @search_params = Twitter::Search.new(@query).per_page(100)
  @search_params.from(@from_user) if @from_user

  begin
    loop do
      time_at_start = Time.now
      $PORK_LOG.write("historical pull, query = #{@query}, max_id = #{@search_params.query[:max_id].to_s}") if $PORK_LOG

      # Fetch on a copy so the stored parameters are not touched by the request.
      @return_data = @search_params.dup.fetch
      raise Pork::RateLimitExceeded if @return_data.error == "You have been rate limited. Enhance your calm."

      @tweets_pulled = @return_data.results
      # An empty page has no last tweet to page from; nothing more to pull.
      break if @tweets_pulled.nil? || @tweets_pulled.empty?

      @tweets_pulled.each do |tweet|
        tweet.status_id = tweet.id
        break if reached_desired_count? || reached_since_id?(tweet.status_id)

        @db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
        # $CRAWLER.append(tweet.from_user) if @collect_users
        @current_count += 1
      end

      oldest_id = @tweets_pulled.last.id
      # An unchanged max_id means the previous page ended on the same tweet.
      break if reached_desired_count? ||
               @search_params.query[:max_id] == oldest_id ||
               reached_since_id?(oldest_id)

      @search_params.query[:max_id] = oldest_id
      manage_pull_rate(time_at_start)
    end
  rescue Twitter::Unavailable
    $PORK_LOG.write("ERROR: Twitter unavailable, trying in 60") if $PORK_LOG
    sleep 60
    retry
  rescue Twitter::NotFound
    $PORK_LOG.write("ERROR: Info target not found, trying to skip") if $PORK_LOG
    retry
  rescue Crack::ParseError
    $PORK_LOG.write("Error: JSON Parsing error, trying to skip past problem tweet") if $PORK_LOG
    # Step max_id back to move past the tweet that failed to parse.
    @search_params.query[:max_id] -= 1000 if @search_params.query[:max_id]
    # manage_pull_rate needs a start time; none is in scope in this handler,
    # so the pause is measured from now.
    manage_pull_rate(Time.now)
    retry
  rescue Errno::ETIMEDOUT
    $PORK_LOG.write("ERROR: Puller timed out, retrying in 10") if $PORK_LOG
    sleep 10
    retry
  rescue Twitter::InformTwitter
    $PORK_LOG.write("ERROR: Twitter internal error, retrying in 30") if $PORK_LOG
    sleep 30
    retry
  rescue Pork::RateLimitExceeded
    $PORK_LOG.write("ERROR: Rate limit exceeded; holding off for a bit then trying again") if $PORK_LOG
    sleep 300
    reduce_pull_rate
    retry
  rescue Timeout::Error
    # NOTE(review): the line break inside this message is reproduced from the
    # original literal — confirm it is intentional.
    $PORK_LOG.write("ERROR: Request Timed out. \nRetrying in 30") if $PORK_LOG
    sleep 30
    retry
  end

  true
end
#manage_pull_rate(time_at_start) ⇒ Object
102 103 104 105 106 107 108 109 110 111 |
# File 'lib/pork_sandwich/search.rb', line 102
#
# Sleeps for whatever remains of the per-pull time budget implied by
# @pulls_per_hour, after subtracting the time already spent since
# +time_at_start+. Does not wait when the pull already used up the budget.
#
# @param time_at_start [Time] when the current pull began; defaults to now
#   so the method can also be called without a start time (as the
#   Crack::ParseError handler in historical_pull does), in which case the
#   full pause is taken
# @return [Integer] whatever Kernel#sleep returns
def manage_pull_rate(time_at_start = Time.now)
  desired_pause = 1.0 / (@pulls_per_hour / 60.0 / 60.0) # seconds between pulls
  pull_duration = Time.now - time_at_start
  # Never sleep a negative amount when the pull overran its budget.
  sleep [desired_pause - pull_duration, 0].max
end
#reached_desired_count? ⇒ Boolean
94 95 96 97 98 99 100 |
# File 'lib/pork_sandwich/search.rb', line 94
#
# Tells whether @current_count has reached @desired_count.
#
# @return [Boolean] false whenever @desired_count is unset (no limit),
#   otherwise whether @current_count >= @desired_count
def reached_desired_count?
  return false unless @desired_count

  @current_count >= @desired_count
end
#reached_since_id?(id) ⇒ Boolean
119 120 121 122 123 124 125 |
# File 'lib/pork_sandwich/search.rb', line 119
#
# Tells whether +id+ is at or below @since_id.
#
# @param id [Object] id to compare against @since_id using <=
# @return [Boolean] false whenever @since_id is unset, otherwise
#   whether id <= @since_id
def reached_since_id?(id)
  return false unless @since_id

  id <= @since_id
end
#reduce_pull_rate ⇒ Object
113 114 115 116 117 |
# File 'lib/pork_sandwich/search.rb', line 113
#
# Lowers @pulls_per_hour by 100, but only while it is above 100.
#
# @return [Object, nil] the new rate, or nil when the rate was left alone
def reduce_pull_rate
  @pulls_per_hour -= 100 if @pulls_per_hour > 100
end