Class: Wuclan::Twitter::Scrape::TwitterSearchRequest

Inherits:
Monkeyshines::ScrapeRequest
  • Object
show all
Includes:
Monkeyshines::RawJsonContents, Monkeyshines::ScrapeRequestCore::Paginated, Model
Defined in:
lib/wuclan/twitter/parse/twitter_search_parse.rb,
lib/wuclan/twitter/scrape/twitter_search_request.rb

Overview

ScrapeRequest for the twitter Search API.

Examines the parsed contents to describe the result.

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ TwitterSearchRequest

Returns a new instance of TwitterSearchRequest.



19
20
21
22
23
24
25
# File 'lib/wuclan/twitter/scrape/twitter_search_request.rb', line 19

# Builds a request either from a search URL or from ordinary constructor
# arguments.
#
# When the first argument matches an "http://" URL carrying a `q=<term>&`
# query parameter, the captured term is handed to the superclass first,
# followed by nil, an empty hash, and then all of the original arguments.
# Any other argument list is forwarded to the superclass unchanged.
def initialize(*args)
  url_match = args.first =~ %r{\Ahttp://.*q=([^&]+)&}
  if url_match
    super(Regexp.last_match(1), nil, {}, *args)
  else
    super(*args)
  end
end

Class Method Details

.encode_screen_name(screen_name) ⇒ Object



51
52
53
54
# File 'lib/wuclan/twitter/parse/twitter_search_parse.rb', line 51

# Calls wukong_encode!(:url) on the screen name when it contains at least one
# non-word character (/\W/); otherwise leaves it alone.
#
# The result of wukong_encode! is ignored -- presumably it mutates the string
# in place (confirm against the wukong library).
#
# @param screen_name the name to check (nil is passed through untouched)
# @return the very same object that was passed in
def self.encode_screen_name(screen_name)
  screen_name.tap do |name|
    name.wukong_encode!(:url) if name =~ /\W/
  end
end

Instance Method Details

#encode_and_sanitize!(item) ⇒ Object



44
45
46
47
48
49
# File 'lib/wuclan/twitter/parse/twitter_search_parse.rb', line 44

# Normalizes one raw search result hash, modifying it in place:
#
# * 'from_user' receives wukong_encode!(:url)
# * 'to_user' receives wukong_encode!(:url), unless it is blank
# * 'text' receives wukong_encode! with no arguments
# * 'created_at' is re-parsed, converted to UTC and replaced by its to_flat form
#
# @param item a search result, indexed by the string keys listed above
# @return the new value stored under 'created_at'
def encode_and_sanitize!(item)
  item['from_user'].wukong_encode!(:url)

  recipient = item['to_user']
  unless recipient.blank?
    recipient.wukong_encode!(:url)
  end

  item['text'].wukong_encode!

  created_utc = Time.parse(item['created_at']).utc
  item['created_at'] = created_utc.to_flat
end

#healthy? ⇒ Boolean

Checks that the response parses and has the right data structure. If healthy? is true, things should generally work.

Returns:

  • (Boolean)


43
44
45
# File 'lib/wuclan/twitter/scrape/twitter_search_request.rb', line 43

# Reports whether the response yielded a usable list of search items.
#
# Always answers with a real Boolean: nil.is_a?(Array) is already false, so a
# separate nil guard is unnecessary and +items+ only has to be evaluated once.
#
# @return [Boolean] true when +items+ is an Array, false otherwise
#   (including when +items+ is nil)
def healthy?
  items.is_a?(Array)
end

#items ⇒ Object

Extract the actual search items returned



52
53
54
# File 'lib/wuclan/twitter/scrape/twitter_search_request.rb', line 52

# The search results carried by the response.
#
# @return whatever is stored under the 'results' key of +parsed_contents+,
#   or nil when +parsed_contents+ is nil/false
def items
  return nil unless parsed_contents

  parsed_contents['results']
end

#key ⇒ Object



37
38
39
# File 'lib/wuclan/twitter/scrape/twitter_search_request.rb', line 37

# Returns this request's +identifier+ unchanged.
def key
  identifier
end

#make_url ⇒ Object



30
31
32
# File 'lib/wuclan/twitter/scrape/twitter_search_request.rb', line 30

# Builds the search endpoint URL for this request by appending +query_term+
# to the fixed search.json address as the q= parameter. The term is inserted
# as-is; no escaping is applied here.
#
# @return [String] the request URL
def make_url
  endpoint = "http://search.twitter.com/search.json?q="
  endpoint + query_term.to_s
end

#parse(*args, &block) ⇒ Object

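Parses each search result and yields, in order: the tweet, the sender's search id, and, when the result has a to_user_id, the recipient's search id followed by an ARepliesBName linking sender and recipient.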



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/wuclan/twitter/parse/twitter_search_parse.rb', line 17

# Walks every search result and yields the records derived from it.
#
# For each item (after it has been passed through encode_and_sanitize!) the
# block receives, in this order:
#
# 1. the tweet built by tweet_from_parse
# 2. a TwitterUserSearchId for the sender (from_user / from_user_id)
# 3. only when to_user_id is not blank: a TwitterUserSearchId for the
#    recipient, then an ARepliesBName tying sender and recipient to the tweet
#
# The positional arguments are accepted but not used here.
#
# @yield one derived record at a time
# @return the value of +items+ (the result of iterating it with each)
def parse(*args, &block)
  items.each do |result|
    encode_and_sanitize!(result)
    tweet  = tweet_from_parse(result)
    sender = TwitterUserSearchId.new(result['from_user'], result['from_user_id'])
    recipient =
      if result['to_user_id'].blank?
        nil
      else
        TwitterUserSearchId.new(result['to_user'], result['to_user_id'])
      end

    yield tweet
    yield sender
    next unless recipient

    yield recipient
    yield ARepliesBName.new(
      sender.screen_name, recipient.screen_name,
      tweet.id, nil,
      sender.sid, recipient.sid
    )
  end
end

#query_term ⇒ Object



34
35
36
# File 'lib/wuclan/twitter/scrape/twitter_search_request.rb', line 34

# The search term for this request: simply this request's +identifier+.
# make_url interpolates it into the q= parameter of the search URL.
def query_term
  identifier
end

#span ⇒ Object

Span of IDs. Assumes the response has the ids in sort order oldest to newest (which the twitter API provides)



57
58
59
# File 'lib/wuclan/twitter/scrape/twitter_search_request.rb', line 57

def span
  [items.last['id'], items.first['id']] rescue nil
end

#timespan ⇒ Object

Span of created_at times covered by this request. Useful for rate estimation.



62
63
64
# File 'lib/wuclan/twitter/scrape/twitter_search_request.rb', line 62

def timespan
  [Time.parse(items.last['created_at']).utc, Time.parse(items.first['created_at']).utc] rescue nil
end

#tweet_from_parse(item) ⇒ Object



35
36
37
38
39
40
41
42
# File 'lib/wuclan/twitter/parse/twitter_search_parse.rb', line 35

# Builds a SearchTweet from one search result hash.
#
# The constructor arguments are positional; the values are passed in exactly
# this order: id, created_at, three nils (twitter_user_id, favorited,
# truncated), item[''], nil, text, source, to_user, to_user_id, from_user,
# from_user_id, iso_language_code.
#
# @param item a search result indexed by string keys
# @return [SearchTweet]
def tweet_from_parse(item)
  fields = [
    item['id'], item['created_at'],
    nil, nil, nil, # twitter_user_id, favorited, truncated
    # NOTE(review): the empty-string key below normally yields nil; it looks
    # like a placeholder for a field the search API does not supply -- confirm.
    item[''], nil, item['text'],
    item['source'],
    item['to_user'],   item['to_user_id'],
    item['from_user'], item['from_user_id'], item['iso_language_code']
  ]
  SearchTweet.new(*fields)
end