Class: Woot

Inherits:
Object
  • Object
show all
Defined in:
lib/woot.rb

Constant Summary collapse

# Base domain that every Woot subdomain hangs off.
DOMAIN = 'woot.com'.freeze
# Host whose front page is scraped to find the current sellout deal link.
SELLOUT_DOMAIN = 'shopping.yahoo.com'.freeze
# Key used in TWITTER_IDS for the woot-off account (not a real subdomain).
WOOT_OFF = 'woot-off'.freeze
# Twitter user id of the account followed for each subdomain (see .stream).
TWITTER_IDS = {
  'kids'    => 66527200,
  'shirt'   => 7696162,
  'sellout' => 15458304,
  'wine'    => 1647621,
  'www'     => 734493,
  WOOT_OFF  => 20557892
}.freeze
# Every TWITTER_IDS key except the woot-off pseudo entry.
SUBDOMAINS = (TWITTER_IDS.keys - [WOOT_OFF]).freeze

Class Method Summary collapse

Class Method Details

.scrape(subdomain = :www) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/woot.rb', line 20

# Fetches the page for +subdomain+ and runs it through a Scraper built from
# the selector map returned by .selectors.
#
# For the 'sellout' subdomain the page URL is not derived from DOMAIN; it is
# taken from the first "div.bd>div.img>a" link on the SELLOUT_DOMAIN front page.
#
# @param subdomain [Symbol, String] subdomain to scrape (defaults to :www)
# @return [Object] whatever the generated Scraper returns for the page
def self.scrape(subdomain = :www)
  url = "http://#{subdomain}.#{DOMAIN}/"
  if subdomain.to_s == 'sellout'
    sellout_link = Scraper.define do
      process_first "div.bd>div.img>a", :url => '@href'
      result :url
    end
    # The href comes out of HTML source, so ampersands may still be
    # entity-escaped; decode them before treating the value as a URL.
    url = sellout_link.scrape(Net::HTTP.get(SELLOUT_DOMAIN, '/')).gsub('&amp;', '&')
  end
  response = Net::HTTP.get(URI.parse(url))

  selectors = self.selectors(subdomain)
  Scraper.define do
    # Register one extraction rule per selector, then expose every result
    # name declared by those rules.
    selectors.each { |pattern, results| process_first pattern, results }
    result(*selectors.values.map { |results| results.keys }.flatten)
  end.scrape(response)
end

.selectors(subdomain = :www) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/woot.rb', line 39

# Builds the selector map that .scrape feeds to Scraper: each key is a CSS
# selector and each value is a Hash of result name => extractor. The map is
# also stored in @selectors on every call.
#
# Extractors are either values handed straight to Scraper (:text, '@href',
# '@src') or procs that receive the matched element.
#
# @param subdomain [Symbol, String] subdomain the map is built for; it is
#   echoed back as the :subdomain result and used to rebuild :purchase_url
# @return [Hash{String => Hash}] selector => { result name => extractor }
def self.selectors(subdomain = :www)
  # Reports the subdomain itself, ignoring the matched element.
  subdomain_name = proc { |element| subdomain.to_s }

  # Pulls the quoted argument out of an href shaped like "...('value');".
  popup_image = proc do |element|
    Regexp.last_match(1) if element.attributes['href'] =~ /\('([^']+)'\);/
  end

  # Keeps only the digits from the content of the element's first child.
  digits_only = proc { |element| element.children[0].content.gsub(/\D/, '') }

  # Re-roots the link's path on http://<subdomain>.<DOMAIN>; nil without href.
  purchase_link = proc do |element|
    if element.attributes.has_key?('href')
      path = element.attributes['href'].gsub(/^https?:\/\/[^\/]+/, '')
      "http://#{subdomain}.#{DOMAIN}#{path}"
    end
  end

  @selectors = {
    '*' => { :subdomain => subdomain_name },
    'h2.fn' => { :title => :text },
    'span.amount' => { :price => :text },
    'ul#shippingOptions' => { :shipping => :text },
    'img.photo' => { :image => '@src' },
    'div.hproduct>a' => { :alternate_image => popup_image },
    'a.url' => { :url => '@href' },
    'li.comments>a' => { :comments_url => '@href', :comments_count => digits_only },
    'div.story>h2' => { :header => :text },
    'div.story>h3' => { :sub_header => :text },
    'div.writeUp' => { :writeup => :text },
    'div.specs' => { :specs => :text },
    'div.productDescription>dl' => { :details => :text },
    'a#ctl00_ctl00_ContentPlaceHolderLeadIn_ContentPlaceHolderLeadIn_SaleControl_HyperLinkWantOne' => { :purchase_url => purchase_link }
  }
end

.stop ⇒ Object



68
69
70
# File 'lib/woot.rb', line 68

# Delegates to TweetStream::Client.stop and returns its result; presumably
# this halts the stream opened by .stream.
#
# NOTE(review): +stop+ is invoked on the TweetStream::Client class, not on the
# client instance that .stream creates — confirm the tweetstream version in use
# exposes a class-level +stop+.
def self.stop
  TweetStream::Client.stop
end

.stream(twitter_username, twitter_password) ⇒ Object



60
61
62
63
64
65
66
# File 'lib/woot.rb', line 60

# Follows every Twitter account in TWITTER_IDS and, for each status from a
# known account, yields the result of scraping the matching subdomain.
#
# Statuses from the woot-off account do not map to a subdomain directly; the
# subdomain is taken from the first http(s)://<subdomain>.<DOMAIN> URL found in
# the status text. If no such URL is present the WOOT_OFF key itself is
# scraped, as before.
#
# @param twitter_username [String] passed through to TweetStream::Client.new
# @param twitter_password [String] passed through to TweetStream::Client.new
# @yield [Object] the value returned by .scrape for the resolved subdomain
def self.stream(twitter_username, twitter_password)
  woot_url = %r{https?://([^.]+)\.#{Regexp.escape(DOMAIN)}}
  TweetStream::Client.new(twitter_username, twitter_password).follow(*TWITTER_IDS.values) do |status|
    # Hash#key replaces Hash#index, which no longer exists on current Rubies.
    subdomain = TWITTER_IDS.key(status.user.id)
    next if subdomain.nil?

    if subdomain == WOOT_OFF
      # NOTE(review): assumes the status object exposes the tweet body as
      # +text+, alongside the +user.id+ already used above — confirm.
      match = woot_url.match(status.text.to_s)
      subdomain = match[1] if match
    end
    yield scrape(subdomain)
  end
end