Class: Woot
- Inherits:
-
Object
- Object
- Woot
- Defined in:
- lib/woot.rb
Constant Summary collapse
- DOMAIN =
'woot.com'
- SELLOUT_DOMAIN =
'shopping.yahoo.com'
- WOOT_OFF =
'woot-off'
- TWITTER_IDS =
{ 'kids' => 66527200, 'shirt' => 7696162, 'sellout' => 15458304, 'wine' => 1647621, 'www' => 734493, WOOT_OFF => 20557892 }
- SUBDOMAINS =
TWITTER_IDS.keys - [WOOT_OFF]
Class Method Summary collapse
- .scrape(subdomain = :www) ⇒ Object
- .selectors(subdomain = :www) ⇒ Object
- .stop ⇒ Object
- .stream(twitter_username, twitter_password) ⇒ Object
Class Method Details
.scrape(subdomain = :www) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/woot.rb', line 20

# Scrapes the sale page of a Woot subdomain.
#
# The page URL is built from +subdomain+ and DOMAIN. For 'sellout' the URL is
# instead scraped from the front page of SELLOUT_DOMAIN. The page is then
# fetched and run through a scraper generated from +selectors(subdomain)+.
#
# @param subdomain [Symbol, String] subdomain to scrape (defaults to :www)
# @return [Object] whatever the generated Scraper returns from +scrape+
def self.scrape(subdomain = :www)
  url = "http://#{subdomain}.#{DOMAIN}/"
  if subdomain.to_s == 'sellout'
    sellout_link = Scraper.define do
      process_first "div.bd>div.img>a", :url => '@href'
      result :url
    end.scrape(Net::HTTP.get(SELLOUT_DOMAIN, '/'))
    # NOTE(review): '@href' presumably comes back as written in the HTML, with
    # '&' encoded as '&amp;'; decode it before using it as a URL — confirm.
    url = sellout_link.gsub('&amp;', '&')
  end
  response = Net::HTTP.get(URI.parse(url))
  # Kept in a local so the block below reaches it through its closure.
  # NOTE(review): presumably Scraper.define evaluates the block with a
  # different self, which would hide the class method — confirm.
  selectors = self.selectors(subdomain)
  Scraper.define do
    # Register one rule per CSS pattern, then expose every extracted field.
    selectors.each { |pattern, results| process_first pattern, results }
    result(*selectors.values.map { |results| results.keys }.flatten)
  end.scrape(response)
end
.selectors(subdomain = :www) ⇒ Object
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/woot.rb', line 39

# Builds the table of CSS selectors used to scrape a sale page.
#
# Each key is a CSS pattern; each value maps result field names to an
# extractor (:text, an '@attribute' string, or a proc called with the
# matched element). The table is also stored in @selectors on every call.
#
# @param subdomain [Symbol, String] subdomain the selectors are built for
# @return [Hash{String => Hash}] pattern => { field => extractor }
def self.selectors(subdomain = :www)
  subdomain_name = proc { |element| subdomain.to_s }

  # The alternate image URL is the quoted argument inside the link's href.
  alternate_image = proc do |element|
    $1 if element.attributes['href'] =~ /\('([^']+)'\);/
  end

  # Keeps only the digits of the comments link's first child.
  comments_count = proc do |element|
    element.children[0].content.gsub(/\D/, '')
  end

  # Re-roots the "I want one" link on the requested subdomain; nil without href.
  purchase_url = proc do |element|
    if element.attributes.has_key?('href')
      path = element.attributes['href'].gsub(/^https?:\/\/[^\/]+/, '')
      "http://#{subdomain}.#{DOMAIN}#{path}"
    end
  end

  @selectors = {
    '*'                         => { :subdomain => subdomain_name },
    'h2.fn'                     => { :title => :text },
    'span.amount'               => { :price => :text },
    'ul#shippingOptions'        => { :shipping => :text },
    'img.photo'                 => { :image => '@src' },
    'div.hproduct>a'            => { :alternate_image => alternate_image },
    'a.url'                     => { :url => '@href' },
    'li.comments>a'             => { :comments_url => '@href', :comments_count => comments_count },
    'div.story>h2'              => { :header => :text },
    'div.story>h3'              => { :sub_header => :text },
    'div.writeUp'               => { :writeup => :text },
    'div.specs'                 => { :specs => :text },
    'div.productDescription>dl' => { :details => :text },
    'a#ctl00_ctl00_ContentPlaceHolderLeadIn_ContentPlaceHolderLeadIn_SaleControl_HyperLinkWantOne' => { :purchase_url => purchase_url }
  }
end
.stop ⇒ Object
68 69 70 |
# File 'lib/woot.rb', line 68

# Delegates to TweetStream::Client.stop.
# NOTE(review): presumably this ends the stream opened by +stream+ — confirm
# against the TweetStream version in use.
#
# @return [Object] whatever TweetStream::Client.stop returns
def self.stop
  TweetStream::Client.stop
end
.stream(twitter_username, twitter_password) ⇒ Object
60 61 62 63 64 65 66 |
# File 'lib/woot.rb', line 60

# Follows every Twitter account in TWITTER_IDS and yields a fresh scrape for
# each status received from a known account.
#
# For the WOOT_OFF account the subdomain is taken from the first
# "http(s)://<subdomain>.<DOMAIN>" link in the status text; if the text has no
# such link, WOOT_OFF itself is scraped.
#
# @param twitter_username [String] credentials passed to TweetStream::Client.new
# @param twitter_password [String] credentials passed to TweetStream::Client.new
# @yield [Object] the result of +scrape+ for the status' subdomain
# @return [Object] whatever TweetStream::Client#follow returns
def self.stream(twitter_username, twitter_password)
  # DOMAIN is escaped so its dot matches literally.
  sale_link = %r{https?://([^.]+)\.#{Regexp.escape(DOMAIN)}}
  TweetStream::Client.new(twitter_username, twitter_password).follow(*TWITTER_IDS.values) do |status|
    # Hash#key replaces Hash#index, which was removed in Ruby 3.0.
    subdomain = TWITTER_IDS.key(status.user.id)
    # Match the status text, not the subdomain: 'woot-off' can never contain a URL.
    # NOTE(review): assumes the status object exposes its body as #text — confirm.
    subdomain = status.text.to_s[sale_link, 1] || subdomain if subdomain == WOOT_OFF
    yield scrape(subdomain) unless subdomain.nil?
  end
end