Class: Crown::Amazon::Crawler
- Inherits:
-
Object
- Object
- Crown::Amazon::Crawler
- Defined in:
- lib/crown/amazon/crawler.rb
Overview
——————————————————————- #
Crawler
——————————————————————- #
Defined Under Namespace
Classes: Response
Instance Attribute Summary collapse
-
#entries ⇒ Object
Array of entries collected by #get; emptied by #clear.
-
#interval ⇒ Object
Value passed to sleep after each item lookup in #get (initialized to 5).
-
#threshold ⇒ Object
Minimum occurrence count an ASIN needs before #get looks it up (initialized to 5).
-
#uri_count ⇒ Object
Counter that #get is meant to increment for each URI yielding at least one ASIN; reset to 0 by #clear.
Instance Method Summary collapse
-
#clear ⇒ Object
Empties the collected entries and resets the URI counter to 0.
-
#get(uris, options = {}) ⇒ Object
Counts ASINs found at the given URIs, looks up those seen at least `threshold` times, and returns the collected entries.
-
#initialize ⇒ Crawler
constructor
————————————————————— # initialize ————————————————————— #.
Constructor Details
#initialize ⇒ Crawler
————————————————————— #
initialize
————————————————————— #
60 61 62 63 64 65 |
# File 'lib/crown/amazon/crawler.rb', line 60
#
# Sets up a crawler with its default settings and empty state:
# threshold and interval both start at 5, the entry list starts empty,
# and the URI counter starts at 0.
def initialize
  @threshold = 5
  @interval  = 5
  @entries   = []
  @uri_count = 0
end
Instance Attribute Details
#entries ⇒ Object
————————————————————— #
accessors
————————————————————— #
55 56 57 |
# File 'lib/crown/amazon/crawler.rb', line 55
#
# Reader for the @entries instance variable.
#
# @return [Object] the current value of @entries
def entries
  @entries
end
#interval ⇒ Object
————————————————————— #
accessors
————————————————————— #
55 56 57 |
# File 'lib/crown/amazon/crawler.rb', line 55
#
# Reader for the @interval instance variable.
#
# @return [Object] the current value of @interval
def interval
  @interval
end
#threshold ⇒ Object
————————————————————— #
accessors
————————————————————— #
55 56 57 |
# File 'lib/crown/amazon/crawler.rb', line 55
#
# Reader for the @threshold instance variable.
#
# @return [Object] the current value of @threshold
def threshold
  @threshold
end
#uri_count ⇒ Object
————————————————————— #
accessors
————————————————————— #
55 56 57 |
# File 'lib/crown/amazon/crawler.rb', line 55
#
# Reader for the @uri_count instance variable.
#
# @return [Object] the current value of @uri_count
def uri_count
  @uri_count
end
Instance Method Details
#clear ⇒ Object
————————————————————— #
clear
————————————————————— #
70 71 72 73 |
# File 'lib/crown/amazon/crawler.rb', line 70
#
# Resets the crawler's accumulated state: empties @entries in place
# (the same object is kept) and sets @uri_count back to 0.
#
# @return [Integer] 0, the value of the final assignment
def clear
  @entries.clear
  @uri_count = 0
end
#get(uris, options = {}) ⇒ Object
————————————————————— #
get
————————————————————— #
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/crown/amazon/crawler.rb', line 85
#
# Counts how often each ASIN appears across the given URIs, looks up every
# ASIN seen at least @threshold times (most frequent first), and appends one
# Response per successful lookup to @entries.
#
# @param uris [String, Array] an Array of URIs, or a String that is handed
#   to `load` to obtain them (presumably a path to a URI list -- confirm
#   against `load`, which is not visible here)
# @param options [Hash] passed through unchanged to
#   Crown::Amazon::EntryList#asin
# @yield [entry] each Response right after it is built, if a block is given
# @return [Array] @entries (which keeps accumulating across calls), or a
#   fresh empty Array when uris is neither a String nor an Array
def get(uris, options = {})
  # NOTE(review): the parameter name was missing from the extracted listing;
  # `options` is restored from the documented signature.
  case uris
  when String then v = load(uris)
  when Array then v = uris
  else return []
  end

  asins = Hash.new(0)
  v.each do |uri|
    begin
      # Declared outside the inner block on purpose: the original spelled
      # this `exits`, which made `exists` block-local, so the check below
      # raised NameError (silently rescued) and @uri_count never advanced.
      exists = false
      ::Crown::Amazon::EntryList.new.asin(uri, options) do |asin|
        asins[asin] += 1
        exists = true
      end
      @uri_count += 1 if exists
    rescue StandardError
      # Best effort: a URI that fails is skipped. StandardError (not
      # Exception) so Interrupt / SystemExit still stop the crawl.
      next
    end
  end

  # Highest count first, so the first ASIN under the threshold ends the loop.
  asins.to_a.sort { |x, y| y[1] <=> x[1] }.each do |asin, count|
    break if count < @threshold

    response = ::Amazon::Ecs.item_lookup(asin, { :response_group => 'Medium' })
    next if response.nil? || response.items.length == 0

    x = response.first_item
    entry = Response.new
    entry.asin = alternate(x.get('ASIN'), '')
    entry.title = alternate(x.get('ItemAttributes/Title'), '')
    # NOTE(review): the attribute name was missing from the extracted
    # listing; `author` is reconstructed from the field being read --
    # confirm against Response.
    entry.author = alternate(x.get_array('ItemAttributes/Author').join(','), '')
    entry.publisher = alternate(x.get('ItemAttributes/Publisher'), '')
    entry.price = alternate(x.get('ItemAttributes/ListPrice/Amount'), '')
    entry.date = alternate(x.get('ItemAttributes/ReleaseDate'), '')
    # Fall back to the publication date when no release date came back.
    if entry.date.nil? || entry.date.length == 0
      entry.date = alternate(x.get('ItemAttributes/PublicationDate'), '')
    end
    entry.count = count

    yield entry if block_given?
    @entries.push(entry)
    sleep(@interval) # pause between consecutive lookups
  end

  @entries
end