Class: Crown::Amazon::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/crown/amazon/crawler.rb

Overview

——————————————————————- #

Crawler

指定した URL に記述されている Amazon の ASIN コードを抽出し,
それらの商品情報を取得するためのクラス.

——————————————————————- #

Defined Under Namespace

Classes: Response

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Crawler

————————————————————— #

initialize

————————————————————— #



60
61
62
63
64
65
# File 'lib/crown/amazon/crawler.rb', line 60

# Builds a crawler with its default settings and an empty result set.
def initialize
    # Collected product entries; filled by #get, emptied by #clear.
    @entries = []
    # Counter of crawled URLs, reset to zero by #clear.
    @uri_count = 0
    # Minimum occurrence count an ASIN needs before #get looks it up.
    @threshold = 5
    # Argument handed to sleep after each product lookup in #get.
    @interval = 5
end

Instance Attribute Details

#entries ⇒ Object

————————————————————— #

accessors

————————————————————— #



55
56
57
# File 'lib/crown/amazon/crawler.rb', line 55

# Reader for the product entries accumulated by #get (the same Array that
# #get returns and #clear empties).
def entries()
  return @entries
end

#interval ⇒ Object

————————————————————— #

accessors

————————————————————— #



55
56
57
# File 'lib/crown/amazon/crawler.rb', line 55

# Reader for the pause that #get passes to sleep after each product lookup.
def interval()
  return @interval
end

#threshold ⇒ Object

————————————————————— #

accessors

————————————————————— #



55
56
57
# File 'lib/crown/amazon/crawler.rb', line 55

# Reader for the minimum number of occurrences an ASIN must reach before
# #get looks up its product information.
def threshold()
  return @threshold
end

#uri_count ⇒ Object

————————————————————— #

accessors

————————————————————— #



55
56
57
# File 'lib/crown/amazon/crawler.rb', line 55

# Reader for the URL counter. #get is meant to bump it once per URL that
# yielded at least one ASIN; #clear resets it to zero.
def uri_count()
  return @uri_count
end

Instance Method Details

#clear ⇒ Object

————————————————————— #

clear

————————————————————— #



70
71
72
73
# File 'lib/crown/amazon/crawler.rb', line 70

# Resets the crawler state: empties the collected entries in place (the
# Array object itself is kept) and sets the URL counter back to zero.
def clear
    @entries.clear
    @uri_count = 0
end

#get(uris, options = {}) ⇒ Object

————————————————————— #

get

指定した URL に記述されている Amazon の ASIN コードを抽出し,
それらの商品情報を取得する.uris には,クロールする URL の
配列,または URL 一覧が記載されているファイルへのパスを指定
する.

————————————————————— #



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/crown/amazon/crawler.rb', line 85

# Extracts the Amazon ASIN codes mentioned at the given URLs and fetches
# product information for the ones that are mentioned often enough.
#
# uris    - Array of URLs to crawl, or a String path to a file listing URLs
#           (resolved through +load+). Any other type returns [] at once.
# options - Hash handed unchanged to Crown::Amazon::EntryList#asin.
#
# Every URL that yields at least one ASIN increments @uri_count; URLs whose
# extraction raises are skipped. ASINs are visited from most to least
# frequent and processing stops at the first one seen fewer than @threshold
# times. Each remaining ASIN is looked up with Amazon::Ecs.item_lookup and
# turned into a Response, followed by a sleep of @interval.
#
# Yields each Response as soon as it is built when a block is given.
# Returns @entries, which still holds entries from earlier calls unless
# #clear was invoked in between.
def get(uris, options = {})
    case uris
    when String then targets = load(uris)
    when Array  then targets = uris
    else return []
    end

    # ASIN => number of times it was reported across all URLs.
    counts = Hash.new(0)
    targets.each { |uri|
        begin
            # Declared outside the inner block on purpose: a variable first
            # assigned inside the block would be block-local and invisible
            # to the @uri_count check below.
            exists = false
            ::Crown::Amazon::EntryList.new.asin(uri, options) { |asin|
                counts[asin] += 1
                exists = true
            }
            @uri_count += 1 if exists
        rescue StandardError
            # Best effort: a URL that cannot be crawled is skipped. Only
            # StandardError is trapped so that Interrupt / SystemExit still
            # stop the crawl.
            next
        end
    }

    counts.sort { |a, b| b[1] <=> a[1] }.each { |asin, count|
        # Sorted by descending count, so everything after this is rarer too.
        break if count < @threshold
        response = ::Amazon::Ecs.item_lookup(asin, { :response_group => 'Medium' })
        next if response.nil? || response.items.empty?

        item = response.first_item
        entry = Response.new
        entry.asin      = alternate(item.get('ASIN'), '')
        entry.title     = alternate(item.get('ItemAttributes/Title'), '')
        entry.author    = alternate(item.get_array('ItemAttributes/Author').join(','), '')
        entry.publisher = alternate(item.get('ItemAttributes/Publisher'), '')
        entry.price     = alternate(item.get('ItemAttributes/ListPrice/Amount'), '')
        entry.date      = alternate(item.get('ItemAttributes/ReleaseDate'), '')
        # Fall back to the publication date when no release date is present.
        if entry.date.nil? || entry.date.empty?
            entry.date = alternate(item.get('ItemAttributes/PublicationDate'), '')
        end
        entry.count     = count

        yield entry if block_given?
        @entries.push(entry)
        # Pause between successive item lookups.
        sleep(@interval)
    }

    return @entries
end