Class: WebLoader::Command
- Inherits: Object
  - Object
  - WebLoader::Command
- Includes:
- Utils
- Defined in:
- lib/web_loader/command.rb
Constant Summary collapse
- USER_AGENT = "WebLoader"
- CACHE_DIR = './cache'
- DEFAULT_RETRY = 3
- DEFAULT_REDIRECT = 10
- DEFAULT_SLEEP = 10
- CACHE_LIMIT = 3600
  キャッシュが有効な秒数。デフォルトは1時間とする (Number of seconds a cache entry stays valid; the default is one hour.)
Constants included from Utils
Instance Attribute Summary collapse
-
#always_write_cache ⇒ Object
Returns the value of attribute always_write_cache.
-
#binary ⇒ Object
Returns the value of attribute binary.
-
#cache_dir ⇒ Object
Returns the value of attribute cache_dir.
-
#cache_limit ⇒ Object
Returns the value of attribute cache_limit.
-
#driver ⇒ Object
Returns the value of attribute driver.
-
#load_cache_page ⇒ Object
readonly
Returns the value of attribute load_cache_page.
-
#logger ⇒ Object
Returns the value of attribute logger.
-
#response ⇒ Object
readonly
Returns the value of attribute response.
-
#use_cache ⇒ Object
Returns the value of attribute use_cache.
-
#user_agent ⇒ Object
Returns the value of attribute user_agent.
-
#verbose ⇒ Object
Returns the value of attribute verbose.
Class Method Summary collapse
Instance Method Summary collapse
-
#initialize(driver = ::WebLoader::Drivers::HttpDriver.new) ⇒ Command
constructor
A new instance of Command.
- #load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0) ⇒ Object
- #load_retry(url, retry_count = DEFAULT_RETRY) ⇒ Object
Methods included from Utils
detect_charset, to_redirect_url, toutf8, toutf8_charset
Constructor Details
#initialize(driver = ::WebLoader::Drivers::HttpDriver.new) ⇒ Command
Returns a new instance of Command.
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/web_loader/command.rb', line 25
# Builds a Command with its default settings and the driver used for fetching.
#
# @param driver [Object] object used to fetch URLs; defaults to a new
#   ::WebLoader::Drivers::HttpDriver
def initialize(driver = ::WebLoader::Drivers::HttpDriver.new)
  @use_cache = true
  @load_cache_page = false # whether the most recent load was served from the cache
  # `File.(CACHE_DIR)` would invoke the non-existent File.call; resolve the
  # default cache directory to an absolute path instead.
  @cache_dir = File.expand_path(CACHE_DIR)
  @user_agent = "#{USER_AGENT}/#{VERSION}"
  @binary = false
  @verbose = false
  @cache_limit = CACHE_LIMIT
  @always_write_cache = false
  @response = nil
  @logger = nil

  # Driver setup
  @driver = driver
end
Instance Attribute Details
#always_write_cache ⇒ Object
Returns the value of attribute always_write_cache.
44 45 46 |
# File 'lib/web_loader/command.rb', line 44
# Reader for the always_write_cache attribute.
#
# @return [Object] the current value of @always_write_cache
def always_write_cache
  @always_write_cache
end
#binary ⇒ Object
Returns the value of attribute binary.
42 43 44 |
# File 'lib/web_loader/command.rb', line 42
# Reader for the binary attribute.
#
# @return [Object] the current value of @binary
def binary
  @binary
end
#cache_dir ⇒ Object
Returns the value of attribute cache_dir.
42 43 44 |
# File 'lib/web_loader/command.rb', line 42
# Reader for the cache_dir attribute.
#
# @return [Object] the current value of @cache_dir
def cache_dir
  @cache_dir
end
#cache_limit ⇒ Object
Returns the value of attribute cache_limit.
43 44 45 |
# File 'lib/web_loader/command.rb', line 43
# Reader for the cache_limit attribute.
#
# @return [Object] the current value of @cache_limit
def cache_limit
  @cache_limit
end
#driver ⇒ Object
Returns the value of attribute driver.
45 46 47 |
# File 'lib/web_loader/command.rb', line 45
# Reader for the driver attribute.
#
# @return [Object] the current value of @driver
def driver
  @driver
end
#load_cache_page ⇒ Object (readonly)
Returns the value of attribute load_cache_page.
41 42 43 |
# File 'lib/web_loader/command.rb', line 41
# Reader for the load_cache_page attribute (read-only).
#
# @return [Object] the current value of @load_cache_page
def load_cache_page
  @load_cache_page
end
#logger ⇒ Object
Returns the value of attribute logger.
47 48 49 |
# File 'lib/web_loader/command.rb', line 47
# Reader for the logger attribute.
#
# @return [Object] the current value of @logger
def logger
  @logger
end
#response ⇒ Object (readonly)
Returns the value of attribute response.
46 47 48 |
# File 'lib/web_loader/command.rb', line 46
# Reader for the response attribute (read-only).
#
# @return [Object] the current value of @response
def response
  @response
end
#use_cache ⇒ Object
Returns the value of attribute use_cache.
42 43 44 |
# File 'lib/web_loader/command.rb', line 42
# Reader for the use_cache attribute.
#
# @return [Object] the current value of @use_cache
def use_cache
  @use_cache
end
#user_agent ⇒ Object
Returns the value of attribute user_agent.
42 43 44 |
# File 'lib/web_loader/command.rb', line 42
# Reader for the user_agent attribute.
#
# @return [Object] the current value of @user_agent
def user_agent
  @user_agent
end
#verbose ⇒ Object
Returns the value of attribute verbose.
42 43 44 |
# File 'lib/web_loader/command.rb', line 42
# Reader for the verbose attribute.
#
# @return [Object] the current value of @verbose
def verbose
  @verbose
end
Class Method Details
.save_image(url, file) ⇒ Object
16 17 18 19 20 21 22 23 |
# File 'lib/web_loader/command.rb', line 16
# Downloads +url+ and writes the result to +file+ in binary mode.
# The cache is switched off for this request: the content is simply saved.
#
# @param url [String] URL to download
# @param file [String] destination path handed to File.binwrite
# @return [Object] whatever File.binwrite returns
def self.save_image(url, file)
  loader = Command.new
  loader.use_cache = false
  loader.binary = true
  File.binwrite(file, loader.load(url))
end
Instance Method Details
#load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/web_loader/command.rb', line 53
# Loads the content at +url+: returns the cached copy when try_load_cache
# yields one, otherwise fetches through the driver, following redirects and
# retrying on read timeouts and rate-limited responses.
#
# @param url [String] URL to load
# @param redirect_count [Integer] number of redirects that may still be followed
# @param retry_count [Integer] number of retries still allowed for read
#   timeouts and rate-limited responses
# @return [Object, nil] the cached content or the response body; nil when the
#   request failed and no retry is left
# @raise [ArgumentError] when the redirect budget is used up
def load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0)
  # `<=` rather than `==` so a negative budget cannot recurse without bound.
  raise ArgumentError, 'HTTP redirect too deep' if redirect_count <= 0

  log("Load: #{url}")

  ##### Try the cache first
  @load_cache_page = false
  content = try_load_cache(url)
  if content
    log("Load cache: #{url}")
    @load_cache_page = true
    return content
  end

  ##### Load from the server
  log("Load server: #{url}")
  # Forget the response of any earlier call so a failed fetch can never be
  # mistaken for a fresh result.
  @response = nil
  begin
    @driver.user_agent = @user_agent
    @driver.binary = @binary
    @response = @driver.fetch(url)
  rescue Net::ReadTimeout
    # On a timeout, sleep and load again while retries remain.
    log("Read timeout: #{url}")
    if retry_count > 0
      sleep DEFAULT_SLEEP
      return load(url, redirect_count, retry_count - 1)
    end
    # Out of retries: there is no response to inspect, so report the failure
    # the same way as any other unhandled outcome and give up.
    log("error #{url}", true)
    return nil
  end

  ##### Handle the response
  if @response.ok?
    body = @response.body
    if @use_cache || @always_write_cache
      log("Write cache: #{url}")
      Cache.write(@cache_dir, url, @response.status, body)
    end
    body
  elsif @response.redirect?
    target = to_redirect_url(URI.parse(url), @response.headers['location'])
    # Carry the remaining retry budget over to the redirect target.
    load(target, redirect_count - 1, retry_count)
  elsif @response.rate_limited?
    # Rate limited: wait for the retry-after value plus a 10 second margin,
    # then load again. Yields nil when no retry is left.
    if retry_count > 0
      sleep_for = @response.headers['retry-after'].to_i + 10
      log("Rate limit: #{url} #{@response.headers.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
      sleep sleep_for
      load(url, redirect_count, retry_count - 1)
    end
  else
    # Any other response is only logged as an error.
    log("error #{url}", true)
    nil
  end
end
#load_retry(url, retry_count = DEFAULT_RETRY) ⇒ Object
49 50 51 |
# File 'lib/web_loader/command.rb', line 49
# Convenience wrapper around #load that uses the default redirect budget and
# allows +retry_count+ retries.
#
# @param url [String] URL to load
# @param retry_count [Integer] retries handed to #load (DEFAULT_RETRY when omitted)
# @return [Object] whatever #load returns
def load_retry(url, retry_count = DEFAULT_RETRY)
  redirect_budget = DEFAULT_REDIRECT
  load(url, redirect_budget, retry_count)
end