Class: WebLoader::Command

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/web_loader/command.rb

Constant Summary collapse

USER_AGENT =
"WebLoader"
CACHE_DIR =
'./cache'
DEFAULT_RETRY =
3
DEFAULT_REDIRECT =
10
DEFAULT_SLEEP =
10
CACHE_LIMIT =

キャッシュが有効な秒数。デフォルトは1時間とする

3600

Constants included from Utils

Utils::UTF_8

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Utils

detect_charset, to_redirect_url, toutf8, toutf8_charset

Constructor Details

#initialize(driver = ::WebLoader::Drivers::HttpDriver.new) ⇒ Command

Returns a new instance of Command.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/web_loader/command.rb', line 25

# Builds a loader with its default settings: caching on, text mode, quiet.
#
# @param driver [Object] backend used to perform fetches; a new
#   WebLoader::Drivers::HttpDriver when omitted
def initialize(driver = ::WebLoader::Drivers::HttpDriver.new)
  # Fetch backend and the request settings handed to it on each load
  @driver = driver
  @user_agent = "#{USER_AGENT}/#{VERSION}"
  @binary = false

  # Cache configuration
  @use_cache = true
  @always_write_cache = false
  @cache_limit = CACHE_LIMIT
  @cache_dir = File.expand_path(CACHE_DIR)
  @load_cache_page = false # whether the most recent load came from the cache

  # Diagnostics and last-response state
  @verbose = false
  @logger = nil
  @response = nil
end

Instance Attribute Details

#always_write_cache ⇒ Object

Returns the value of attribute always_write_cache.



44
45
46
# File 'lib/web_loader/command.rb', line 44

# @return [Boolean] when true, #load writes a successful response to the cache
#   even if use_cache is off; false by default
def always_write_cache
  @always_write_cache
end

#binary ⇒ Object

Returns the value of attribute binary.



42
43
44
# File 'lib/web_loader/command.rb', line 42

# @return [Boolean] flag handed to the driver's `binary=` before each fetch;
#   false by default
def binary
  @binary
end

#cache_dir ⇒ Object

Returns the value of attribute cache_dir.



42
43
44
# File 'lib/web_loader/command.rb', line 42

# @return [String] directory passed to Cache.write by #load; defaults to the
#   expanded path of CACHE_DIR ('./cache')
def cache_dir
  @cache_dir
end

#cache_limit ⇒ Object

Returns the value of attribute cache_limit.



43
44
45
# File 'lib/web_loader/command.rb', line 43

# @return [Integer] number of seconds a cache entry stays valid; defaults to
#   CACHE_LIMIT (3600, i.e. one hour)
def cache_limit
  @cache_limit
end

#driver ⇒ Object

Returns the value of attribute driver.



45
46
47
# File 'lib/web_loader/command.rb', line 45

# @return [Object] backend whose `fetch(url)` is called by #load; it also
#   receives `user_agent=` and `binary=` before each fetch
def driver
  @driver
end

#load_cache_page ⇒ Object (readonly)

Returns the value of attribute load_cache_page.



41
42
43
# File 'lib/web_loader/command.rb', line 41

# @return [Boolean] true when the most recent #load returned cached content;
#   reset to false at the start of every #load
def load_cache_page
  @load_cache_page
end

#logger ⇒ Object

Returns the value of attribute logger.



47
48
49
# File 'lib/web_loader/command.rb', line 47

# @return [Object, nil] logger object; nil by default.
#   NOTE(review): presumably consumed by the `log` helper — confirm there.
def logger
  @logger
end

#response ⇒ Object (readonly)

Returns the value of attribute response.



46
47
48
# File 'lib/web_loader/command.rb', line 46

# @return [Object, nil] response returned by the driver's most recent fetch;
#   nil until #load has fetched from the server
def response
  @response
end

#use_cache ⇒ Object

Returns the value of attribute use_cache.



42
43
44
# File 'lib/web_loader/command.rb', line 42

# @return [Boolean] when true, #load writes successful responses to the cache;
#   true by default.
#   NOTE(review): presumably also gates cache reads in `try_load_cache` — confirm.
def use_cache
  @use_cache
end

#user_agent ⇒ Object

Returns the value of attribute user_agent.



42
43
44
# File 'lib/web_loader/command.rb', line 42

# @return [String] value assigned to the driver's `user_agent=` before each
#   fetch; defaults to "#{USER_AGENT}/#{VERSION}"
def user_agent
  @user_agent
end

#verbose ⇒ Object

Returns the value of attribute verbose.



42
43
44
# File 'lib/web_loader/command.rb', line 42

# @return [Boolean] verbosity flag; false by default.
#   NOTE(review): presumably read by the `log` helper — confirm there.
def verbose
  @verbose
end

Class Method Details

.save_image(url, file) ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/web_loader/command.rb', line 16

# Downloads +url+ in binary mode and writes the bytes straight to +file+,
# bypassing the cache.
#
# @param url [String] address of the resource to download
# @param file [String] path the content is written to
# @return [Integer] number of bytes written, as returned by File.binwrite
# @note When #load returns nil (failed request) an empty file is written.
def self.save_image(url, file)
  loader = Command.new
  # Plain download: binary content, no caching
  loader.binary = true
  loader.use_cache = false
  File.binwrite(file, loader.load(url))
end

Instance Method Details

#load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0) ⇒ Object

Raises:

  • (ArgumentError) — when redirect_count reaches 0 ("HTTP redirect too deep")



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/web_loader/command.rb', line 53

# Loads +url+ and returns its content, serving it from the cache when
# possible and otherwise fetching it through the driver, following redirects
# and retrying as allowed.
#
# @param url [String] address to load
# @param redirect_count [Integer] redirects that may still be followed
# @param retry_count [Integer] retries still allowed after a read timeout or a
#   rate-limited response
# @return [Object, nil] cached content or the response body; nil when the
#   request could not be completed (retries exhausted, or a status that is
#   neither ok, redirect nor rate-limited)
# @raise [ArgumentError] when +redirect_count+ is 0
def load(url, redirect_count = DEFAULT_REDIRECT, retry_count = 0)
  raise ArgumentError, 'HTTP redirect too deep' if redirect_count == 0
  log("Load: #{url}")

  ##### Try the cache first
  @load_cache_page = false
  content = try_load_cache(url)
  if content
    log("Load cache: #{url}")
    @load_cache_page = true
    return content
  end

  ##### Load from the server
  log("Load server: #{url}")
  # Forget the response of any earlier fetch so a failed fetch can never be
  # mistaken for (or cached as) the result of this one.
  @response = nil
  begin
    @driver.user_agent = @user_agent
    @driver.binary = @binary
    @response = @driver.fetch(url)
  rescue Net::ReadTimeout
    log("Read timeout: #{url}")
    # Out of retries: give up with nil, like the other failure paths below.
    return nil unless retry_count > 0

    # Otherwise wait and try again with one retry fewer.
    sleep DEFAULT_SLEEP
    return load(url, redirect_count, retry_count - 1)
  end

  ##### Handle the response
  result = nil
  if @response.ok?
    body = @response.body
    if @use_cache || @always_write_cache
      log("Write cache: #{url}")
      Cache.write(@cache_dir, url, @response.status, body)
    end
    result = body
  elsif @response.redirect?
    # Follow the redirect with one redirect fewer, keeping the retry budget.
    target = to_redirect_url(URI.parse(url), @response.headers['location'])
    result = load(target, redirect_count - 1, retry_count)
  elsif @response.rate_limited?
    if retry_count > 0
      # Wait for the Retry-After value (0 when absent or non-numeric) plus a
      # 10 second margin, then retry.
      # NOTE(review): uses `headers`, matching the redirect branch; the
      # original read `header` here — confirm against the response class.
      sleep_for = @response.headers['retry-after'].to_i + 10
      log("Rate limit: #{url} #{@response.headers.to_hash} (429 Too Many Requests). Sleeping #{sleep_for} seconds and retry (##{retry_count}).")
      sleep sleep_for
      result = load(url, redirect_count, retry_count - 1)
    end
  else
    # Any other status is only logged; nil is returned.
    # NOTE(review): the original comment said an exception should be raised
    # here — confirm the intended behavior.
    log("error #{url}", true)
  end
  result
end

#load_retry(url, retry_count = DEFAULT_RETRY) ⇒ Object



49
50
51
# File 'lib/web_loader/command.rb', line 49

# Loads +url+ through #load with retries enabled.
#
# @param url [String] address to load
# @param retry_count [Integer] passed to #load as its retry_count;
#   DEFAULT_RETRY when omitted
# @return [Object, nil] whatever #load returns
def load_retry(url, retry_count = DEFAULT_RETRY)
  # The redirect budget is always DEFAULT_REDIRECT; only the retry count varies.
  load(url, DEFAULT_REDIRECT, retry_count)
end