Class: HTMLProofer::Cache

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/html-proofer/cache.rb

Constant Summary collapse

DEFAULT_STORAGE_DIR =
File.join('tmp', '.htmlproofer')
DEFAULT_CACHE_FILE_NAME =
'cache.log'.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#clean_content, #create_nokogiri, #pluralize, #swap

Constructor Details

#initialize(logger, options) ⇒ Cache


17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/html-proofer/cache.rb', line 17

# Creates a cache bound to +logger+.
#
# Caching is switched on only when +options+ is neither nil nor empty. In
# that case the on-disk cache is loaded through setup_cache! and the start of
# the validity window is derived from options[:timeframe]. Either way a
# singleton +use_cache?+ predicate is defined on this instance to report the
# decision.
#
# @param logger [Object] stored as-is; other methods call +log+ on it
# @param options [Hash, nil] cache options (:timeframe, :storage_dir, :cache_file)
def initialize(logger, options)
  @logger = logger
  @cache_log = {}

  caching = !(options.nil? || options.empty?)
  define_singleton_method('use_cache?') { caching }

  if caching
    setup_cache!(options)
    @parsed_timeframe = parsed_timeframe(options[:timeframe])
  end

  # Taken last, after any cache loading has finished.
  @cache_time = Time.now
end

Instance Attribute Details

#cache_file ⇒ Object (readonly)

Returns the value of attribute cache_file


15
16
17
# File 'lib/html-proofer/cache.rb', line 15

# Reader for @cache_file, the path that setup_cache! builds by joining the
# storage directory with the cache file name. Returns nil if setup_cache! has
# not run (i.e. caching is disabled).
def cache_file
  @cache_file
end

#cache_log ⇒ Object (readonly)

Returns the value of attribute cache_log


15
16
17
# File 'lib/html-proofer/cache.rb', line 15

# Reader for @cache_log, the in-memory cache keyed by cleaned URL. It starts
# as an empty Hash in initialize and is replaced by the parsed JSON contents
# of the cache file when setup_cache! finds a non-empty one.
def cache_log
  @cache_log
end

#exists ⇒ Object (readonly)

Returns the value of attribute exists


15
16
17
# File 'lib/html-proofer/cache.rb', line 15

# Reader for @exists.
# NOTE(review): no method visible in this listing assigns @exists, so this
# presumably always returns nil — confirm before relying on it.
def exists
  @exists
end

#storage_dir ⇒ Object (readonly)

Returns the value of attribute storage_dir


15
16
17
# File 'lib/html-proofer/cache.rb', line 15

# Reader for @storage_dir, the directory holding the cache file. setup_cache!
# sets it to options[:storage_dir] or, failing that, DEFAULT_STORAGE_DIR.
# Returns nil if setup_cache! has not run.
def storage_dir
  @storage_dir
end

Instance Method Details

#add(url, filenames, status, msg = '') ⇒ Object


61
62
63
64
65
66
67
68
69
70
# File 'lib/html-proofer/cache.rb', line 61

# Records the outcome of checking +url+ in the in-memory cache, replacing any
# entry already stored under the same cleaned URL.
#
# The entry uses Symbol keys (:time, :filenames, :status, :message) and is
# stamped with @cache_time rather than the current time.
#
# @param url [String] the URL that was checked; normalized with clean_url
# @param filenames [Object] the files that reference the URL
# @param status [Object] the status recorded for the check
# @param msg [String] failure message; defaults to ''
# @return [Hash] the entry that was stored
def add(url, filenames, status, msg = '')
  @cache_log[clean_url(url)] = {
    time: @cache_time,
    filenames: filenames,
    status: status,
    message: msg
  }
end

#clean_url(url) ⇒ Object


140
141
142
# File 'lib/html-proofer/cache.rb', line 140

# Normalizes +url+ into the form used for cache keys and comparisons (see add
# and detect_url_changes). Currently this only delegates to unescape_url.
def clean_url(url)
  unescape_url(url)
end

#detect_url_changes(found) ⇒ Object


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/html-proofer/cache.rb', line 72

# Reconciles the cache with the URLs found in the current run.
#
# Entries of +found+ whose URL is not cached yet are returned so that they
# can be checked; cached URLs that no longer appear in +found+ are deleted
# from @cache_log. Both sides are compared after normalization with
# clean_url. Each change is logged at :debug level and a summary of each
# phase at :info level.
#
# @param found [Hash] URLs discovered in this run, keyed by URL
# @return [Hash] the entries of +found+ that are not in the cache
def detect_url_changes(found)
  # Hash-backed lookups keep every membership test O(1). Scanning an Array
  # with include? inside the loops below made this method quadratic in the
  # number of links.
  existing_urls = @cache_log.keys.each_with_object({}) { |url, seen| seen[clean_url(url)] = true }
  found_urls = found.keys.each_with_object({}) { |url, seen| seen[clean_url(url)] = true }

  # prepare to add new URLs detected
  additions = found.reject do |url, _|
    cleaned = clean_url(url)
    known = existing_urls.key?(cleaned)
    @logger.log :debug, "Adding #{cleaned} to cache check" unless known
    known
  end

  new_link_text = pluralize(additions.length, 'link', 'links')
  @logger.log :info, "Adding #{new_link_text} to the cache..."

  # remove from cache URLs that no longer exist
  del = 0
  @cache_log.delete_if do |url, _|
    cleaned = clean_url(url)
    next false if found_urls.key?(cleaned)

    @logger.log :debug, "Removing #{cleaned} from cache check"
    del += 1
    true
  end

  del_link_text = pluralize(del, 'link', 'links')
  @logger.log :info, "Removing #{del_link_text} from the cache..."

  additions
end

#load? ⇒ Boolean


114
115
116
# File 'lib/html-proofer/cache.rb', line 114

# Returns true while @load is nil.
# NOTE(review): no method visible in this listing assigns @load, so this
# presumably always returns true — confirm against the rest of the class.
def load?
  @load.nil?
end

#parsed_timeframe(timeframe) ⇒ Object


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/html-proofer/cache.rb', line 44

# Converts a timeframe such as '30d' into the point in time that far in the
# past.
#
# The first run of digits followed by a non-digit is used: the digits are the
# amount and the character is the unit — 'M' (months), 'w' (weeks),
# 'd' (days) or 'h' (hours).
#
# NOTE(review): the result comes from calling months/weeks/days/hours and
# then +ago+ on a Float; those presumably come from ActiveSupport's numeric
# extensions, which are not part of this listing — confirm.
#
# @param timeframe [String, nil] e.g. '2w'
# @return [Object] whatever +ago+ returns (presumably a Time)
# @raise [ArgumentError] if +timeframe+ is nil, has no "<digits><unit>" part,
#   or uses an unknown unit
def parsed_timeframe(timeframe)
  # A missing or malformed timeframe used to surface as a NoMethodError on
  # nil; report it the same way as an unknown unit instead.
  matched = timeframe.to_s.match(/(\d+)(\D)/)
  raise ArgumentError, "#{timeframe.inspect} is not a valid timeframe!" unless matched

  time, date = matched.captures
  time = time.to_f
  case date
  when 'M'
    time.months.ago
  when 'w'
    time.weeks.ago
  when 'd'
    time.days.ago
  when 'h'
    time.hours.ago
  else
    raise ArgumentError, "#{date} is not a valid timeframe!"
  end
end

#retrieve_urls(external_urls) ⇒ Object


118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/html-proofer/cache.rb', line 118

# Works out which URLs need to be checked in this run.
#
# Starts from the URLs that are new to the cache (see detect_url_changes) and
# then adds every cached URL except those that are still within the
# timeframe and have an empty message, i.e. recent successes. Recent
# failures and all expired entries are re-queued.
#
# @param external_urls [Hash] URLs found in this run, keyed by URL
# @return [Hash] URL => filenames for everything that must be checked
def retrieve_urls(external_urls)
  pending = detect_url_changes(external_urls)

  @cache_log.each_pair do |url, entry|
    # The only entries left out are fresh ones whose last check succeeded.
    next if within_timeframe?(entry['time']) && entry['message'].empty?

    pending[url] = entry['filenames']
  end

  pending
end

#setup_cache!(options) ⇒ Object


144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/html-proofer/cache.rb', line 144

# Resolves where the cache lives and loads it if a cache file is present.
#
# Sets @storage_dir (options[:storage_dir] or DEFAULT_STORAGE_DIR), creates
# that directory when it is missing, and sets @cache_file to the cache file
# name (options[:cache_file] or DEFAULT_CACHE_FILE_NAME) inside it. When the
# file exists, @cache_log becomes its parsed JSON content, or {} if the file
# is empty; otherwise @cache_log is left untouched.
#
# @param options [Hash] may contain :storage_dir and :cache_file
# @return [Object, nil] the loaded cache, or nil when there is no cache file
def setup_cache!(options)
  @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
  FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir)

  @cache_file = File.join(storage_dir, options[:cache_file] || DEFAULT_CACHE_FILE_NAME)
  return unless File.exist?(cache_file)

  raw = File.read(cache_file)
  @cache_log = if raw.empty?
                 {}
               else
                 JSON.parse(raw)
               end
end

#size ⇒ Object


40
41
42
# File 'lib/html-proofer/cache.rb', line 40

# Number of entries currently held in the in-memory cache.
def size
  @cache_log.size
end

#unescape_url(url) ⇒ Object

FIXME: it seems that Typhoeus actually acts on escaped URLs, but there's no way to get at that information, and the cache stores unescaped URLs. Because of this, some links, such as github.com/search/issues?q=is:open+is:issue+fig are not matched as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig


136
137
138
# File 'lib/html-proofer/cache.rb', line 136

# Returns the unescaped form of +url+ by delegating to
# Addressable::URI.unescape.
def unescape_url(url)
  Addressable::URI.unescape(url)
end

#urls ⇒ Object


36
37
38
# File 'lib/html-proofer/cache.rb', line 36

# Value stored under the 'urls' key of the cache, or an empty Array when that
# key is missing or holds nil/false.
def urls
  stored = @cache_log['urls']
  return stored if stored

  []
end

#within_timeframe?(time) ⇒ Boolean


32
33
34
# File 'lib/html-proofer/cache.rb', line 32

# Tells whether +time+ lies between the start of the configured timeframe
# (@parsed_timeframe) and the moment this cache object was created
# (@cache_time), both ends included.
#
# Entries loaded by setup_cache! come from JSON.parse, so their timestamps
# arrive as Strings. A range of Times never covers a String, which made every
# cached entry look expired; String values are therefore parsed back into a
# Time before the comparison.
#
# @param time [Time, String, nil] timestamp of a cache entry
# @return [Boolean] false for nil and for strings that cannot be parsed
def within_timeframe?(time)
  if time.is_a?(String)
    require 'time' # provides Time.parse; a no-op once the library is loaded
    begin
      time = Time.parse(time)
    rescue ArgumentError
      # Unparseable timestamps are treated as outside the window.
      return false
    end
  end

  (@parsed_timeframe..@cache_time).cover?(time)
end

#write ⇒ Object


110
111
112
# File 'lib/html-proofer/cache.rb', line 110

# Serializes the in-memory cache to JSON and writes it to cache_file,
# replacing the file's previous content.
#
# @return [Integer] the value returned by File.write
def write
  serialized = @cache_log.to_json
  File.write(cache_file, serialized)
end