Class: HTMLProofer::Cache

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/html-proofer/cache.rb

Constant Summary collapse

DEFAULT_STORAGE_DIR =
File.join('tmp', '.htmlproofer')
DEFAULT_CACHE_FILE_NAME =
'cache.log'
URI_REGEXP =
URI::DEFAULT_PARSER.make_regexp

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#create_nokogiri, #pluralize, #swap

Constructor Details

#initialize(logger, options) ⇒ Cache

Returns a new instance of Cache.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/html-proofer/cache.rb', line 18

# Builds a cache around the given logger and caching options.
#
# Caching is switched on only when +options+ is a non-empty collection; the
# decision is exposed through a per-instance +use_cache?+ singleton method.
#
# @param logger [Object] stored in @logger for later +log+ calls
# @param options [Hash, nil] cache settings; nil or empty disables caching
def initialize(logger, options)
  @logger = logger
  @cache_log = {}

  # Capture "now" once; @cache_time is what #add stamps onto every entry.
  @cache_datetime = DateTime.now
  @cache_time = @cache_datetime.to_time

  caching_enabled = !(options.nil? || options.empty?)
  define_singleton_method('use_cache?') { caching_enabled }
  return unless caching_enabled

  # Only touch the filesystem and parse the timeframe when caching is on.
  setup_cache!(options)
  @parsed_timeframe = parsed_timeframe(options[:timeframe])
end

Instance Attribute Details

#cache_file ⇒ Object (readonly)

Returns the value of attribute cache_file.



16
17
18
# File 'lib/html-proofer/cache.rb', line 16

# Reader for @cache_file, the path setup_cache! builds by joining the storage
# directory with the cache file name. Nothing else in this class assigns it,
# so it is nil when setup_cache! has not run.
def cache_file
  @cache_file
end

#cache_log ⇒ Object (readonly)

Returns the value of attribute cache_log.



16
17
18
# File 'lib/html-proofer/cache.rb', line 16

# Reader for @cache_log, the in-memory cache. It starts as an empty Hash in
# #initialize and is replaced by the parsed JSON contents of the cache file
# in setup_cache! when that file exists and is non-empty.
def cache_log
  @cache_log
end

#exists ⇒ Object (readonly)

Returns the value of attribute exists.



16
17
18
# File 'lib/html-proofer/cache.rb', line 16

# Reader for @exists.
# NOTE(review): none of the methods shown for this class assign @exists, so
# this presumably always returns nil — confirm before relying on it.
def exists
  @exists
end

#storage_dir ⇒ Object (readonly)

Returns the value of attribute storage_dir.



16
17
18
# File 'lib/html-proofer/cache.rb', line 16

# Reader for @storage_dir, which setup_cache! sets to options[:storage_dir]
# or, when that is absent, DEFAULT_STORAGE_DIR.
def storage_dir
  @storage_dir
end

Instance Method Details

#add(url, filenames, status, msg = '') ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/html-proofer/cache.rb', line 65

# Records the outcome of checking +url+ in the in-memory cache, keyed by the
# cleaned URL. Does nothing (and returns nil) when caching is disabled.
#
# @param url [String] the URL that was checked
# @param filenames [Object] the files that reference the URL
# @param status [Object] the status recorded for the check
# @param msg [String] failure message; defaults to the empty string
# @return [Hash, nil] the stored entry, or nil when caching is off
def add(url, filenames, status, msg = '')
  if use_cache?
    # Every entry is stamped with the time captured at construction.
    @cache_log[clean_url(url)] = {
      time: @cache_time,
      filenames: filenames,
      status: status,
      message: msg
    }
  end
end

#clean_url(url) ⇒ Object



154
155
156
# File 'lib/html-proofer/cache.rb', line 154

# Normalizes +url+ into the form used for cache keys and comparisons.
# Currently this is just unescape_url(url).
def clean_url(url)
  unescape_url(url)
end

#detect_url_changes(found, type) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/html-proofer/cache.rb', line 78

# Reconciles the URLs found in this run against the cached ones.
#
# URLs in +found+ that are not yet cached are returned so they can be checked;
# cached URLs that are no longer in +found+ and match +type+ are deleted from
# @cache_log in place. Both counts are logged at :info level.
#
# @param found [Hash] URLs discovered in this run, mapped to their data
# @param type [Object] passed through to url_matches_type? when pruning
# @return [Hash] the subset of +found+ whose cleaned URL is not in the cache;
#   an empty Hash (with the cache left untouched) when +found+ is empty
def detect_url_changes(found, type)
  found_urls = found.keys.map { |url| clean_url(url) }

  # if there were no urls, bail
  return {} if found_urls.empty?

  # Hash-backed lookup tables: the membership checks below run once per URL,
  # so scanning an Array each time would make this pass quadratic.
  found_index = found_urls.each_with_object({}) { |url, index| index[url] = true }
  existing_index = @cache_log.each_key.each_with_object({}) do |url, index|
    index[clean_url(url)] = true
  end

  # prepare to add new URLs detected
  additions = found.reject do |url, _|
    cleaned = clean_url(url)
    next true if existing_index.key?(cleaned)

    @logger.log :debug, "Adding #{cleaned} to cache check"
    false
  end

  new_link_text = pluralize(additions.length, 'link', 'links')
  @logger.log :info, "Adding #{new_link_text} to the cache..."

  # remove from cache URLs that no longer exist
  deletions = 0
  @cache_log.delete_if do |url, _|
    cleaned = clean_url(url)
    # Keep entries that are still referenced, and entries of another type.
    next false if found_index.key?(cleaned) || !url_matches_type?(cleaned, type)

    @logger.log :debug, "Removing #{cleaned} from cache check"
    deletions += 1
    true
  end

  del_link_text = pluralize(deletions, 'link', 'links')
  @logger.log :info, "Removing #{del_link_text} from the cache..."

  additions
end

#load? ⇒ Boolean

Returns:

  • (Boolean)


128
129
130
# File 'lib/html-proofer/cache.rb', line 128

# Returns true when @load is nil, false otherwise.
# NOTE(review): none of the methods shown for this class assign @load, so
# this presumably always returns true — confirm against callers.
def load?
  @load.nil?
end

#parsed_timeframe(timeframe) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/html-proofer/cache.rb', line 48

# Converts a timeframe string such as '30d' into a point in the past by
# handing the amount and unit to time_ago.
#
# The first run of digits followed by a non-digit is used. Recognized unit
# letters are 'M' (months), 'w' (weeks), 'd' (days) and 'h' (hours); the
# match is case-sensitive, so a lowercase 'm' is rejected.
#
# @param timeframe [String] amount followed by a unit letter, e.g. '2w'
# @return [Object] whatever time_ago returns for the amount and unit
# @raise [ArgumentError] if +timeframe+ is nil, has no "<digits><unit>" part,
#   or uses an unknown unit
def parsed_timeframe(timeframe)
  # A nil or malformed value used to crash with NoMethodError on nil.captures;
  # report it as a bad argument instead, like the unknown-unit case below.
  match = timeframe.to_s.match(/(\d+)(\D)/)
  raise ArgumentError, "#{timeframe.inspect} is not a valid timeframe!" if match.nil?

  time, date = match.captures
  time = time.to_i
  case date
  when 'M'
    time_ago(time, :months)
  when 'w'
    time_ago(time, :weeks)
  when 'd'
    time_ago(time, :days)
  when 'h'
    time_ago(time, :hours)
  else
    raise ArgumentError, "#{date} is not a valid timeframe!"
  end
end

#retrieve_urls(urls, type) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/html-proofer/cache.rb', line 132

# Works out which URLs need to be checked in this run.
#
# Starts from the URLs detect_url_changes reports as new, then adds every
# cached URL of the given +type+ that is not a recent success (outside the
# timeframe, or carrying a non-empty failure message).
#
# @param urls [Hash] URLs found in this run, mapped to their filenames
# @param type [Object] passed through to url_matches_type?
# @return [Hash] URLs to check, mapped to their filenames
def retrieve_urls(urls, type)
  detect_url_changes(urls, type).tap do |pending|
    @cache_log.each_pair do |url, cache|
      # these were successes to skip
      recent_success = within_timeframe?(cache['time']) && cache['message'].empty?
      next if recent_success

      # recheck expired links
      pending[url] = cache['filenames'] if url_matches_type?(url, type)
    end
  end
end

#setup_cache!(options) ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/html-proofer/cache.rb', line 158

# Resolves the cache location from +options+, makes sure the storage
# directory exists, and loads any previously written cache file.
#
# @param options [Hash] may carry :storage_dir and :cache_file; missing
#   values fall back to DEFAULT_STORAGE_DIR / DEFAULT_CACHE_FILE_NAME
# @return [Object, nil] the loaded cache, or nil when no cache file exists
def setup_cache!(options)
  @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
  Dir.exist?(storage_dir) || FileUtils.mkdir_p(storage_dir)

  @cache_file = File.join(storage_dir, options[:cache_file] || DEFAULT_CACHE_FILE_NAME)

  # No file yet means @cache_log keeps whatever it already held.
  if File.exist?(@cache_file)
    raw = File.read(@cache_file)
    @cache_log = if raw.empty?
                   {}
                 else
                   JSON.parse(raw)
                 end
  end
end

#size ⇒ Object



44
45
46
# File 'lib/html-proofer/cache.rb', line 44

# Number of entries currently held in the in-memory cache.
#
# @return [Integer]
def size
  @cache_log.size
end

#unescape_url(url) ⇒ Object

FIXME: it seems that Typhoeus actually acts on escaped URLs, but there’s no way to get at that information, and the cache stores unescaped URLs. Because of this, some links, such as github.com/search/issues?q=is:open+is:issue+fig are not matched as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig



150
151
152
# File 'lib/html-proofer/cache.rb', line 150

# Returns the result of Addressable::URI.unescape for +url+. clean_url uses
# this to normalize URLs before they are stored in or compared against the
# cache.
def unescape_url(url)
  Addressable::URI.unescape(url)
end

#urls ⇒ Object



40
41
42
# File 'lib/html-proofer/cache.rb', line 40

# Returns the value stored under the 'urls' key of the cache, or an empty
# Array when that key is missing or falsy.
# NOTE(review): the other methods here key @cache_log by URL; confirm whether
# anything still writes a top-level 'urls' entry.
def urls
  stored = @cache_log['urls']
  stored || []
end

#within_timeframe?(time) ⇒ Boolean

Returns:

  • (Boolean)


34
35
36
37
38
# File 'lib/html-proofer/cache.rb', line 34

# Tells whether a cached timestamp falls between the parsed timeframe start
# and the time this cache was created (both ends inclusive).
#
# @param time [String, nil] timestamp as stored in the cache file
# @return [Boolean] false when +time+ is nil
def within_timeframe?(time)
  return false if time.nil?

  window = @parsed_timeframe..@cache_time
  # Cached timestamps are strings, so parse before comparing.
  window.cover?(Time.parse(time))
end

#write ⇒ Object

TODO: Garbage performance–both the external and internal caches need access to this file. Write a proper versioned schema in the future



124
125
126
# File 'lib/html-proofer/cache.rb', line 124

# Serializes the in-memory cache to JSON and writes it to the cache file,
# replacing the file's previous contents.
#
# @return [Integer] the value returned by File.write (bytes written)
def write
  destination = cache_file
  File.write(destination, @cache_log.to_json)
end