Class: HTMLProofer::Cache
- Inherits:
-
Object
- Object
- HTMLProofer::Cache
show all
- Includes:
- Utils
- Defined in:
- lib/html-proofer/cache.rb
Constant Summary
collapse
- CACHE_LOG =
File.join(STORAGE_DIR, 'cache.log')
Constants included
from Utils
Utils::STORAGE_DIR
Instance Attribute Summary collapse
Instance Method Summary
collapse
Methods included from Utils
clean_content, create_nokogiri, #pluralize, swap
Constructor Details
#initialize(logger, options) ⇒ Cache
Returns a new instance of Cache.
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
# File 'lib/html-proofer/cache.rb', line 16
# Builds the cache handler and loads any previously persisted cache log.
#
# @param logger [Object] stored in @logger; other methods call +log+ on it
# @param options [Hash, nil] cache settings; nil or empty disables caching,
#   otherwise options[:timeframe] is handed to parsed_timeframe
def initialize(logger, options)
  @logger = logger
  @cache_log = {}

  # use_cache? is defined per instance and simply reports whether any cache
  # options were supplied.
  caching = !(options.nil? || options.empty?)
  define_singleton_method('use_cache?') { caching }
  @parsed_timeframe = parsed_timeframe(options[:timeframe]) if caching

  @cache_time = Time.now

  # Without a cache file the empty log assigned above stays in place.
  return unless File.exist?(CACHE_LOG)

  raw = File.read(CACHE_LOG)
  @cache_log = raw.empty? ? {} : JSON.parse(raw)
end
|
Instance Attribute Details
#cache_log ⇒ Object
Returns the value of attribute cache_log.
14
15
16
|
# File 'lib/html-proofer/cache.rb', line 14
# Reader for @cache_log, the Hash keyed by cleaned URL that #add fills and
# that #initialize replaces with the parsed cache file when one exists.
def cache_log
@cache_log
end
|
#exists ⇒ Object
Returns the value of attribute exists.
14
15
16
|
# File 'lib/html-proofer/cache.rb', line 14
# Reader for @exists.
# NOTE(review): none of the methods shown on this page assign @exists, so
# this presumably returns nil unless it is set elsewhere — confirm it is
# still needed.
def exists
@exists
end
|
Instance Method Details
#add(url, filenames, status, msg = '') ⇒ Object
64
65
66
67
68
69
70
71
72
73
|
# File 'lib/html-proofer/cache.rb', line 64
# Records the outcome of checking +url+ in the in-memory cache log,
# overwriting any entry already stored under the same cleaned URL.
#
# @param url [String] the URL that was checked; stored under clean_url(url)
# @param filenames [Object] stored as-is under :filenames
# @param status [Object] stored as-is under :status
# @param msg [String] stored under :message; defaults to an empty string
# @return [Hash] the entry that was stored
def add(url, filenames, status, msg = '')
  @cache_log[clean_url(url)] = {
    time: @cache_time, # every entry from one run shares the run's timestamp
    filenames: filenames,
    status: status,
    message: msg
  }
end
|
#clean_url(url) ⇒ Object
149
150
151
|
# File 'lib/html-proofer/cache.rb', line 149
# Normalises +url+ into the form used for cache keys: it is first passed
# through unescape_url and the result through slashless_url.
#
# @param url [String] the URL to normalise
# @return [String] the normalised URL
def clean_url(url)
  unescaped = unescape_url(url)
  slashless_url(unescaped)
end
|
#detect_url_changes(found) ⇒ Object
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
# File 'lib/html-proofer/cache.rb', line 75
# Reconciles the cache log with the URLs found in the current run.
#
# URLs are compared in their clean_url form. Found URLs that are missing
# from the cache are returned so they can be checked; cached URLs that were
# not found are deleted from @cache_log. Both steps are logged.
#
# @param found [Hash] entries from the current run, keyed by URL
# @return [Hash] the entries of +found+ whose URL is not yet in the cache
def detect_url_changes(found)
  # Hash lookups replace repeated Array#include? scans, which made the
  # reconciliation quadratic in the number of URLs.
  cached_lookup = @cache_log.keys.each_with_object({}) { |url, seen| seen[clean_url(url)] = true }
  found_lookup = found.keys.each_with_object({}) { |url, seen| seen[clean_url(url)] = true }

  additions = found.reject do |url, _|
    cleaned = clean_url(url)
    known = cached_lookup.key?(cleaned)
    @logger.log :debug, "Adding #{cleaned} to cache check" unless known
    known
  end

  new_link_text = pluralize(additions.length, 'link', 'links')
  @logger.log :info, "Adding #{new_link_text} to the cache..."

  removed = 0
  @cache_log.delete_if do |url, _|
    cleaned = clean_url(url)
    stale = !found_lookup.key?(cleaned)
    if stale
      @logger.log :debug, "Removing #{cleaned} from cache check"
      removed += 1
    end
    stale
  end

  del_link_text = pluralize(removed, 'link', 'links')
  @logger.log :info, "Removing #{del_link_text} from the cache..."

  additions
end
|
#load? ⇒ Boolean
117
118
119
|
# File 'lib/html-proofer/cache.rb', line 117
# Reports whether @load is nil.
# NOTE(review): none of the methods shown on this page assign @load, so as
# far as this view goes the answer is always true — confirm against callers.
def load?
@load.nil?
end
|
#parsed_timeframe(timeframe) ⇒ Object
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
# File 'lib/html-proofer/cache.rb', line 47
# Converts a timeframe string such as "30d" into the moment that far in the
# past.
#
# The first run of digits that is followed by a non-digit is used: the
# digits are the amount and the following character is the unit —
# 'M' months, 'w' weeks, 'd' days, 'h' hours.
#
# @param timeframe [String] amount plus unit letter, e.g. "2w"
# @return [Object] the result of +amount.<unit>.ago+; these duration helpers
#   are not defined in this file (presumably ActiveSupport — confirm)
# @raise [ArgumentError] if no amount/unit pair is present, or the unit is
#   not one of M, w, d, h
def parsed_timeframe(timeframe)
  match = timeframe.to_s.match(/(\d+)(\D)/)
  # A missing or malformed value used to surface as NoMethodError on nil;
  # report it the same way as an unknown unit instead.
  raise ArgumentError, "#{timeframe.inspect} is not a valid timeframe!" if match.nil?

  time, date = match.captures
  # The pattern only captures whole digits, so an Integer is exact.
  # NOTE(review): ActiveSupport appears to define #months on Integer only,
  # which a Float amount would miss — confirm against the version in use.
  time = time.to_i

  case date
  when 'M'
    time.months.ago
  when 'w'
    time.weeks.ago
  when 'd'
    time.days.ago
  when 'h'
    time.hours.ago
  else
    raise ArgumentError, "#{date} is not a valid timeframe!"
  end
end
|
#retrieve_urls(external_urls) ⇒ Object
121
122
123
124
125
126
127
128
129
130
131
132
|
# File 'lib/html-proofer/cache.rb', line 121
# Works out which URLs need to be checked in this run.
#
# Starts from the URLs detect_url_changes reports as new, then adds every
# cached URL that must be checked again: entries outside the timeframe, and
# entries inside it that carry a non-empty message. Entries inside the
# timeframe with an empty message are left out (presumably these are
# earlier successes — confirm).
#
# @param external_urls [Hash] URLs found in the current run, keyed by URL
# @return [Hash] URLs to check, mapped to their filenames
def retrieve_urls(external_urls)
  urls_to_check = detect_url_changes(external_urls)

  @cache_log.each_pair do |url, cache|
    # Only a recent entry with an empty message can be skipped; the message
    # is not consulted for entries outside the timeframe.
    next if within_timeframe?(cache['time']) && cache['message'].empty?

    urls_to_check[url] = cache['filenames']
  end

  urls_to_check
end
|
#size ⇒ Object
43
44
45
|
# File 'lib/html-proofer/cache.rb', line 43
# Number of entries currently held in @cache_log.
def size
@cache_log.length
end
|
#slashless_url(url) ⇒ Object
FIXME: there seems to be a discrepancy where Typhoeus occasionally adds a trailing slash to URL strings, which causes issues with the cache
136
137
138
|
# File 'lib/html-proofer/cache.rb', line 136
# Returns a copy of +url+ with one trailing "/" removed, if it ends in one
# (String#chomp with "/" as the suffix).
def slashless_url(url)
url.chomp('/')
end
|
#unescape_url(url) ⇒ Object
FIXME: it seems that Typhoeus actually acts on escaped URLs, but there’s no way to get at that information, and the cache stores unescaped URLs. Because of this, some links, such as github.com/search/issues?q=is:open+is:issue+fig are not matched as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
145
146
147
|
# File 'lib/html-proofer/cache.rb', line 145
# Returns +url+ passed through Addressable::URI.unescape. clean_url applies
# this before a URL is used as a cache key.
def unescape_url(url)
Addressable::URI.unescape(url)
end
|
#urls ⇒ Object
39
40
41
|
# File 'lib/html-proofer/cache.rb', line 39
# Returns whatever is stored under the 'urls' key of the cache log, or an
# empty Array when that key is missing or holds nil/false.
def urls
  stored = @cache_log['urls']
  stored || []
end
|
#within_timeframe?(time) ⇒ Boolean
35
36
37
|
# File 'lib/html-proofer/cache.rb', line 35
# Tells whether +time+ lies between the start of the configured timeframe
# (@parsed_timeframe) and the moment this cache object was created
# (@cache_time).
#
# Timestamps read back from the JSON cache file arrive as Strings, and a
# Range of Time objects never covers a String, so they are parsed into Time
# before the comparison.
#
# @param time [Time, String, nil] timestamp of a cache entry
# @return [Boolean] false for nil or unparseable timestamps
def within_timeframe?(time)
  return false if time.nil?

  if time.is_a?(String)
    # Required here because no file-level require list is visible from this
    # method; repeated calls are no-ops once the library is loaded.
    require 'time'
    begin
      time = Time.parse(time)
    rescue ArgumentError
      # An unreadable timestamp counts as outside the timeframe, which makes
      # the caller check the URL again rather than trust a broken entry.
      return false
    end
  end

  (@parsed_timeframe..@cache_time).cover?(time)
end
|
#write ⇒ Object
113
114
115
|
# File 'lib/html-proofer/cache.rb', line 113
# Persists the in-memory cache log to CACHE_LOG as JSON, replacing the
# file's previous contents.
#
# @return [Integer] the value returned by File.write
def write
  serialized = @cache_log.to_json
  File.write(CACHE_LOG, serialized)
end
|