Class: HTML::Proofer::Cache

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/html/proofer/cache.rb

Constant Summary collapse

FILENAME =
File.join(STORAGE_DIR, 'cache.log')

Constants included from Utils

Utils::STORAGE_DIR

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

clean_content, create_nokogiri, #pluralize, swap

Constructor Details

#initialize(logger, options) ⇒ Cache

Returns a new instance of Cache.



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/html/proofer/cache.rb', line 17

# Sets up the cache state and reads back any cache file found at FILENAME.
#
# @param logger [Object] kept in @logger for later log output
# @param options [Hash, nil] cache options; nil or empty disables loading.
#   Only the :timeframe key is read here, defaulting to '30d'.
def initialize(logger, options)
  @logger = logger
  @cache_log = {}

  # Loading is enabled only when at least one option was supplied.
  @load = !(options.nil? || options.empty?)
  @parsed_timeframe = parsed_timeframe(options[:timeframe] || '30d') if @load
  @cache_time = Time.now

  @exists = File.exist?(FILENAME)
  return unless @exists

  # An empty file leaves the cache as the empty Hash assigned above.
  raw = File.read(FILENAME)
  @cache_log = JSON.parse(raw) unless raw.empty?
end

Instance Attribute Details

#cache_log ⇒ Object

Returns the value of attribute cache_log.



15
16
17
# File 'lib/html/proofer/cache.rb', line 15

# Returns the in-memory cache held in @cache_log.
#
# The constructor starts it as an empty Hash and replaces it with the parsed
# JSON contents of the cache file when that file exists and is non-empty.
#
# @return [Object] the value of @cache_log
def cache_log
  @cache_log
end

#cache_time ⇒ Object

Returns the value of attribute cache_time.



15
16
17
# File 'lib/html/proofer/cache.rb', line 15

# Returns the timestamp held in @cache_time, which the constructor sets to
# Time.now.
#
# @return [Object] the value of @cache_time
def cache_time
  @cache_time
end

#exists ⇒ Object

Returns the value of attribute exists.



15
16
17
# File 'lib/html/proofer/cache.rb', line 15

# Returns the flag held in @exists: true when the cache file (FILENAME) was
# present when the constructor ran, false otherwise.
#
# @return [Object] the value of @exists
def exists
  @exists
end

#load ⇒ Object

Returns the value of attribute load.



15
16
17
# File 'lib/html/proofer/cache.rb', line 15

# Returns the flag held in @load: false when the constructor received nil or
# empty options, true otherwise.
#
# @return [Object] the value of @load
def load
  @load
end

Instance Method Details

#add(url, filenames, status, msg = '') ⇒ Object



63
64
65
66
67
68
69
70
71
72
# File 'lib/html/proofer/cache.rb', line 63

# Records a result for +url+ in the cache, replacing any entry already stored
# under the same cleaned URL.
#
# @param url [String] URL the entry belongs to; normalised with clean_url
# @param filenames [Object] stored under :filenames
# @param status [Object] stored under :status
# @param msg [String] stored under :message
# @return [Hash] the entry that was stored
def add(url, filenames, status, msg = '')
  @cache_log[clean_url(url)] = {
    time: @cache_time,
    filenames: filenames,
    status: status,
    message: msg
  }
end

#clean_url(url) ⇒ Object



136
137
138
# File 'lib/html/proofer/cache.rb', line 136

# Normalises +url+ for use as a cache key: it is first passed through
# unescape_url and the result is then passed through slashless_url.
#
# @param url [String] URL to normalise
# @return [Object] whatever slashless_url returns for the unescaped URL
def clean_url(url)
  unescaped = unescape_url(url)
  slashless_url(unescaped)
end

#detect_url_changes(found) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/html/proofer/cache.rb', line 74

# Reconciles the cache with the URLs found on the current run.
#
# URLs on both sides are compared after normalisation with clean_url. Entries
# of @cache_log whose URL does not appear in +found+ are deleted in place, and
# the entries of +found+ that are not cached yet are returned. Each individual
# change is logged at :debug level and a summary of each phase at :info level.
#
# @param found [Hash] URLs discovered on this run, keyed by URL
# @return [Hash] the entries of +found+ whose URL is not in the cache
def detect_url_changes(found)
  # Hash-backed lookups keep every membership test constant-time instead of
  # scanning an Array once per URL.
  existing_urls = @cache_log.keys.each_with_object({}) { |url, seen| seen[clean_url(url)] = true }
  found_urls = found.keys.each_with_object({}) { |url, seen| seen[clean_url(url)] = true }

  # prepare to add new URLs detected
  additions = found.reject do |url, _|
    cleaned = clean_url(url)
    next true if existing_urls.key?(cleaned)

    @logger.log :debug, :yellow, "Adding #{cleaned} to cache check"
    false
  end

  new_link_text = pluralize(additions.length, 'link', 'links')
  @logger.log :info, :blue, "Adding #{new_link_text} to the cache..."

  # remove from cache URLs that no longer exist
  removed = 0
  @cache_log.delete_if do |url, _|
    cleaned = clean_url(url)
    next false if found_urls.key?(cleaned)

    @logger.log :debug, :yellow, "Removing #{cleaned} from cache check"
    removed += 1
    true
  end

  del_link_text = pluralize(removed, 'link', 'links')
  @logger.log :info, :blue, "Removing #{del_link_text} from the cache..."

  additions
end

#load? ⇒ Boolean

Returns:

  • (Boolean)


116
117
118
# File 'lib/html/proofer/cache.rb', line 116

# Reports whether cache loading was requested, i.e. whether the constructor
# was given non-empty options.
#
# @return [Boolean] true when @load is truthy, false otherwise
def load?
  # The constructor always assigns true or false to @load, so a nil check
  # would answer false for every initialized cache; report the flag itself.
  @load ? true : false
end

#parsed_timeframe(timeframe) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/html/proofer/cache.rb', line 46

# Converts a timeframe such as '30d' into a point in the past.
#
# The first run of digits followed by a non-digit is used: the digits are the
# amount and the following character is the unit ('M' months, 'w' weeks,
# 'd' days, 'h' hours).
#
# @param timeframe [String] amount and unit, e.g. '30d' or '2w'
# @return [Object] the result of calling +ago+ on the matching duration
# @raise [ArgumentError] when no amount/unit pair is found or the unit is
#   not one of M, w, d, h
def parsed_timeframe(timeframe)
  match = timeframe.to_s.match(/(\d+)(\D)/)
  # Without this guard a malformed value surfaces as NoMethodError on nil.
  raise ArgumentError, "#{timeframe} is not a valid timeframe!" if match.nil?

  amount, unit = match.captures
  # Only digits are captured, so an Integer loses nothing. It also responds
  # to every duration helper below; presumably `months` is Integer-only in
  # ActiveSupport -- verify against the bundled version.
  amount = amount.to_i

  case unit
  when 'M'
    amount.months.ago
  when 'w'
    amount.weeks.ago
  when 'd'
    amount.days.ago
  when 'h'
    amount.hours.ago
  else
    raise ArgumentError, "#{unit} is not a valid timeframe!"
  end
end

#slashless_url(url) ⇒ Object

FIXME: there seems to be some discrepancy where Typhoeus occasionally adds a trailing slash to URL strings, which causes issues with the cache



123
124
125
# File 'lib/html/proofer/cache.rb', line 123

# Returns a copy of +url+ with one trailing '/' removed, if there is one.
#
# Used so that a URL with and without a trailing slash maps to the same
# cache key.
#
# @param url [String] URL to trim
# @return [String] the URL without a single trailing slash
def slashless_url(url)
  trailing_slash = '/'
  url.chomp(trailing_slash)
end

#unescape_url(url) ⇒ Object

FIXME: it seems that Typhoeus actually acts on escaped URLs, but there’s no way to get at that information, and the cache stores unescaped URLs. Because of this, some links, such as github.com/search/issues?q=is:open+is:issue+fig are not matched as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig



132
133
134
# File 'lib/html/proofer/cache.rb', line 132

# Returns +url+ as unescaped by Addressable::URI.unescape.
#
# The cache stores unescaped URLs, so URLs are unescaped before being used
# as cache keys.
#
# @param url [String] URL that may contain escaped characters
# @return [Object] the result of Addressable::URI.unescape
def unescape_url(url)
  uri = Addressable::URI
  uri.unescape(url)
end

#urls ⇒ Object



42
43
44
# File 'lib/html/proofer/cache.rb', line 42

# Returns the value stored under the 'urls' key of the cache, or a new empty
# Array when that value is missing, nil or false.
#
# @return [Object] the cached 'urls' value, or []
def urls
  cached = @cache_log['urls']
  cached ? cached : []
end

#within_timeframe?(time) ⇒ Boolean

Returns:

  • (Boolean)


38
39
40
# File 'lib/html/proofer/cache.rb', line 38

# Tells whether +time+ lies between the start of the configured timeframe
# (@parsed_timeframe) and the moment this cache was created (@cache_time),
# both ends included.
#
# @param time [Time, String, nil] timestamp to check. Strings are accepted
#   because cache entries are written as JSON and read back with JSON.parse,
#   which does not turn timestamps back into Time objects.
# @return [Boolean] true when the timestamp falls inside the timeframe;
#   false when it falls outside, cannot be parsed, is nil, or when no
#   timeframe was configured
def within_timeframe?(time)
  # No timeframe (cache loading disabled) or no timestamp: nothing can match.
  return false if time.nil? || @parsed_timeframe.nil?

  if time.is_a?(String)
    require 'time' # Time.parse is provided by the stdlib 'time' extension
    begin
      time = Time.parse(time)
    rescue ArgumentError
      # An unreadable timestamp cannot be shown to be inside the timeframe.
      return false
    end
  end

  (@parsed_timeframe..@cache_time).cover?(time)
end

#write ⇒ Object



112
113
114
# File 'lib/html/proofer/cache.rb', line 112

# Serialises the in-memory cache to JSON and writes it to FILENAME,
# replacing the file's previous contents.
#
# @return [Integer] the value returned by File.write
def write
  serialized = @cache_log.to_json
  File.write(FILENAME, serialized)
end