Class: HTMLProofer::Cache

Inherits:

Object

Object
HTMLProofer::Cache

show all

Includes: Utils

Defined in: lib/html-proofer/cache.rb

Constant Summary collapse

DEFAULT_STORAGE_DIR =

File.join('tmp', '.htmlproofer')

DEFAULT_CACHE_FILE_NAME =

'cache.log'

Instance Attribute Summary collapse

#cache_file ⇒ Object readonly

Returns the value of attribute cache_file.
#cache_log ⇒ Object readonly

Returns the value of attribute cache_log.
#exists ⇒ Object readonly

Returns the value of attribute exists.
#storage_dir ⇒ Object readonly

Returns the value of attribute storage_dir.

Instance Method Summary collapse

#add(url, filenames, status, msg = '') ⇒ Object
#clean_url(url) ⇒ Object
#detect_url_changes(found) ⇒ Object
#initialize(logger, options) ⇒ Cache constructor

A new instance of Cache.
#load? ⇒ Boolean
#parsed_timeframe(timeframe) ⇒ Object
#retrieve_urls(external_urls) ⇒ Object
#setup_cache!(options) ⇒ Object
#size ⇒ Object
#unescape_url(url) ⇒ Object

FIXME: it seems that Typhoeus actually acts on escaped URLs, but there’s no way to get at that information, and the cache stores unescaped URLs.
#urls ⇒ Object
#within_timeframe?(time) ⇒ Boolean
#write ⇒ Object

Methods included from Utils

#create_nokogiri, #pluralize, #swap

Constructor Details

#initialize(logger, options) ⇒ `Cache`

Returns a new instance of Cache.

# File 'lib/html-proofer/cache.rb', line 16

# Builds a cache wrapper around the given logger and cache options.
#
# When +options+ is nil or empty, caching is switched off: +use_cache?+
# answers false and no cache file is touched. Otherwise +use_cache?+ answers
# true, the cache is prepared through +setup_cache!+, and the +:timeframe+
# option is parsed and remembered in @parsed_timeframe.
#
# @param logger [Object] stored as-is in @logger
# @param options [Hash, nil] cache options; +:timeframe+ is read here
def initialize(logger, options)
  @logger = logger
  @cache_log = {}

  # "Now" is kept both as a DateTime and as the equivalent Time.
  @cache_datetime = DateTime.now
  @cache_time = @cache_datetime.to_time

  caching = !(options.nil? || options.empty?)
  define_singleton_method('use_cache?') { caching }
  return unless caching

  setup_cache!(options)
  @parsed_timeframe = parsed_timeframe(options[:timeframe])
end

Instance Attribute Details

#cache_file ⇒ `Object` (readonly)

Returns the value of attribute cache_file.



14
15
16

# File 'lib/html-proofer/cache.rb', line 14

# Reader for @cache_file.
#
# @return [Object] the value assigned by setup_cache! (the storage directory
#   joined with the cache file name), or nil if setup_cache! has not run
def cache_file
  @cache_file
end

#cache_log ⇒ `Object` (readonly)

Returns the value of attribute cache_log.



14
15
16

# File 'lib/html-proofer/cache.rb', line 14

# Reader for @cache_log.
#
# @return [Object] the in-memory cache; starts as an empty Hash in the
#   constructor, may be replaced by the parsed cache file in setup_cache!,
#   and receives one entry per cleaned URL through add
def cache_log
  @cache_log
end

#exists ⇒ `Object` (readonly)

Returns the value of attribute exists.



14
15
16

# File 'lib/html-proofer/cache.rb', line 14

# Reader for @exists.
#
# NOTE(review): nothing in the visible code assigns @exists, so this
# presumably always returns nil -- confirm whether it is still needed.
#
# @return [Object] the current value of @exists
def exists
  @exists
end

#storage_dir ⇒ `Object` (readonly)

Returns the value of attribute storage_dir.



14
15
16

# File 'lib/html-proofer/cache.rb', line 14

# Reader for @storage_dir.
#
# @return [Object] the directory chosen in setup_cache! (the :storage_dir
#   option or DEFAULT_STORAGE_DIR), or nil if setup_cache! has not run
def storage_dir
  @storage_dir
end

Instance Method Details

#add(url, filenames, status, msg = '') ⇒ `Object`

# File 'lib/html-proofer/cache.rb', line 61

# Records (or overwrites) the cache entry for a URL.
#
# The entry is stored under the cleaned form of +url+ and is stamped with
# the time captured when this cache object was created.
#
# @param url [String] URL that was checked
# @param filenames [Object] files in which the URL was found
# @param status [Object] status recorded for the check
# @param msg [String] message recorded for the check (defaults to '')
# @return [Hash] the entry that was stored
def add(url, filenames, status, msg = '')
  entry = { time: @cache_time, filenames: filenames, status: status, message: msg }
  key = clean_url(url)
  @cache_log[key] = entry
end

#clean_url(url) ⇒ `Object`



139
140
141

# File 'lib/html-proofer/cache.rb', line 139

# Normalises a URL before it is used as a cache key or compared with one.
# Currently this only delegates to unescape_url.
#
# @param url [String] URL to normalise
# @return [Object] whatever unescape_url returns for +url+
def clean_url(url)
  unescape_url(url)
end

#detect_url_changes(found) ⇒ `Object`

# File 'lib/html-proofer/cache.rb', line 72

# Reconciles the cache with the URLs found in the current run.
#
# URLs are compared in their cleaned form. Entries whose URL no longer
# appears in +found+ are deleted from the cache; URLs in +found+ that the
# cache does not know yet are returned so the caller can check them.
# Each addition/removal is logged at :debug level and the totals at :info.
#
# @param found [Hash] URLs discovered in this run, keyed by URL
# @return [Hash] the subset of +found+ whose URLs are not cached yet
def detect_url_changes(found)
  # Hash-backed lookup tables keep every membership test O(1); scanning an
  # Array for each URL made this method quadratic on large sites.
  existing_urls = {}
  @cache_log.each_key { |url| existing_urls[clean_url(url)] = true }
  found_urls = {}
  found.each_key { |url| found_urls[clean_url(url)] = true }

  # prepare to add new URLs detected
  additions = found.reject do |url, _|
    cleaned = clean_url(url)
    next true if existing_urls.key?(cleaned)

    @logger.log :debug, "Adding #{cleaned} to cache check"
    false
  end

  new_link_text = pluralize(additions.length, 'link', 'links')
  @logger.log :info, "Adding #{new_link_text} to the cache..."

  # remove from cache URLs that no longer exist
  del = 0
  @cache_log.delete_if do |url, _|
    cleaned = clean_url(url)
    next false if found_urls.key?(cleaned)

    @logger.log :debug, "Removing #{cleaned} from cache check"
    del += 1
    true
  end

  del_link_text = pluralize(del, 'link', 'links')
  @logger.log :info, "Removing #{del_link_text} from the cache..."

  additions
end

#load? ⇒ `Boolean`

Returns:

(Boolean)



114
115
116

# File 'lib/html-proofer/cache.rb', line 114

# Reports whether @load is unset.
#
# NOTE(review): nothing in the visible code assigns @load, so this
# presumably always returns true -- confirm the intended meaning.
#
# @return [Boolean] true when @load is nil
def load?
  @load.nil?
end

#parsed_timeframe(timeframe) ⇒ `Object`

# File 'lib/html-proofer/cache.rb', line 44

# Converts a timeframe such as "30d" into a point in the past.
#
# The first run of digits followed by a non-digit is used: the digits are
# the amount and the following character is the unit
# (M = months, w = weeks, d = days, h = hours).
#
# @param timeframe [String] amount plus unit, e.g. "2w"
# @return [Object] whatever time_ago returns for the amount and unit
# @raise [ArgumentError] if +timeframe+ is nil, has no "<digits><unit>" part,
#   or uses an unknown unit
def parsed_timeframe(timeframe)
  # Without this guard a nil or malformed value surfaced as a NoMethodError
  # on nil, which says nothing about the bad option.
  match = timeframe.to_s.match(/(\d+)(\D)/)
  raise ArgumentError, "#{timeframe.inspect} is not a valid timeframe!" if match.nil?

  time, date = match.captures
  time = time.to_i
  case date
  when 'M'
    time_ago(time, :months)
  when 'w'
    time_ago(time, :weeks)
  when 'd'
    time_ago(time, :days)
  when 'h'
    time_ago(time, :hours)
  else
    raise ArgumentError, "#{date} is not a valid timeframe!"
  end
end

#retrieve_urls(external_urls) ⇒ `Object`

# File 'lib/html-proofer/cache.rb', line 118

# Works out which URLs have to be checked in this run.
#
# Starts from the URLs that are new to the cache (see detect_url_changes)
# and adds every cached URL that cannot be trusted any more: entries that
# fall outside the timeframe, and entries inside it that carry a non-empty
# message. Only a cached entry with an empty message that is still inside
# the timeframe is skipped.
#
# @param external_urls [Hash] URLs discovered in this run, keyed by URL
# @return [Hash] URLs to check, each mapped to its filenames
def retrieve_urls(external_urls)
  urls_to_check = detect_url_changes(external_urls)
  @cache_log.each_pair do |url, cache|
    # these were successes to skip; previously the `next` sat alone in its
    # branch, so cached failures inside the timeframe were never re-queued
    next if within_timeframe?(cache['time']) && cache['message'].empty?

    urls_to_check[url] = cache['filenames'] # recheck failures and expired links
  end
  urls_to_check
end

#setup_cache!(options) ⇒ `Object`

# File 'lib/html-proofer/cache.rb', line 143

# Prepares the on-disk cache and loads any existing cache file.
#
# Resolves the storage directory and cache file name from +options+
# (falling back to DEFAULT_STORAGE_DIR / DEFAULT_CACHE_FILE_NAME), makes
# sure the directory exists, and, when the cache file is present, parses it
# as JSON into @cache_log. A missing file leaves @cache_log untouched; an
# empty or whitespace-only file yields an empty cache.
#
# @param options [Hash] may contain :storage_dir and :cache_file
# @return [Object, nil] the loaded cache, or nil when no cache file exists
# @raise [JSON::ParserError] if the cache file holds invalid JSON
def setup_cache!(options)
  @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR

  # mkdir_p is already a no-op for an existing directory, so no prior
  # existence check is needed.
  FileUtils.mkdir_p(@storage_dir)

  cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME
  @cache_file = File.join(@storage_dir, cache_file_name)

  return unless File.exist?(@cache_file)

  contents = File.read(@cache_file)
  # A file holding only whitespace (e.g. a lone newline) is as empty as a
  # zero-byte one; handing it to JSON.parse would raise.
  @cache_log = contents.strip.empty? ? {} : JSON.parse(contents)
end

#size ⇒ `Object`



40
41
42

# File 'lib/html-proofer/cache.rb', line 40

# Number of entries currently held in the in-memory cache.
#
# @return [Integer] entry count of @cache_log
def size
  @cache_log.size
end

#unescape_url(url) ⇒ `Object`

FIXME: it seems that Typhoeus actually acts on escaped URLs, but there’s no way to get at that information, and the cache stores unescaped URLs. Because of this, some links, such as github.com/search/issues?q=is:open+is:issue+fig, do not match their escaped form, github.com/search/issues?q=is%3Aopen+is%3Aissue+fig



135
136
137

# File 'lib/html-proofer/cache.rb', line 135

# Returns +url+ with its escape sequences undone, as computed by
# Addressable::URI.unescape.
#
# FIXME: the cache stores unescaped URLs, so a link and its escaped form
# (e.g. "is:open" vs "is%3Aopen" in a query string) may fail to match.
#
# @param url [String] URL to unescape
# @return [Object] the result of Addressable::URI.unescape for +url+
def unescape_url(url)
  Addressable::URI.unescape(url)
end

#urls ⇒ `Object`



36
37
38

# File 'lib/html-proofer/cache.rb', line 36

# Value stored under the 'urls' key of the cache log.
#
# @return [Object] the stored value, or an empty Array when the key is
#   missing or holds nil/false
def urls
  stored = @cache_log['urls']
  return [] unless stored

  stored
end

#within_timeframe?(time) ⇒ `Boolean`

Returns:

(Boolean)



32
33
34

# File 'lib/html-proofer/cache.rb', line 32

# Tells whether a cached timestamp lies inside the accepted window, i.e.
# between the parsed timeframe boundary and the time this cache object was
# created (both ends inclusive).
#
# @param time [String] timestamp in a format Time.parse understands
# @return [Boolean] true when the timestamp is covered by the window
def within_timeframe?(time)
  window = Range.new(@parsed_timeframe, @cache_time)
  checked_at = Time.parse(time)
  window.cover?(checked_at)
end

#write ⇒ `Object`



110
111
112

# File 'lib/html-proofer/cache.rb', line 110

# Persists the in-memory cache to the cache file as JSON, replacing any
# previous contents.
#
# @return [Integer] number of bytes written
def write
  target = cache_file
  payload = @cache_log.to_json
  File.open(target, 'w') { |io| io.write(payload) }
end

Class: HTMLProofer::Cache

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

Constructor Details

#initialize(logger, options) ⇒ Cache

Instance Attribute Details

#cache_file ⇒ Object (readonly)

#cache_log ⇒ Object (readonly)

#exists ⇒ Object (readonly)

#storage_dir ⇒ Object (readonly)

Instance Method Details

#add(url, filenames, status, msg = '') ⇒ Object

#clean_url(url) ⇒ Object

#detect_url_changes(found) ⇒ Object

#load? ⇒ Boolean

#parsed_timeframe(timeframe) ⇒ Object

#retrieve_urls(external_urls) ⇒ Object

#setup_cache!(options) ⇒ Object

#size ⇒ Object

#unescape_url(url) ⇒ Object

#urls ⇒ Object

#within_timeframe?(time) ⇒ Boolean

#write ⇒ Object