Class: Rwm::TaskCache

Inherits:
Object
  • Object
show all
Defined in:
lib/rwm/task_cache.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(workspace, graph) ⇒ TaskCache

Returns a new instance of TaskCache.



22
23
24
25
26
27
28
29
30
# File 'lib/rwm/task_cache.rb', line 22

# Set up a task cache bound to a workspace and its dependency graph.
#
# @param workspace [#root] workspace object; its root is where ".rwm/cache" lives
# @param graph [Object] dependency graph, kept for later lookups
def initialize(workspace, graph)
  @workspace = workspace
  @graph = graph

  # Cache files are kept under <workspace root>/.rwm/cache.
  @cache_dir = File.join(workspace.root, ".rwm", "cache")

  # In-memory memo tables, each with its own mutex.
  @content_hashes = {}
  @cache_declarations = {}
  @content_hash_mutex = Mutex.new
  @declarations_mutex = Mutex.new
end

Class Method Details

.clean(workspace, package_name: nil) ⇒ Object



11
12
13
14
15
16
17
18
19
20
# File 'lib/rwm/task_cache.rb', line 11

# Delete cache entries stored under "<workspace root>/.rwm/cache".
#
# The cache directory is passed to Dir.glob via `base:` rather than being
# embedded in the pattern, so a workspace root that contains glob
# metacharacters (e.g. "proj[1]") is treated as a literal path.
#
# @param workspace [#root] workspace whose cache should be cleaned
# @param package_name [String, nil] when given, only entries whose file name
#   starts with "<package_name>-" are removed (the name is used as part of a
#   glob pattern); when nil, every non-hidden entry is removed
# @return [Array<String>, nil] paths that were deleted, or nil when the cache
#   directory does not exist
def self.clean(workspace, package_name: nil)
  cache_dir = File.join(workspace.root, ".rwm", "cache")
  return unless Dir.exist?(cache_dir)

  pattern = package_name ? "#{package_name}-*" : "*"
  Dir.glob(pattern, base: cache_dir)
     .map { |entry| File.join(cache_dir, entry) }
     .each { |path| File.delete(path) }
end

Instance Method Details

#cache_declarations(package) ⇒ Object

Discover cacheable task declarations by running `bundle exec rake rwm:cache_config`



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/rwm/task_cache.rb', line 129

# Discover the cacheable task declarations of +package+.
#
# Runs `bundle exec rake rwm:cache_config` inside the package directory and
# parses its stdout as JSON. The result is memoized per package name; the
# memo table is only touched while holding @declarations_mutex. The
# subprocess itself runs outside the lock.
#
# An empty Hash is recorded when the command fails, prints nothing, prints
# invalid JSON, or prints JSON that is not an object. The last case matters
# because callers look declarations up by task name and would otherwise
# fail on an Array, String or nil.
#
# @param package [#name, #path] package to inspect
# @return [Hash] declarations as parsed from the command output, or {}
def cache_declarations(package)
  @declarations_mutex.synchronize do
    return @cache_declarations[package.name] if @cache_declarations.key?(package.name)
  end

  Rwm.debug("cache declarations: discovering for #{package.name}")
  output, _, status = Open3.capture3("bundle", "exec", "rake", "rwm:cache_config", chdir: package.path)
  payload = output.strip
  parsed = status.success? && !payload.empty? ? JSON.parse(payload) : {}
  # Only a JSON object can be queried by task name.
  result = parsed.is_a?(Hash) ? parsed : {}

  @declarations_mutex.synchronize { @cache_declarations[package.name] = result }
rescue JSON::ParserError
  @declarations_mutex.synchronize { @cache_declarations[package.name] = {} }
end

#cacheable?(package, task) ⇒ Boolean

Returns true if the task is declared cacheable in the package’s Rakefile

Returns:



33
34
35
36
# File 'lib/rwm/task_cache.rb', line 33

# Tell whether +task+ appears among the cache declarations of +package+.
#
# @param package [Object] package handed to #cache_declarations
# @param task [String] task name used as the lookup key
# @return [Boolean] true when the declarations contain +task+ as a key
def cacheable?(package, task)
  cache_declarations(package).key?(task)
end

#cached?(package, task) ⇒ Boolean

Returns true if the (package, task) pair is cached and inputs haven’t changed. Also verifies declared outputs exist (if any).

Returns:



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/rwm/task_cache.rb', line 40

# Decide whether the (package, task) pair can be served from the cache.
#
# A hit requires, in order: a stored hash exists, it equals the package's
# current content hash, and, when the task declaration has an "output"
# entry, #outputs_exist? confirms it. Every miss and the final hit are
# reported through Rwm.debug.
#
# @param package [#name] package being checked
# @param task [String] task name
# @return [Boolean] true on a cache hit, false otherwise
def cached?(package, task)
  # Log the reason for a miss and produce the `false` result in one step.
  miss = lambda do |reason|
    Rwm.debug("cache miss: #{package.name}:#{task} (#{reason})")
    false
  end

  stored_hash = read_stored_hash(package, task)
  return miss.call("no stored hash") unless stored_hash
  return miss.call("hash changed") unless stored_hash == content_hash(package)

  # Outputs are only verified when the declaration names them.
  declaration = cache_declarations(package)[task]
  output_pattern = declaration && declaration["output"]
  return miss.call("outputs missing") if output_pattern && !outputs_exist?(package, output_pattern)

  Rwm.debug("cache hit: #{package.name}:#{task}")
  true
end

#content_hash(package) ⇒ Object

Compute a content hash for a package: SHA256 of all source files + dependency hashes



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/rwm/task_cache.rb', line 81

# Compute (and memoize) a content hash for +package+.
#
# The SHA256 digest covers, for every path returned by #source_files, the
# path relative to the package directory followed by the file's bytes, and
# then the content hash of each direct dependency (visited in sorted name
# order). Dependency hashes are computed recursively, so a change anywhere
# below a package changes that package's hash.
#
# The memo table is only touched while holding @content_hash_mutex; the
# hashing itself runs outside the lock.
#
# @param package [#name, #path] package to hash
# @return [String] hex-encoded SHA256 digest
def content_hash(package)
  @content_hash_mutex.synchronize do
    return @content_hashes[package.name] if @content_hashes.key?(package.name)
  end

  digest = Digest::SHA256.new
  path_prefix = "#{package.path}/"

  # NOTE(review): determinism relies on source_files returning a stable
  # (sorted) order; confirm against its definition.
  source_files(package).each do |file|
    digest.update(file.delete_prefix(path_prefix))
    # Digest#file streams the file in binary mode: no text-mode newline
    # translation, and the whole file is never held in memory at once.
    digest.file(file)
  end

  # Include dependency content hashes (transitive invalidation).
  # If a dependency is missing, let it raise: a stale graph should
  # not silently produce incorrect cache hits.
  @graph.dependencies(package.name).sort.each do |dep_name|
    digest.update(content_hash(@workspace.find_package(dep_name)))
  end

  computed = digest.hexdigest
  @content_hash_mutex.synchronize { @content_hashes[package.name] = computed }
end

#outputs_exist?(package, output_pattern) ⇒ Boolean

Check if declared output files/globs exist in the package directory

Returns:



75
76
77
78
# File 'lib/rwm/task_cache.rb', line 75

# Check whether a declared output pattern matches anything inside the
# package directory.
#
# The package directory is passed to Dir.glob via `base:` instead of being
# embedded in the pattern, so glob metacharacters in the directory path
# (e.g. "pkg[1]") are treated literally.
#
# @param package [#path] package whose directory is searched
# @param output_pattern [String] file name or glob, relative to the package
# @return [Boolean] true when at least one entry matches
def outputs_exist?(package, output_pattern)
  # The "./" anchor keeps a pattern with a leading "/" relative to the
  # package directory, as joining it onto the package path used to do.
  relative_pattern = File.join(".", output_pattern)
  !Dir.glob(relative_pattern, base: package.path).empty?
end

#preload_declarations(packages) ⇒ Object

Preload cache declarations for multiple packages in parallel. Warms the memoization hash so subsequent cacheable?/cached? calls are instant.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/rwm/task_cache.rb', line 111

# Warm the declaration memo table for several packages at once.
#
# Packages whose name is already present in @cache_declarations are skipped.
# The rest are split into equally sized batches, at most one per processor
# reported by Etc.nprocessors, and each batch is handled by its own thread
# calling #cache_declarations. The method returns once every thread has
# been joined.
#
# @param packages [Array<#name>] packages to preload
# @return [Array<Thread>, nil] the joined worker threads, or nil when there
#   was nothing to preload
def preload_declarations(packages)
  uncached = packages.reject { |pkg| @cache_declarations.key?(pkg.name) }
  return if uncached.empty?

  Rwm.debug("cache declarations: preloading #{uncached.size} package(s) in parallel")

  worker_count = [Etc.nprocessors, uncached.size].min
  batch_size = (uncached.size.to_f / worker_count).ceil

  workers = uncached.each_slice(batch_size).map do |batch|
    Thread.new { batch.each { |pkg| cache_declarations(pkg) } }
  end
  workers.each(&:join)
end

#store(package, task) ⇒ Object

Store the current content hash after a successful task run



67
68
69
70
71
72
# File 'lib/rwm/task_cache.rb', line 67

# Record the package's current content hash for +task+.
#
# Creates the cache directory when needed, then writes the value of
# #content_hash to the path given by #cache_file.
#
# @param package [#name] package the task ran for
# @param task [String] task name
# @return [Integer] number of bytes written, as returned by File.write
def store(package, task)
  Rwm.debug("cache store: #{package.name}:#{task}")
  FileUtils.mkdir_p(@cache_dir)
  File.write(cache_file(package, task), content_hash(package))
end