Class: SiteDiff::Config::Creator

Inherits:
Object
  • Object
show all
Defined in:
lib/sitediff/config/creator.rb

Instance Method Summary collapse

Constructor Details

#initialize(concurrency, interval, whitelist, blacklist, curl_opts, debug, *urls) ⇒ Creator

Returns a new instance of Creator.



14
15
16
17
18
19
20
21
22
23
# File 'lib/sitediff/config/creator.rb', line 14

# Store crawl settings and the site URL(s) to build a config for.
#
# @param concurrency passed to the HTTP hydra as its max concurrency
# @param interval handed through to each Crawler
# @param whitelist handed through to each Crawler
# @param blacklist handed through to each Crawler
# @param curl_opts handed through to each Crawler
# @param debug handed through to each Crawler
# @param urls trailing arguments; the last one is the "after" site and the
#   one preceding it (if any) is the "before" site
def initialize(concurrency, interval, whitelist, blacklist, curl_opts, debug, *urls)
  # Site URLs: read from the end of the list so a single URL means "after only".
  @after = urls[-1]
  @before = urls[-2] # May be nil

  # Crawl tuning and filtering options.
  @concurrency = concurrency
  @interval = interval
  @whitelist = whitelist
  @blacklist = blacklist
  @curl_opts = curl_opts
  @debug = debug
end

Instance Method Details

#altered_paths(path) {|path + '/'| ... } ⇒ Object

Yields:

  • (path + '/')


77
78
79
80
# File 'lib/sitediff/config/creator.rb', line 77

# Yield two spellings of +path+ to the given block: first with a '/'
# appended, then with one trailing '/' removed (unchanged if there is none).
#
# @param path [String] the path to vary
# @yieldparam variant [String] one spelling of the path
def altered_paths(path)
  with_slash = path + '/'
  yield with_slash

  without_slash = path.sub(%r{/$}, '')
  yield without_slash
end

#build_config ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/sitediff/config/creator.rb', line 52

# Populate @config: record the base URL of each configured site, crawl the
# sites, let the rules object (if any) add its settings, and finally store
# the sorted union of every crawled path under 'paths'.
#
# @return [Array] the sorted list of paths stored in @config['paths']
def build_config
  # Always "before" first, then "after", so the key order in @config is stable.
  %w[before after].each do |tag|
    url = roots[tag.to_sym]
    @config[tag] = { 'url' => url } if url
  end

  crawl(@depth)
  @rules.add_config unless @rules.nil?

  # Merge the per-tag path sets; with no crawled paths the union is nil,
  # which #to_a turns into an empty list.
  merged = @paths.values.inject { |union, paths| union | paths }
  @config['paths'] = merged.to_a.sort
end

#canonicalize(_tag, path) ⇒ Object

Deduplicate paths with slashes at the end



76
77
78
79
80
81
82
83
# File 'lib/sitediff/config/creator.rb', line 76

# Yield two spellings of +path+ to the given block: first with a '/'
# appended, then with one trailing '/' removed (unchanged if there is none).
#
# Defined as a regular method rather than inside #canonicalize, so it is
# available immediately and is not re-defined on every #canonicalize call.
#
# @param path [String] the path to vary
# @yieldparam variant [String] one spelling of the path
def altered_paths(path)
  yield path + '/'
  yield path.sub(%r{/$}, '')
end

# Normalise a crawled path: an empty path is treated as the site root '/',
# any other path is returned unchanged.
#
# @param _tag unused
# @param path [String] path relative to the site root
# @return [String] '/' when +path+ is empty, otherwise +path+ itself
def canonicalize(_tag, path)
  path.empty? ? '/' : path
end

#config_file ⇒ Object



121
122
123
# File 'lib/sitediff/config/creator.rb', line 121

# Location of the config file: the default config filename inside @dir.
#
# @return [Pathname]
def config_file
  @dir.join(Config::DEFAULT_FILENAME)
end

#crawl(depth = nil) ⇒ Object



65
66
67
68
69
70
71
72
73
# File 'lib/sitediff/config/creator.rb', line 65

# Crawl every configured site root. One Crawler per root is registered on a
# shared Typhoeus hydra; each page a crawler reports is forwarded to
# #crawled_path together with the root's tag. The hydra is then run.
#
# @param depth passed through to each Crawler (nil by default)
# @return whatever the hydra's #run returns
def crawl(depth = nil)
  hydra = Typhoeus::Hydra.new(max_concurrency: @concurrency)

  roots.each_pair do |tag, base_url|
    on_page = proc { |info| crawled_path(tag, info) }
    Crawler.new(hydra, base_url, @interval, @whitelist, @blacklist, depth, @curl_opts, @debug, &on_page)
  end

  hydra.run
end

#crawled_path(tag, info) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/sitediff/config/creator.rb', line 85

# Handle one crawled page: notify the progress callback, remember the page's
# path for its tag, cache the fetched result and feed the page to the rules
# detector.
#
# @param tag [Symbol] which site the page belongs to (a key of #roots)
# @param info crawler page info; must respond to #relative, #read_result
#   and #document
def crawled_path(tag, info)
  # A second element is only present if #canonicalize returns a pair; with a
  # plain String result +dup+ is nil and the page is always processed.
  path, dup = canonicalize(tag, info.relative)
  return if dup

  res = info.read_result

  # The callback is the block given to #create, which is optional; skip the
  # notification instead of raising NoMethodError when no block was supplied.
  @callback&.call(tag, info)
  @paths[tag] << path
  @cache.set(tag, path, res)

  # If single-site, cache after as before!
  @cache.set(:before, path, res) unless roots[:before]

  # This is used to populate the list of rules we guess are
  # applicable to the current site.
  @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
end

#create(opts, &block) ⇒ Object

Build a config structure, return it



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/sitediff/config/creator.rb', line 34

# Build a config structure and write it to disk.
#
# @param opts [Hash] options; the keys read here are :directory (where the
#   config and cache live), :depth (crawl depth), :rules (enable rule
#   detection) and :rules_disabled (passed to Rules.new)
# @yield optional block stored as the per-page callback used while crawling
# @return whatever #write_config returns
def create(opts, &block)
  @config = {}
  @callback = block
  @dir = Pathname.new(opts[:directory])

  # Handle other options
  @depth = opts[:depth]
  # Assigned unconditionally: a modifier-`if` would leave a Rules object from
  # an earlier call (bound to that call's @config) in place when :rules is
  # not requested this time.
  @rules = opts[:rules] ? Rules.new(@config, opts[:rules_disabled]) : nil

  # Setup instance vars
  @paths = Hash.new { |h, k| h[k] = Set.new }
  @cache = Cache.new(directory: @dir.to_s, create: true)
  @cache.write_tags << :before << :after

  build_config
  write_config
end

#directory ⇒ Object



117
118
119
# File 'lib/sitediff/config/creator.rb', line 117

# Reader for @dir, which #create sets to a Pathname built from
# opts[:directory]. Returns nil if #create has not been called yet.
def directory
  @dir
end

#make_gitignore(dir) ⇒ Object

Create a gitignore if we seem to be in git



104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/sitediff/config/creator.rb', line 104

# Create a gitignore if we seem to be in git.
#
# "In git" means that +dir+ (resolved with realpath) or one of its ancestors
# contains a '.git' entry. The '.gitignore' in +dir+ is opened in 'w' mode,
# so any existing file there is overwritten.
#
# @param dir [Pathname] directory to receive the .gitignore
# @return [nil]
def make_gitignore(dir)
  # Check if we're in git
  in_git = dir.realpath.ascend.any? { |ancestor| (ancestor + '.git').exist? }
  return unless in_git

  (dir + '.gitignore').open('w') do |file|
    file.puts <<~GITIGNORE
      output
      cache.db
      cache.db.db
    GITIGNORE
  end
end

#roots ⇒ Object



25
26
27
28
29
30
31
# File 'lib/sitediff/config/creator.rb', line 25

# Map of site tag to base URL. :after is always present; :before is only
# included when a "before" URL was supplied. The hash is rebuilt and stored
# in @roots on every call.
#
# @return [Hash{Symbol => Object}]
def roots
  sites = { after: @after }
  sites[:before] = @before if @before
  @roots = sites
end

#write_config ⇒ Object

Turn a config structure into a config file



126
127
128
129
# File 'lib/sitediff/config/creator.rb', line 126

# Turn a config structure into a config file: make sure a .gitignore exists
# for @dir when applicable, then serialise @config as YAML into the config
# file, replacing its previous contents.
def write_config
  make_gitignore(@dir)

  config_file.open('w') do |io|
    io.puts(@config.to_yaml)
  end
end