Class: SiteDiff::Config

Inherits:
Object
  • Object
show all
Defined in:
lib/sitediff/config.rb,
lib/sitediff/config/preset.rb,
lib/sitediff/config/creator.rb

Overview

SiteDiff Configuration.

Defined Under Namespace

Classes: ConfigNotFound, Creator, InvalidConfig, Preset

Constant Summary collapse

DEFAULT_FILENAME =

Default config file.

'sitediff.yaml'
DEFAULT_PATHS_FILENAME =

Default paths file.

'paths.txt'
DEFAULT_CONFIG =

Default SiteDiff config.

# Baseline values that a loaded config file is merged onto (see #initialize).
# NOTE: `.freeze` is shallow - only the outer Hash is frozen; the nested
# 'settings', 'before', 'after' and 'paths' objects remain mutable.
{
  'settings' => {
    'depth' => 3,
    'interval' => 0,
    'include' => '',
    'exclude' => '',
    'concurrency' => 3,
    'preset' => nil
  },
  'before' => {},
  'after' => {},
  'paths' => []
}.freeze
ALLOWED_CONFIG_KEYS =

Keys allowed in config files. TODO: Deprecate repeated params before_url and after_url. TODO: Create a method self.supports TODO: Deprecate in favor of self.supports key, subkey, subkey…

# Every sanitizer tool name plus the structural top-level keys. Frozen so the
# whitelist cannot be altered at runtime (consistent with
# ALLOWED_SETTINGS_KEYS).
(
  Sanitizer::TOOLS.values.flatten(1) + %w[
    includes
    settings
    before
    after
    before_url
    after_url
    ignore_whitespace
    export
    output
    report
  ]
).freeze
ALLOWED_SETTINGS_KEYS =

Keys allowed in the “settings” key. TODO: Create a method self.supports TODO: Deprecate in favor of self.supports key, subkey, subkey…

# Frozen whitelist of the names accepted under the 'settings' key.
%w[
  preset
  depth
  include
  exclude
  concurrency
  interval
  curl_opts
].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file, directory) ⇒ Config

Creates a SiteDiff Config object.



247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/sitediff/config.rb', line 247

# Builds a Config from a YAML file.
#
# @param file [String, nil] path to the config file; when nil,
#   DEFAULT_FILENAME inside +directory+ is used.
# @param directory [String] the SiteDiff working directory.
# @raise [InvalidConfig] when the file does not exist, or when #validate
#   rejects the loaded configuration.
def initialize(file, directory)
  # No explicit file given: look for the default one in the directory.
  file = file.nil? ? File.join(directory, DEFAULT_FILENAME) : file

  unless File.exist?(file)
    raise InvalidConfig, "Missing config file #{File.expand_path(file)}."
  end

  # Loaded values are layered on top of the defaults.
  loaded = Config.load_conf(file)
  @config = Config.merge(DEFAULT_CONFIG, loaded)
  @file = file
  @directory = directory
  @preset_applied = { 'before' => false, 'after' => false }

  # Fail early on unusable configuration.
  validate
end

Instance Attribute Details

#directory ⇒ Object (readonly)

Returns the value of attribute directory.



67
68
69
# File 'lib/sitediff/config.rb', line 67

# Reader for @directory, which the constructor sets from its +directory+
# argument.
def directory
  @directory
end

Class Method Details

.create_regexp(string_param) ⇒ Object

Creates a RegExp from a string.



468
469
470
471
472
473
474
475
476
477
478
479
# File 'lib/sitediff/config.rb', line 468

# Creates a Regexp from a string.
#
# An invalid pattern is logged through SiteDiff.log and reported as nil
# instead of propagating the error.
#
# @param string_param [String, nil] regular-expression source.
# @return [Regexp, nil] the compiled pattern, or nil when the input is nil,
#   empty, or cannot be compiled.
def self.create_regexp(string_param)
  # Nothing to compile.
  return nil if string_param.nil? || string_param == ''

  Regexp.new(string_param)
rescue RegexpError, TypeError => e
  # Regexp.new signals a bad pattern with RegexpError (and a non-String
  # argument with TypeError); neither is a SiteDiffException.
  SiteDiff.log "Invalid RegExp: #{string_param}", :error
  SiteDiff.log e.message, :error
  # TODO: Use SiteDiff.log type :debug
  # SiteDiff.log e.backtrace, :error if options[:verbose]
  nil
end

.merge(first, second) ⇒ Object

Merges two normalized Hashes according to the following rules:

1. paths are merged as arrays.
2. before and after: for each subhash H (e.g. ['before']):

a)  if first[H] and second[H] are expected to be arrays, their values
    are merged as such,
b)  if first[H] and second[H] are expected to be scalars, the value for
    second[H] is kept if and only if first[H] is nil.

For example, merge(h1, h2) results in h3:

(h1) before: foo, sanitization: [pattern: foo]
(h2) before: bar, sanitization: [pattern: bar]
(h3) before: foo, sanitization: [pattern: foo, pattern: bar]



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/sitediff/config.rb', line 133

# Merges two normalized config Hashes into a new Hash. Neither argument is
# modified, so a shared Hash such as DEFAULT_CONFIG can be passed safely.
#
# Rules:
# - Top-level sanitization keys (Sanitizer::TOOLS): the value from +second+
#   wins, falling back to +first+; keys without a value are omitted.
# - 'before' / 'after': the two sub-hashes are merged; for keys present on
#   both sides, array-type tools are concatenated (first, then second),
#   'settings' takes the value from +second+, and any other key keeps the
#   value from +first+ unless that value is nil/false.
# - 'output': concatenated (first, then second).
# - 'before_url_report' / 'after_url_report': +first+ wins.
# - 'settings' and 'report': combined with merge_deep.
#
# NOTE: 'paths' is not carried over into the result.
#
# @param first [Hash] normalized config.
# @param second [Hash] normalized config.
# @return [Hash] the merged config.
def self.merge(first, second)
  result = {
    'before' => {},
    'after' => {},
    'output' => [],
    'settings' => {}
  }

  # Merge sanitization rules.
  Sanitizer::TOOLS.values.flatten(1).each do |key|
    result[key] = second[key] || first[key]
    result.delete(key) unless result[key]
  end

  # Merge the per-site sections. Hash#merge (not merge!) builds a new Hash,
  # leaving first[pos] and second[pos] untouched.
  %w[before after].each do |pos|
    first_section = first[pos] || {}
    second_section = second[pos] || {}

    result[pos] = first_section.merge(second_section) do |key, a, b|
      if Sanitizer::TOOLS[:array].include? key
        # Array-type tools accumulate.
        (a || []) + (b || [])
      elsif key == 'settings'
        b
      else
        # Scalars: keep the first value unless it is missing.
        a || b
      end
    end
  end

  # Merge output array.
  result['output'] = (first['output'] || []) + (second['output'] || [])

  # Merge url_report keys.
  %w[before_url_report after_url_report].each do |pos|
    result[pos] = first[pos] || second[pos]
  end

  # Merge settings.
  result['settings'] = merge_deep(
    first['settings'] || {},
    second['settings'] || {}
  )

  # Merge report labels.
  result['report'] = merge_deep(
    first['report'] || {},
    second['report'] || {}
  )

  result
end

.merge_deep(first, second) ⇒ Object

Merges 2 iterable objects deeply.



195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/sitediff/config.rb', line 195

# Merges two Hashes deeply, returning a new Hash.
#
# For keys present in both Hashes:
# - two Hashes are merged recursively,
# - two Arrays are concatenated (first, then second),
# - a nil second value keeps a Hash/Array first value,
# - anything else takes the value from +second+.
#
# @param first [Hash]
# @param second [Hash]
# @return [Hash]
def self.merge_deep(first, second)
  first.merge(second) do |_key, val1, val2|
    # Dispatch on the value itself: `case val1.class when Hash` would test
    # `Hash === Hash`, which is always false.
    if val1.is_a?(Hash) && (val2.nil? || val2.is_a?(Hash))
      merge_deep(val1, val2 || {})
    elsif val1.is_a?(Array) && (val2.nil? || val2.is_a?(Array))
      val1 + (val2 || [])
    else
      val2
    end
  end
end

.normalize(conf) ⇒ Object

Takes a Hash and normalizes it to the following form by merging globals into before and after. A normalized config Hash looks like this:

paths:
- /about

before:
  url: http://before
  selector: body
  ## Note: use either `selector` or `regions`, but not both
  regions:
    - name: title
      selector: .field-name-title h2
    - name: body
      selector: .field-name-field-news-description .field-item
  dom_transform:
  - type: remove
    selector: script

after:
  url: http://after
  selector: body

## Note: use `output` only with `regions`
output:
  - title
  - author
  - source
  - body


99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/sitediff/config.rb', line 99

# Normalizes a raw config Hash by folding the global (top-level) values into
# the 'before' and 'after' sections, then drops every top-level key that is
# not listed in ALLOWED_CONFIG_KEYS.
#
# For each of 'before' and 'after':
# - array-type tools get the global entries appended,
# - scalar-type tools fall back to the global value,
# - 'url' falls back to the legacy top-level 'before_url' / 'after_url',
# - 'curl_opts' is set to the global 'curl_opts'.
#
# NOTE: +conf+ is modified in place while the sections are built.
#
# @param conf [Hash] raw configuration.
# @return [Hash] a new Hash containing only the allowed top-level keys.
def self.normalize(conf)
  tools = Sanitizer::TOOLS

  # Merge globals
  %w[before after].each do |pos|
    conf[pos] ||= {}
    tools[:array].each do |key|
      conf[pos][key] ||= []
      conf[pos][key] += conf[key] if conf[key]
    end
    tools[:scalar].each { |key| conf[pos][key] ||= conf[key] }

    # Legacy top-level key, e.g. 'before_url'. Only used when the section has
    # no 'url' of its own.
    legacy_url = conf["#{pos}_url"]
    conf[pos]['url'] ||= legacy_url if legacy_url
    conf[pos]['curl_opts'] = conf['curl_opts']
  end

  # Normalize paths.
  conf['paths'] = Config.normalize_paths(conf['paths'])

  conf.select { |k, _v| ALLOWED_CONFIG_KEYS.include? k }
end

.remove_defaults(data) ⇒ Object

Removes default parameters from a config hash.

I know this is weird, but it’ll be fixed. The config management needs to be streamlined further.



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/sitediff/config.rb', line 223

# Returns a copy of a config Hash with default parameters removed.
#
# Removed from 'settings': every entry that is nil/false or equal to the
# corresponding DEFAULT_CONFIG['settings'] value. Removed from
# 'settings' => 'curl_opts': every entry equal to the matching
# UriWrapper::DEFAULT_CURL_OPTS value (looked up by symbolized key); the
# 'curl_opts' key itself is dropped when nothing remains.
#
# @param data [Hash] config data; left unmodified.
# @return [Hash] a deep copy of +data+ without defaults. A 'settings' Hash is
#   always present in the result.
def self.remove_defaults(data)
  # Create a deep copy of the config data so the caller's Hash is untouched.
  result = Marshal.load(Marshal.dump(data))
  settings = (result['settings'] ||= {})

  # Exclude default settings.
  settings.delete_if do |key, value|
    value == DEFAULT_CONFIG['settings'][key] || !value
  end

  # Exclude default curl opts; delete the key entirely if nothing is left.
  curl_opts = settings['curl_opts']
  if curl_opts
    curl_opts.delete_if do |key, value|
      value == UriWrapper::DEFAULT_CURL_OPTS[key.to_sym]
    end
    settings.delete('curl_opts') if curl_opts.empty?
  end

  result
end

.stringify_keys(object) ⇒ Object

Returns object clone with stringified keys. TODO: Make this method available globally, if required.



451
452
453
454
455
456
457
458
459
460
461
462
463
464
# File 'lib/sitediff/config.rb', line 451

# Builds a copy of +object+ in which Symbol keys are replaced by Strings,
# recursing into every value. Anything that does not respond to `each_key`
# (scalars, Arrays, ...) is returned unchanged.
#
# @param object [Object]
# @return [Object] a new Hash for hash-like input, otherwise +object+ itself.
def self.stringify_keys(object)
  # Leaf value: nothing to convert.
  return object unless object.respond_to?('each_key')

  {}.tap do |converted|
    object.each_key do |key|
      string_key = key.is_a?(Symbol) ? key.to_s : key
      converted[string_key] = stringify_keys(object[key])
    end
  end
end

Instance Method Details

#after(apply_preset: false) ⇒ Object

Get “after” site configuration.



275
276
277
# File 'lib/sitediff/config.rb', line 275

# Returns the "after" site configuration.
#
# @param apply_preset [Boolean] forwarded to `section` as `with_preset`.
# @return [Object] whatever `section(:after, ...)` returns; `section` is
#   defined outside this excerpt (presumably a Hash - TODO confirm).
def after(apply_preset: false)
  section(:after, with_preset: apply_preset)
end

#after_time=(time) ⇒ Object

Set crawl time for ‘after’



350
351
352
# File 'lib/sitediff/config.rb', line 350

# Stores the crawl time of the "after" site under
# @config['report']['after_time'].
#
# @param time [Object] the value to record (type not visible here).
def after_time=(time)
  @config['report']['after_time'] = time
end

#after_url ⇒ Object

Get “after” site URL.



280
281
282
283
# File 'lib/sitediff/config.rb', line 280

# Looks up the base URL of the "after" site.
#
# @return [Object, nil] the 'url' entry of the "after" section, or nil when
#   the section is missing.
def after_url
  site = after
  site ? site['url'] : nil
end

#all ⇒ Hash

Gets all loaded configuration except defaults.

Returns:

  • (Hash)

    Config data.



213
214
215
216
# File 'lib/sitediff/config.rb', line 213

# Gets all loaded configuration except defaults.
#
# A Marshal round-trip produces a deep copy of @config, which is then handed
# to the class-level remove_defaults.
#
# @return [Hash] config data.
def all
  self.class.remove_defaults(Marshal.load(Marshal.dump(@config)))
end

#before(apply_preset: false) ⇒ Object

Get “before” site configuration.



264
265
266
# File 'lib/sitediff/config.rb', line 264

# Returns the "before" site configuration.
#
# @param apply_preset [Boolean] forwarded to `section` as `with_preset`.
# @return [Object] whatever `section(:before, ...)` returns; `section` is
#   defined outside this excerpt (presumably a Hash - TODO confirm).
def before(apply_preset: false)
  section(:before, with_preset: apply_preset)
end

#before_time=(time) ⇒ Object

Set crawl time for ‘before’



345
346
347
# File 'lib/sitediff/config.rb', line 345

# Stores the crawl time of the "before" site under
# @config['report']['before_time'].
#
# @param time [Object] the value to record (type not visible here).
def before_time=(time)
  @config['report']['before_time'] = time
end

#before_url ⇒ Object

Get “before” site URL.



269
270
271
272
# File 'lib/sitediff/config.rb', line 269

# Looks up the base URL of the "before" site.
#
# @return [Object, nil] the 'url' entry of the "before" section, or nil when
#   the section is missing.
def before_url
  site = before
  site ? site['url'] : nil
end

#curl_opts ⇒ Object

Return merged CURL options.



483
484
485
486
487
488
489
490
491
# File 'lib/sitediff/config.rb', line 483

# Return merged CURL options: UriWrapper::DEFAULT_CURL_OPTS overlaid with the
# 'curl_opts' setting. The String values 'true' and 'false' are converted to
# real booleans; every other value is passed through unchanged.
#
# @return [Hash] a new Hash; the defaults constant is not modified.
def curl_opts
  # We do want string keys here
  booleans = { 'true' => true, 'false' => false }

  UriWrapper::DEFAULT_CURL_OPTS
    .merge(settings['curl_opts'] || {})
    .transform_values! { |value| booleans.fetch(value, value) }
end

#export ⇒ Object

Get export option



318
319
320
# File 'lib/sitediff/config.rb', line 318

# Reads the 'export' entry of the loaded config.
#
# @return [Object, nil] the stored value, or nil when the key is absent.
def export
  @config['export']
end

#export=(export) ⇒ Object

Set export option



323
324
325
# File 'lib/sitediff/config.rb', line 323

# Stores +export+ under the 'export' key of the loaded config. No validation
# is performed here.
def export=(export)
  @config['export'] = export
end

#ignore_whitespace ⇒ Object

Get ignore_whitespace option



298
299
300
# File 'lib/sitediff/config.rb', line 298

# Reads the 'ignore_whitespace' entry of the loaded config.
#
# @return [Object, nil] the stored value, or nil when the key is absent.
def ignore_whitespace
  @config['ignore_whitespace']
end

#ignore_whitespace=(ignore_whitespace) ⇒ Object

Set ignore_whitespace option



303
304
305
# File 'lib/sitediff/config.rb', line 303

# Stores +ignore_whitespace+ under the 'ignore_whitespace' key of the loaded
# config. No validation is performed here.
def ignore_whitespace=(ignore_whitespace)
  @config['ignore_whitespace'] = ignore_whitespace
end

#output ⇒ Object

Get output option



328
329
330
# File 'lib/sitediff/config.rb', line 328

# Reads the 'output' entry of the loaded config.
#
# @return [Object, nil] the stored value, or nil when the key is absent.
def output
  @config['output']
end

#output=(output) ⇒ Object

Set output option



333
334
335
336
337
# File 'lib/sitediff/config.rb', line 333

# Replaces the 'output' entry of the loaded config.
#
# @param output [Array] the new value.
# @raise [RuntimeError] when +output+ is not an Array.
def output=(output)
  if output.is_a?(Array)
    @config['output'] = output
  else
    raise 'Output must be an Array'
  end
end

#paths ⇒ Object

Get paths.



286
287
288
# File 'lib/sitediff/config.rb', line 286

# Reads the 'paths' entry of the loaded config.
#
# @return [Object, nil] the stored value, or nil when the key is absent.
def paths
  @config['paths']
end

#paths=(paths) ⇒ Object

Set paths.



291
292
293
294
295
# File 'lib/sitediff/config.rb', line 291

# Replaces the 'paths' entry of the loaded config with the result of
# Config.normalize_paths applied to +paths+.
#
# @param paths [Array] the new paths.
# @raise [RuntimeError] when +paths+ is not an Array.
def paths=(paths)
  if paths.is_a?(Array)
    @config['paths'] = Config.normalize_paths(paths)
  else
    raise 'Paths must be an Array'
  end
end

#paths_file_read(file = nil) ⇒ Integer

Reads a collection of paths from a file.

Parameters:

  • file (String) (defaults to: nil)

    A file containing one path per line.

Returns:

  • (Integer)

    Number of paths read.



378
379
380
381
382
383
384
385
386
387
388
389
# File 'lib/sitediff/config.rb', line 378

# Loads paths from a text file (one path per line) and assigns them through
# #paths=.
#
# @param file [String, nil] file to read; when omitted,
#   DEFAULT_PATHS_FILENAME inside @directory is used.
# @return [Integer] number of paths held by the config after loading.
# @raise [Config::InvalidConfig] when the file does not exist.
def paths_file_read(file = nil)
  file = File.join(@directory, DEFAULT_PATHS_FILENAME) unless file
  raise Config::InvalidConfig, "File not found: #{file}" unless File.exist?(file)

  # The setter takes care of normalization.
  self.paths = File.readlines(file)

  paths.size
end

#paths_file_write(paths, file = nil) ⇒ Object

Writes an array of paths to a file.

Parameters:

  • paths (Array)

    An array of paths.

  • file (String) (defaults to: nil)

    Optional path to a file.



361
362
363
364
365
366
367
368
# File 'lib/sitediff/config.rb', line 361

# Writes an array of paths to a file, one entry per line, replacing any
# previous content.
#
# @param paths [Array] a non-empty array of paths.
# @param file [String, nil] target file; when omitted,
#   DEFAULT_PATHS_FILENAME inside @directory is used.
# @raise [SiteDiffException] when +paths+ is not an Array or is empty.
def paths_file_write(paths, file = nil)
  if !paths.is_a?(Array) || paths.empty?
    raise SiteDiffException, 'Write failed. Invalid paths.'
  end

  target = file || File.join(@directory, DEFAULT_PATHS_FILENAME)
  File.open(target, 'w+') { |handle| handle.puts(paths) }
end

#remove_html_comments ⇒ Object

Get remove_html_comments option



308
309
310
# File 'lib/sitediff/config.rb', line 308

# Reads the 'remove_html_comments' entry of the loaded config.
#
# @return [Object, nil] the stored value, or nil when the key is absent.
def remove_html_comments
  @config['remove_html_comments']
end

#remove_html_comments=(remove_html_comments) ⇒ Object

Set remove_html_comments option



313
314
315
# File 'lib/sitediff/config.rb', line 313

# Stores +remove_html_comments+ under the 'remove_html_comments' key of the
# loaded config. No validation is performed here.
def remove_html_comments=(remove_html_comments)
  @config['remove_html_comments'] = remove_html_comments
end

#report ⇒ Object

Return report display settings.



340
341
342
# File 'lib/sitediff/config.rb', line 340

# Reads the 'report' entry of the loaded config (the same Hash that
# #before_time= and #after_time= write into).
#
# @return [Object, nil] the stored value, or nil when the key is absent.
def report
  @config['report']
end

#roots ⇒ Object

Get roots.

Example: If the config has a “before” and “after” sections, then roots will be [“before”, “after”].



396
397
398
399
400
# File 'lib/sitediff/config.rb', line 396

# Collects the base URLs of the configured sites, keyed by section name.
# 'after' is always present; 'before' is added only when a "before" section
# exists. The Hash is rebuilt (and stored in @roots) on every call.
#
# @return [Hash] e.g. { 'after' => url, 'before' => url }.
def roots
  @roots = { 'after' => after_url }
  return @roots unless before

  @roots['before'] = before_url
  @roots
end

#setting(key) ⇒ *

Gets a setting.

Parameters:

  • key (String)

    A key.

Returns:

  • (*)

    A value, if exists.



410
411
412
413
# File 'lib/sitediff/config.rb', line 410

# Gets a single setting.
#
# @param key [String, Symbol] setting name; Symbols are converted to Strings.
# @return [Object, nil] the value, or nil when the setting is not defined.
def setting(key)
  name = key.is_a?(Symbol) ? key.to_s : key
  known = @config['settings']

  known.key?(name) ? known[name] : nil
end

#settings ⇒ Hash

Gets all settings.

TODO: Make sure the settings are not writable.

Returns:

  • (Hash)

    All settings.



422
423
424
# File 'lib/sitediff/config.rb', line 422

# Gets all settings.
#
# NOTE: this returns the live Hash stored in @config, not a copy, so callers
# can modify it.
#
# @return [Hash] all settings.
def settings
  @config['settings']
end

#validate(opts = {}) ⇒ Object

Checks if the configuration is usable for diff-ing. TODO: Do we actually need the opts argument?

Raises:

  • (InvalidConfig)



428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
# File 'lib/sitediff/config.rb', line 428

# Checks if the configuration is usable for diff-ing.
#
# Checks, in order: the 'before' base URL (skippable), the 'after' base URL,
# the interval/concurrency combination, and the preset name.
#
# @param opts [Hash] options; `need_before: false` skips the 'before' URL
#   check (it is required by default).
# @return [Object, nil] the result of Preset.exist? when a preset is
#   configured, otherwise nil.
# @raise [InvalidConfig] when a required URL is missing, or when an interval
#   is combined with a concurrency other than 1.
def validate(opts = {})
  options = { need_before: true }.merge(opts)

  before_missing = options[:need_before] && !before['url']
  raise InvalidConfig, "Undefined 'before' base URL." if before_missing

  after_missing = !after['url']
  raise InvalidConfig, "Undefined 'after' base URL." if after_missing

  # Validate interval and concurrency.
  interval = setting(:interval)
  concurrency = setting(:concurrency)
  unless interval.to_i.zero? || concurrency == 1
    raise InvalidConfig, 'Concurrency must be 1 when an interval is set.'
  end

  # Validate preset.
  preset = setting(:preset)
  Preset.exist?(preset, exception: true) if preset
end