Class: SiteDiff
- Inherits:
-
Object
- Object
- SiteDiff
- Defined in:
- lib/sitediff.rb,
lib/sitediff/cli.rb,
lib/sitediff/diff.rb,
lib/sitediff/cache.rb,
lib/sitediff/fetch.rb,
lib/sitediff/rules.rb,
lib/sitediff/config.rb,
lib/sitediff/result.rb,
lib/sitediff/crawler.rb,
lib/sitediff/sanitize.rb,
lib/sitediff/exception.rb,
lib/sitediff/webserver.rb,
lib/sitediff/uriwrapper.rb,
lib/sitediff/config/creator.rb,
lib/sitediff/sanitize/regexp.rb,
lib/sitediff/sanitize/dom_transform.rb,
lib/sitediff/webserver/resultserver.rb
Defined Under Namespace
Modules: Diff Classes: Cache, Cli, Config, Crawler, Fetch, Result, Rules, Sanitizer, SiteDiffException, SiteDiffReadFailure, UriWrapper, Webserver
Constant Summary collapse
- FILES_DIR =
path to misc. static files (e.g. erb, css files)
File.join(File.dirname(__FILE__), 'sitediff', 'files')
- DIFFS_DIR =
subdirectory containing all failing diffs
'diffs'
- FAILURES_FILE =
files in output
'failures.txt'
- REPORT_FILE =
'report.html'
- SETTINGS_FILE =
'settings.yaml'
Instance Attribute Summary collapse
-
#config ⇒ Object
readonly
Returns the value of attribute config.
-
#results ⇒ Object
readonly
Returns the value of attribute results.
Class Method Summary collapse
-
.log(str, type = :info, label = nil) ⇒ Object
label will be colorized and str will not be.
Instance Method Summary collapse
- #after ⇒ Object
- #before ⇒ Object
-
#dump(dir, report_before, report_after) ⇒ Object
Dump results to disk.
-
#initialize(config, cache, concurrency, interval, verbose = true, debug = false) ⇒ SiteDiff
constructor
A new instance of SiteDiff.
-
#process_results(path, read_results) ⇒ Object
Process a set of read results.
-
#run(curl_opts = {}, debug = true) ⇒ Object
Perform the comparison, populate @results and return the number of failing paths (paths with non-zero diff).
-
#sanitize(path, read_results) ⇒ Object
Sanitize HTML.
Constructor Details
#initialize(config, cache, concurrency, interval, verbose = true, debug = false) ⇒ SiteDiff
Returns a new instance of SiteDiff.
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/sitediff.rb', line 57

# Builds a SiteDiff run from a configuration and a cache.
#
# When the configuration has no 'before' URL but the cache reports a :before
# tag, the instance runs in single-site mode: a cached 'before' must be
# readable, and config validation is told that 'before' is not required.
#
# @param config [Object] configuration; must respond to #before and #validate
# @param cache [Object] cache; must respond to #tag?
# @param concurrency [Object] stored as-is; handed to Fetch by #run
# @param interval [Object] stored as-is; handed to Fetch by #run
# @param verbose [Boolean] passed to each result's #log by #process_results
# @param debug [Boolean] when true, #process_results re-raises sanitization errors
# @raise [SiteDiffException] in single-site mode when no cached 'before' is readable
def initialize(config, cache, concurrency, interval, verbose = true, debug = false)
  @cache = cache
  @verbose = verbose
  @debug = debug
  @interval = interval

  # Check for single-site mode
  validate_opts = {}
  if !config.before['url'] && @cache.tag?(:before)
    # NOTE(review): the accessor name was missing from this listing
    # ("@cache..include?"); `read_tags` is presumed - confirm against the
    # Cache class before relying on it.
    unless @cache.read_tags.include?(:before)
      raise SiteDiffException,
            "A cached 'before' is required for single-site mode"
    end
    validate_opts[:need_before] = false
  end
  config.validate(validate_opts)

  @concurrency = concurrency
  @config = config
end
Instance Attribute Details
#config ⇒ Object (readonly)
Returns the value of attribute config.
48 49 50 |
# File 'lib/sitediff.rb', line 48

# Read-only accessor for the configuration object stored by the constructor.
#
# @return [Object] the current value of @config
def config
  @config
end
#results ⇒ Object (readonly)
Returns the value of attribute results.
48 49 50 |
# File 'lib/sitediff.rb', line 48

# Read-only accessor for the comparison results; @results is assigned by #run.
#
# @return [Object] the current value of @results
def results
  @results
end
Class Method Details
.log(str, type = :info, label = nil) ⇒ Object
label will be colorized and str will not be. type dictates the color: can be :info, :diff_success, :diff_failure, :warn, or :error
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/sitediff.rb', line 25

# Prints a message to stdout behind a "[sitediff]" label. Only the label is
# colorized; str is appended unchanged.
#
# @param str [String] message text, printed after the label and a space
# @param type [Symbol] selects the label colors:
#   :diff_success (black on green), :warn (black on yellow),
#   :diff_failure and :error (red background); :info and any other value
#   leave the label uncolored
# @param label [String, nil] optional text appended to "[sitediff]"
def self.log(str, type = :info, label = nil)
  tag = label ? "[sitediff] #{label}" : '[sitediff]'

  # [background, foreground] for the label; nil means "do not set".
  background, foreground =
    case type
    when :diff_success then %i[green black]
    when :warn then %i[yellow black]
    when :diff_failure, :error then [:red, nil]
    else [nil, nil]
    end

  painted = Rainbow(tag)
  painted = painted.bg(background) if background
  painted = painted.fg(foreground) if foreground
  puts painted + ' ' + str
end
Instance Method Details
#after ⇒ Object
53 54 55 |
# File 'lib/sitediff.rb', line 53

# Looks up the 'url' entry of the configuration's 'after' section.
#
# @return [Object, nil] whatever @config.after holds under the 'url' key
def after
  site = @config.after
  site['url']
end
#before ⇒ Object
49 50 51 |
# File 'lib/sitediff.rb', line 49

# Looks up the 'url' entry of the configuration's 'before' section.
#
# @return [Object, nil] whatever @config.before holds under the 'url' key
def before
  site = @config.before
  site['url']
end
#dump(dir, report_before, report_after) ⇒ Object
Dump results to disk
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/sitediff.rb', line 144

# Dump results to disk.
#
# Writes into dir: the diff of every result whose status is
# Result::STATUS_FAILURE (after removing any existing diffs subdirectory),
# a file listing the path of every non-successful result, the HTML report,
# and a YAML settings file.
#
# @param dir [String, Pathname] output directory; created when missing
# @param report_before [Object, nil] 'before' value for the report and
#   settings; falls back to #before when nil/false
# @param report_after [Object, nil] 'after' value for the report and
#   settings; falls back to #after when nil/false
def dump(dir, report_before, report_after)
  report_before ||= before
  report_after ||= after
  dir = Pathname.new(dir)
  dir.mkpath unless dir.directory?

  # store diffs of each failing case, first wipe out existing diffs
  diff_dir = dir + DIFFS_DIR
  diff_dir.rmtree if diff_dir.exist?
  results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
  # NOTE(review): the method name after "dir." / "failures." was missing from
  # this listing; `expand_path` is presumed - confirm against the source file.
  SiteDiff.log "All diff files were dumped inside #{dir.expand_path}"

  # store failing paths
  failures = dir + FAILURES_FILE
  SiteDiff.log "Writing failures to #{failures.expand_path}"
  failures.open('w') do |f|
    results.each { |r| f.puts r.path unless r.success? }
  end

  # create report of results
  report = Diff.generate_html_report(results, report_before, report_after, @cache)
  (dir + REPORT_FILE).open('w') { |f| f.write(report) }

  # serve some settings
  settings = {
    'before' => report_before,
    'after' => report_after,
    'cached' => %w[before after]
  }
  (dir + SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
end
#process_results(path, read_results) ⇒ Object
Process a set of read results
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/sitediff.rb', line 92

# Process a set of read results for one path.
#
# Builds a Result for the path and stores it in @results, then logs every
# result that is next in @ordered, so output follows the original path order
# even when paths finish out of order.
#
# @param path [Object] key under which the result is stored in @results
# @param read_results [Hash] must hold :before and :after entries that
#   respond to #error and #encoding (and whatever #sanitize reads)
# @raise [StandardError] re-raises a sanitization error when @debug is set
def process_results(path, read_results)
  if (error = (read_results[:before].error || read_results[:after].error))
    diff = Result.new(path, nil, nil, nil, nil, error)
  else
    begin
      diff = Result.new(path,
                        *sanitize(path, read_results),
                        read_results[:before].encoding,
                        read_results[:after].encoding,
                        nil)
    rescue StandardError => e
      raise if @debug

      # The error result must be assigned: otherwise @results[path] ends up
      # nil, the ordered printing below stalls at this path, and callers that
      # iterate the results hit nil.
      diff = Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
    end
  end
  @results[path] = diff

  # Print results in order!
  while (next_diff = @results[@ordered.first])
    next_diff.log(@verbose)
    @ordered.shift
  end
end
#run(curl_opts = {}, debug = true) ⇒ Object
Perform the comparison, populate @results and return the number of failing paths (paths with non-zero diff).
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/sitediff.rb', line 119

# Perform the comparison, populate @results and return the number of failing
# paths (paths with non-zero diff).
#
# @param curl_opts [Hash] curl options; merged over the 'curl_opts' of the
#   'before' configuration when that is set, so these values win on conflict
# @param debug [Boolean] passed through to Fetch.new
# @return [Integer] number of results for which #success? is false
def run(curl_opts = {}, debug = true)
  # Map of path -> Result object, populated by process_results
  @results = {}
  @ordered = @config.paths.dup

  # NOTE(review): the accessor name was missing from this listing
  # ("@cache..empty?", "@cache..sort"); `read_tags` is presumed - confirm
  # against the Cache class.
  unless @cache.read_tags.empty?
    SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
  end

  # TODO: Fix this after config merge refactor!
  # Not quite right. We are not passing @config.before or @config.after
  # so passing this instead but @config.after['curl_opts'] is ignored.
  config_curl_opts = @config.before['curl_opts']
  # merge returns a new hash, so the configuration's own hash is left untouched.
  curl_opts = config_curl_opts.merge(curl_opts) if config_curl_opts

  fetcher = Fetch.new(@cache, @config.paths, @interval,
                      @concurrency, curl_opts, debug,
                      before: before, after: after)
  fetcher.run(&method(:process_results))

  # Order by original path order
  @results = @config.paths.map { |p| @results[p] }
  results.count { |r| !r.success? }
end
#sanitize(path, read_results) ⇒ Object
Sanitize HTML
78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/sitediff.rb', line 78

# Sanitize HTML for both sides of a comparison.
#
# For each of :before and :after, content that has an encoding is run through
# Sanitizer with that side's configuration; content without an encoding is
# returned untouched.
#
# @param path [Object] forwarded to Sanitizer as the :path option
# @param read_results [Hash] must hold :before and :after entries that
#   respond to #content and #encoding
# @return [Array] two elements, the 'before' value followed by the 'after' value
def sanitize(path, read_results)
  %i[before after].map do |tag|
    page = read_results[tag]
    html = page.content
    # No encoding: hand the content back as-is.
    next html unless page.encoding

    Sanitizer.new(html, @config.send(tag), path: path).sanitize
  end
end