Class: SiteDiff::Rules

Inherits:
Object
  • Object
show all
Defined in:
lib/sitediff/rules.rb

Overview

Find appropriate rules for a given site

Instance Method Summary collapse

Constructor Details

#initialize(config, disabled = false) ⇒ Rules



10
11
12
13
14
15
# File 'lib/sitediff/rules.rb', line 10

# Set up a rule finder.
#
# @param config the configuration object that add_section later writes
#   'sanitization' entries into
# @param disabled when truthy, rules written by add_section are flagged
#   with 'disabled' => true
def initialize(config, disabled = false)
  @config = config
  @disabled = disabled
  # Populate @candidates from the bundled rule files.
  find_sanitization_candidates
  # Rules collected per tag; every tag lazily gets its own empty Set.
  @rules = Hash.new { |by_tag, tag| by_tag[tag] = Set.new }
end

Instance Method Details

#add_config ⇒ Object

Find all rules from all rulesets that apply for all pages



44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/sitediff/rules.rb', line 44

# Write the collected rules into the configuration.
#
# When rules were collected for :before, rules found only on one side go
# into that side's section ('before' / 'after') and rules found on both
# sides go into the top-level section. Otherwise the :after rules go into
# the top-level section.
def add_config
  # Must be checked before values_at: @rules creates an empty Set for any
  # missing key it is asked for, which would make :before always present.
  both_sides = @rules.key?(:before)
  before_rules, after_rules = @rules.values_at(:before, :after)

  return add_section(nil, after_rules) unless both_sides

  add_section('before', before_rules - after_rules)
  add_section('after', after_rules - before_rules)
  add_section(nil, before_rules & after_rules)
end

#add_section(name, rules) ⇒ Object



57
58
59
60
61
62
63
# File 'lib/sitediff/rules.rb', line 57

# Store a collection of rules under the 'sanitization' key of one section
# of the configuration, sorted by rule title.
#
# @param name key of the section inside @config to write to, or nil to
#   write to @config itself
# @param rules collection of rule hashes; nothing is written when empty
# @return the sorted Array that was stored, or nil when rules is empty
def add_section(name, rules)
  return if rules.empty?

  section = name ? @config[name] : @config
  # Flag disabled rules on copies rather than in place: the rule hashes are
  # also elements of Sets, and modifying an element while it is stored in a
  # Set changes its hash and leaves that Set in an unreliable state.
  entries = rules.map { |rule| @disabled ? rule.merge('disabled' => true) : rule }
  section['sanitization'] = entries.sort_by { |rule| rule['title'] }
end

#find_rules(html, doc) ⇒ Object

Yield a set of rules that seem reasonable for this HTML. Assumption: the YAML file is a list of regexp rules only.



36
37
38
39
40
41
# File 'lib/sitediff/rules.rb', line 36

# Pick the candidate rules that apply to a page.
#
# Each candidate is turned into a SiteDiff::Sanitizer::Regexp and kept
# only when that object reports that it applies to the given page.
#
# @param html passed through to applies?
# @param doc passed through to applies?
# @return the candidates for which applies? was truthy
def find_rules(html, doc)
  @candidates.select do |candidate|
    SiteDiff::Sanitizer::Regexp.create(candidate).applies?(html, doc)
  end
end

#find_sanitization_candidates ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
# File 'lib/sitediff/rules.rb', line 17

# Load the candidate rules from the rule files shipped next to this file.
#
# Resets @candidates to an empty Set, then merges in the 'sanitization'
# list of every regular '.yaml' file found directly inside the
# 'files/rules' directory beside this source file.
def find_sanitization_candidates
  @candidates = Set.new

  rules_dir = Pathname.new(__FILE__).dirname.join('files', 'rules')
  rules_dir.children.each do |entry|
    if entry.file? && entry.extname == '.yaml'
      @candidates.merge(YAML.load_file(entry)['sanitization'])
    end
  end
end

#handle_page(tag, html, doc) ⇒ Object



29
30
31
32
# File 'lib/sitediff/rules.rb', line 29

# Record the rules that apply to one page under the given tag.
#
# @param tag key in @rules under which the matching rules are accumulated
# @param html passed through to find_rules
# @param doc passed through to find_rules
# @return the accumulated rule Set for tag
def handle_page(tag, html, doc)
  @rules[tag].merge(find_rules(html, doc))
end