Class: FindSubscriptions::PayeeNormalizer

Inherits:
Object
  • Object
show all
Defined in:
lib/find_subscriptions/payee_normalizer.rb

Overview

Normalizes payee names and resolves their display names using YAML-defined rules (regex patterns mapped to canonical names).

Defined Under Namespace

Classes: Rule

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(rules: []) ⇒ PayeeNormalizer

Returns a new instance of PayeeNormalizer.



12
13
14
# File 'lib/find_subscriptions/payee_normalizer.rb', line 12

# Creates a normalizer that holds the given rules.
#
# @param rules [Array] rule objects to store; defaults to an empty list
#   (presumably Rule instances, as produced by .build_rule — confirm with callers)
def initialize(rules: [])
  @rules = rules
end

Instance Attribute Details

#rules ⇒ Object (readonly)

Returns the value of attribute rules.



8
9
10
# File 'lib/find_subscriptions/payee_normalizer.rb', line 8

# Read-only access to the rule list held by this normalizer.
#
# @return [Object] the value stored in @rules
def rules
  @rules
end

Class Method Details

.build_rule(rule_hash) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
# File 'lib/find_subscriptions/payee_normalizer.rb', line 44

# Converts one parsed YAML entry into a Rule.
#
# @param rule_hash [Hash] entry with string keys 'normalized' and 'patterns',
#   plus an optional 'name'
# @return [Rule] rule holding the name, the normalized key as a String, and
#   one compiled Regexp per pattern
# @raise [ArgumentError] when rule_hash is not a Hash or lacks a truthy
#   'normalized' or 'patterns' entry
def self.build_rule(rule_hash)
  well_formed = rule_hash.is_a?(Hash) && rule_hash['normalized'] && rule_hash['patterns']
  raise ArgumentError, "Each rule needs 'normalized' and 'patterns'" unless well_formed

  # Array() lets a single pattern string stand in for a one-element list.
  compiled = Array(rule_hash['patterns']).map { |pattern| parse_regex(pattern) }

  Rule.new(name: rule_hash['name'], normalized: rule_hash['normalized'].to_s, regexes: compiled)
end

.from_yaml(path) ⇒ Object

Raises:

  • (ArgumentError)


35
36
37
38
39
40
41
42
# File 'lib/find_subscriptions/payee_normalizer.rb', line 35

# Builds a normalizer from a YAML rules file.
#
# A nil/false path, or a path that does not exist, produces a normalizer with
# no rules instead of an error.
#
# @param path [String, nil] location of the YAML rules file
# @return [Object] a new instance created with one rule per YAML entry
# @raise [ArgumentError] when the parsed document is not an Array
def self.from_yaml(path)
  rules_file_present = path && File.exist?(path)
  return new(rules: []) unless rules_file_present

  entries = YAML.load_file(path)
  unless entries.is_a?(Array)
    raise ArgumentError, 'known payees YAML must be an array of rules'
  end

  new(rules: entries.map { |entry| build_rule(entry) })
end

.parse_regex(str) ⇒ Object



56
57
58
59
60
61
62
63
64
# File 'lib/find_subscriptions/payee_normalizer.rb', line 56

# Compiles a "/pattern/flags" string into a Regexp.
#
# The pattern is everything between the leading slash and the LAST slash, so
# the pattern itself may contain slashes; whatever follows the last slash is
# handed to regex_flags.
#
# @param str [#to_s] textual regex such as "/foo/i" (surrounding whitespace is ignored)
# @return [Regexp] the compiled expression
# @raise [ArgumentError] when the text does not start with "/" or has fewer than two slashes
def self.parse_regex(str)
  literal = str.to_s.strip
  if !literal.start_with?('/') || literal.count('/') < 2
    raise ArgumentError, "Invalid regex string: #{str.inspect} (expected like \"/foo/i\")"
  end

  # Split around the final slash: left side is "/pattern", right side is the flags.
  slash_and_pattern, _slash, flag_letters = literal.rpartition('/')
  Regexp.new(slash_and_pattern[1..], regex_flags(flag_letters))
end

.regex_flags(flags) ⇒ Object



66
67
68
69
70
71
72
# File 'lib/find_subscriptions/payee_normalizer.rb', line 66

# Translates flag letters into a Regexp options bitmask.
#
# Recognizes "i" (IGNORECASE), "m" (MULTILINE) and "x" (EXTENDED); any other
# letter contributes nothing, and nil yields 0.
#
# @param flags [String, nil] flag letters, e.g. "i" or "mx"
# @return [Integer] OR-combination of the matching Regexp option constants
def self.regex_flags(flags)
  flag_bits = {
    'i' => Regexp::IGNORECASE,
    'm' => Regexp::MULTILINE,
    'x' => Regexp::EXTENDED
  }

  flag_bits.reduce(0) do |mask, (letter, bit)|
    flags&.include?(letter) ? mask | bit : mask
  end
end

Instance Method Details

#display_name(raw_payee) ⇒ Object



25
26
27
28
29
# File 'lib/find_subscriptions/payee_normalizer.rb', line 25

# Looks up the display name for a raw payee string.
#
# Rules are tried in order; the first rule with any matching regex wins.
#
# @param raw_payee [#to_s] payee text as it appears in the input data
# @return [Object, nil] the winning rule's name, or nil when no rule matches
#   (also nil when the winning rule has no name)
def display_name(raw_payee)
  payee_text = raw_payee.to_s

  @rules.each do |candidate|
    return candidate.name if candidate.regexes.any? { |pattern| pattern.match?(payee_text) }
  end

  nil
end

#fallback_normalize(payee) ⇒ Object



74
75
76
77
78
79
# File 'lib/find_subscriptions/payee_normalizer.rb', line 74

# Generic normalization for payees that no rule recognizes.
#
# Lowercases the text, keeps only ASCII letters, digits and "+", and turns
# every run of anything else (punctuation, symbols, whitespace) into a single
# space.
#
# @param payee [#to_s] raw payee text; nil is treated as an empty string
# @return [String] the cleaned-up key with no leading or trailing spaces
def fallback_normalize(payee)
  # Coerce first so nil, Symbol or Integer input does not raise NoMethodError;
  # #normalize and #display_name coerce their argument the same way.
  payee.to_s
       .downcase
       .gsub(/[^a-z0-9+]+/, ' ') # "+" is kept because payee names such as "P+" rely on it
       .strip
end

#known_payee_key?(normalized_key) ⇒ Boolean

Returns:

  • (Boolean)


31
32
33
# File 'lib/find_subscriptions/payee_normalizer.rb', line 31

# Tells whether some rule uses the given normalized key.
#
# @param normalized_key [Object] key to look for (compared with ==)
# @return [Boolean] true when at least one rule's normalized value equals the key
def known_payee_key?(normalized_key)
  known_keys = @rules.map(&:normalized)
  known_keys.include?(normalized_key)
end

#normalize(raw_payee) ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/find_subscriptions/payee_normalizer.rb', line 16

# Maps a raw payee string to its normalized key.
#
# Rules are tried in order and the first rule with any matching regex supplies
# the key; when nothing matches, the text goes through fallback_normalize.
#
# @param raw_payee [#to_s] payee text as it appears in the input data
# @return [Object] the matching rule's normalized value, or the
#   fallback-normalized text
def normalize(raw_payee)
  payee_text = raw_payee.to_s

  @rules.each do |candidate|
    return candidate.normalized if candidate.regexes.any? { |pattern| pattern.match?(payee_text) }
  end

  # No rule recognized this payee: apply the generic clean-up instead.
  fallback_normalize(payee_text)
end