Class: ConfidentialInfoRedactorLite::Redactor

Inherits:
Object
  • Object
show all
Defined in:
lib/confidential_info_redactor_lite/redactor.rb

Overview

This class redacts several kinds of tokens from a text: email addresses, hyperlinks, dates, numbers, and caller-supplied tokens.

Constant Summary collapse

# Pattern for number-like tokens. It is an alternation of several cases, each
# anchored with lookbehind/lookahead on surrounding whitespace, control
# characters, parentheses, quotes, or string/line boundaries:
#   * digit runs that may contain `,`, `.` or `/` separators, optionally
#     preceded by one extra character (e.g. a sign or currency symbol)
#   * ordinals: digits followed by nd/th/st, after whitespace
#   * fractions: digits/digits, optionally followed by `"`, between whitespace
#   * one non-space character followed by digits, inside parentheses or at the
#     very end of the string
#   * a line consisting solely of digits
#   * digit runs followed by exactly two non-digit characters
# NOTE(review): the `ā€˜` sequence looks like a mis-encoded left single
# quotation mark — confirm against the encoding of the original source file.
NUMBER_REGEX =
/(?<=\A|\A\()[^(]?\d+((,|\.|\/)*\d)*(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$)|(?<=[[:cntrl:]]|[[:space:]]|\s|\s\(|\s'|\sā€˜)[^('ā€˜]?\d+((,|\.|\/)*\d)*\"*(?=(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$|(?<=\A|\A\(|\s|[[:cntrl:]]|[[:space:]]|\s\()[^(]?\d+((,|\.|\/)*\d)*\D{2}(?=($|\s+))/
# Case-insensitive pattern for an email address of the form local@domain.tld.
# The match must be preceded by the start of the string, whitespace, or `(`,
# and followed by the end of the string, whitespace, `.`, or `)`; those
# delimiters are lookarounds and are not part of the match itself.
EMAIL_REGEX =
/(?<=\A|\s|\()[\w+\-.]+@[a-z\d\-]+(\.[a-z]+)*\.[a-z]+(?=\z|\s|\.|\))/i

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dow:, dow_abbr:, months:, months_abbr:, **args) ⇒ Redactor

Returns a new instance of Redactor.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 13

# Builds a redactor from calendar vocabulary plus optional settings.
#
# @param dow [Object] stored in @dow (presumably day-of-week names)
# @param dow_abbr [Object] stored in @dow_abbr (presumably abbreviated day names)
# @param months [Object] stored in @months (presumably month names)
# @param months_abbr [Object] stored in @months_abbr (presumably abbreviated month names)
# @param args [Hash] optional settings, read by symbol key:
#   :language (falls back to 'en'), :tokens, the replacement strings
#   :number_text, :date_text, :token_text, :email_text, :hyperlink_text
#   (each falls back to a '<redacted ...>' placeholder when nil or false),
#   and the flags :ignore_emails, :ignore_dates, :ignore_numbers,
#   :ignore_hyperlinks (stored as given, nil when absent).
def initialize(dow:, dow_abbr:, months:, months_abbr:, **args)
  @language = args[:language] || 'en'
  @tokens = args[:tokens]

  # Replacement strings: a nil/false option falls back to the placeholder.
  {
    number_text: '<redacted number>',
    date_text: '<redacted date>',
    token_text: '<redacted>',
    email_text: '<redacted email>',
    hyperlink_text: '<redacted hyperlink>'
  }.each do |option, placeholder|
    instance_variable_set("@#{option}", args[option] || placeholder)
  end

  # Opt-out flags are copied verbatim.
  %i[ignore_emails ignore_dates ignore_numbers ignore_hyperlinks].each do |flag|
    instance_variable_set("@#{flag}", args[flag])
  end

  @dow, @dow_abbr, @months, @months_abbr = dow, dow_abbr, months, months_abbr
end

Instance Attribute Details

#date_text ⇒ Object (readonly)

Returns the value of attribute date_text.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Replacement string for dates, as stored by #initialize
# (args[:date_text], or '<redacted date>' when that is not given).
#
# @return [Object] the current value of @date_text
def date_text = @date_text

#dow ⇒ Object (readonly)

Returns the value of attribute dow.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# The value given as the `dow:` keyword to #initialize.
#
# @return [Object] the current value of @dow
def dow = @dow

#dow_abbr ⇒ Object (readonly)

Returns the value of attribute dow_abbr.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# The value given as the `dow_abbr:` keyword to #initialize.
#
# @return [Object] the current value of @dow_abbr
def dow_abbr = @dow_abbr

#email_text ⇒ Object (readonly)

Returns the value of attribute email_text.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Replacement string for email addresses, as stored by #initialize
# (args[:email_text], or '<redacted email>' when that is not given).
#
# @return [Object] the current value of @email_text
def email_text = @email_text

#hyperlink_text ⇒ Object (readonly)

Returns the value of attribute hyperlink_text.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Replacement string for hyperlinks, as stored by #initialize
# (args[:hyperlink_text], or '<redacted hyperlink>' when that is not given).
#
# @return [Object] the current value of @hyperlink_text
def hyperlink_text = @hyperlink_text

#ignore_dates ⇒ Object (readonly)

Returns the value of attribute ignore_dates.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Flag stored from args[:ignore_dates] by #initialize (nil when not given).
# #redact skips date redaction when this is truthy.
#
# @return [Object] the current value of @ignore_dates
def ignore_dates = @ignore_dates

#ignore_emails ⇒ Object (readonly)

Returns the value of attribute ignore_emails.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Flag stored from args[:ignore_emails] by #initialize (nil when not given).
# #redact skips email redaction when this is truthy.
#
# @return [Object] the current value of @ignore_emails
def ignore_emails = @ignore_emails

#ignore_hyperlinks ⇒ Object (readonly)

Returns the value of attribute ignore_hyperlinks.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Flag stored from args[:ignore_hyperlinks] by #initialize (nil when not
# given). #redact skips hyperlink redaction when this is truthy.
#
# @return [Object] the current value of @ignore_hyperlinks
def ignore_hyperlinks = @ignore_hyperlinks

#ignore_numbers ⇒ Object (readonly)

Returns the value of attribute ignore_numbers.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Flag stored from args[:ignore_numbers] by #initialize (nil when not given).
# #redact skips number redaction when this is truthy.
#
# @return [Object] the current value of @ignore_numbers
def ignore_numbers = @ignore_numbers

#language ⇒ Object (readonly)

Returns the value of attribute language.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Language setting stored by #initialize
# (args[:language], or 'en' when that is not given).
#
# @return [Object] the current value of @language
def language = @language

#months ⇒ Object (readonly)

Returns the value of attribute months.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# The value given as the `months:` keyword to #initialize.
#
# @return [Object] the current value of @months
def months = @months

#months_abbr ⇒ Object (readonly)

Returns the value of attribute months_abbr.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# The value given as the `months_abbr:` keyword to #initialize.
#
# @return [Object] the current value of @months_abbr
def months_abbr = @months_abbr

#number_text ⇒ Object (readonly)

Returns the value of attribute number_text.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Replacement string for numbers, as stored by #initialize
# (args[:number_text], or '<redacted number>' when that is not given).
#
# @return [Object] the current value of @number_text
def number_text = @number_text

#token_text ⇒ Object (readonly)

Returns the value of attribute token_text.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# Replacement string for tokens, as stored by #initialize
# (args[:token_text], or '<redacted>' when that is not given).
#
# @return [Object] the current value of @token_text
def token_text = @token_text

#tokens ⇒ Object (readonly)

Returns the value of attribute tokens.



12
13
14
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 12

# The value stored from args[:tokens] by #initialize (nil when not given).
#
# @return [Object] the current value of @tokens
def tokens = @tokens

Instance Method Details

#dates(text) ⇒ Object



31
32
33
34
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 31

# Nil-safe entry point for date redaction.
#
# @param text [String, nil] input text
# @return [Object] '' when text is nil; otherwise whatever redact_dates
#   returns for text (helper not shown here; presumably the text with dates
#   replaced)
def dates(text)
  text.nil? ? '' : redact_dates(text)
end

#dates_html(text) ⇒ Object



36
37
38
39
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 36

# Nil-safe entry point for the HTML variant of date redaction.
#
# @param text [String, nil] input text
# @return [Object] [] when text is nil; otherwise whatever redact_dates_html
#   returns for text (helper not shown here)
def dates_html(text)
  if text.nil?
    []
  else
    redact_dates_html(text)
  end
end

#emails(text) ⇒ Object



51
52
53
54
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 51

# Nil-safe entry point for email redaction.
#
# @param text [String, nil] input text
# @return [Object] '' when text is nil; otherwise whatever redact_emails
#   returns for text (helper not shown here; presumably the text with email
#   addresses replaced)
def emails(text)
  text.nil? ? '' : redact_emails(text)
end

#emails_html(text) ⇒ Object



56
57
58
59
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 56

# Nil-safe entry point for the HTML variant of email redaction.
#
# @param text [String, nil] input text
# @return [Object] [] when text is nil; otherwise whatever redact_emails_html
#   returns for text (helper not shown here)
def emails_html(text)
  if text.nil?
    []
  else
    redact_emails_html(text)
  end
end


#hyperlinks(text) ⇒ Object



61
62
63
64
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 61

# Nil-safe entry point for hyperlink redaction.
#
# @param text [String, nil] input text
# @return [Object] '' when text is nil; otherwise whatever redact_hyperlinks
#   returns for text (helper not shown here; presumably the text with
#   hyperlinks replaced)
def hyperlinks(text)
  text.nil? ? '' : redact_hyperlinks(text)
end


#hyperlinks_html(text) ⇒ Object



66
67
68
69
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 66

# Nil-safe entry point for the HTML variant of hyperlink redaction.
#
# @param text [String, nil] input text
# @return [Object] [] when text is nil; otherwise whatever
#   redact_hyperlinks_html returns for text (helper not shown here)
def hyperlinks_html(text)
  if text.nil?
    []
  else
    redact_hyperlinks_html(text)
  end
end

#numbers(text) ⇒ Object



41
42
43
44
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 41

# Nil-safe entry point for number redaction.
#
# @param text [String, nil] input text
# @return [Object] '' when text is nil; otherwise whatever redact_numbers
#   returns for text (helper not shown here; presumably the text with numbers
#   replaced)
def numbers(text)
  text.nil? ? '' : redact_numbers(text)
end

#numbers_html(text) ⇒ Object



46
47
48
49
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 46

# Nil-safe entry point for the HTML variant of number redaction.
#
# @param text [String, nil] input text
# @return [Object] [] when text is nil; otherwise whatever
#   redact_numbers_html returns for text (helper not shown here)
def numbers_html(text)
  if text.nil?
    []
  else
    redact_numbers_html(text)
  end
end

#proper_nouns(text) ⇒ Object



71
72
73
74
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 71

# Nil-safe entry point for token redaction.
#
# @param text [String, nil] input text
# @return [Object] '' when text is nil; otherwise whatever redact_tokens
#   returns for text (helper not shown here; presumably it replaces the
#   configured tokens)
def proper_nouns(text)
  text.nil? ? '' : redact_tokens(text)
end

#redact(text) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 76

# Runs the full plain-text redaction pipeline.
#
# Stages run in a fixed order: emails, hyperlinks, dates, numbers, then
# tokens. Each of the first four is skipped when its ignore_* flag is truthy;
# the token stage always runs.
#
# @param text [String, nil] input text
# @return [Object] '' when text is nil; otherwise the result of redact_tokens
#   applied to the output of the preceding stages
def redact(text)
  return '' if text.nil?

  scrubbed = ignore_emails ? text : redact_emails(text)
  scrubbed = redact_hyperlinks(scrubbed) unless ignore_hyperlinks
  scrubbed = redact_dates(scrubbed) unless ignore_dates
  scrubbed = redact_numbers(scrubbed) unless ignore_numbers
  redact_tokens(scrubbed)
end

#redact_html(text) ⇒ Object



89
90
91
92
93
94
95
# File 'lib/confidential_info_redactor_lite/redactor.rb', line 89

# Runs the HTML redaction pipeline: dates, emails, hyperlinks, then numbers.
#
# Each *_html helper's result is indexed with [0], and that element is fed to
# the next stage. Unlike #redact, this method does not consult the ignore_*
# flags and has no token stage.
#
# @param text [String, nil] input text
# @return [Object] [] when text is nil; otherwise element 0 of the
#   redact_numbers_html result for the output of the preceding stages
def redact_html(text)
  return [] if text.nil?

  without_dates = redact_dates_html(text)[0]
  without_emails = redact_emails_html(without_dates)[0]
  without_links = redact_hyperlinks_html(without_emails)[0]
  redact_numbers_html(without_links)[0]
end