Class: ConfidentialInfoRedactor::Redactor

Inherits:
Object
  • Object
show all
Defined in:
lib/confidential_info_redactor/redactor.rb

Overview

This class redacts email addresses, hyperlinks, dates, numbers, and other tokens from a text.

Constant Summary collapse

# Pattern used to find numeric tokens in a text. Its alternatives, in order:
#   1. a number at the very start of the text (or right after a leading "("),
#      optionally prefixed by one non-"(" character, with "," / "." allowed
#      between digits. NOTE(review): this branch consumes the trailing
#      delimiter, whereas branch 2 only looks ahead for it.
#   2. the same number shape after whitespace (or whitespace + "("), with the
#      trailing delimiter checked by lookahead and left out of the match.
#   3. ordinals after whitespace: digits followed by "st", "nd", "rd" or "th"
#      ("rd" was previously missing, so "3rd" / "23rd" were never matched).
#   4. digit/digit fractions after whitespace, optionally followed by double
#      quotes, e.g. 3/4".
#   5. one non-space character plus digits enclosed in parentheses, e.g. ($5).
#   6. one non-space character plus digits at the very end of the text.
NUMBER_REGEX =
/(?<=\A|\A\()[^(]?\d+((,|\.)*\d)*(\D?\s|\s|\.?\s|\.$)|(?<=\s|\s\()[^(]?\d+((,|\.)*\d)*(?=(\D?\s|\s|\.?\s|\.$))|(?<=\s)\d+(nd|rd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z/
# Case-insensitive pattern for e-mail addresses. A match must be preceded by
# the start of the text, whitespace or "(" and followed by the end of the
# text, whitespace, "." or ")". Those boundaries are lookarounds, so they are
# not part of the matched text.
EMAIL_REGEX =
/(?<=\A|\s|\()[\w+\-.]+@[a-z\d\-]+(\.[a-z]+)*\.[a-z]+(?=\z|\s|\.|\))/i

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text:, **args) ⇒ Redactor

Returns a new instance of Redactor.



13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/confidential_info_redactor/redactor.rb', line 13

# Stores the text and the redaction options on the new instance.
#
# @param text the text to redact (presumably a String; stored as given)
# @param args [Hash] optional settings. Recognised keys:
#   :language (falls back to 'en'), :tokens,
#   :number_text (falls back to '<redacted number>'),
#   :date_text (falls back to '<redacted date>'),
#   :token_text (falls back to '<redacted>'),
#   :ignore_emails, :ignore_dates, :ignore_numbers, :ignore_hyperlinks.
#   A nil or false value for a key with a fallback selects the fallback.
def initialize(text:, **args)
  language, tokens = args.values_at(:language, :tokens)
  number_label, date_label, token_label =
    args.values_at(:number_text, :date_text, :token_text)

  @text = text
  @language = language || 'en'
  @tokens = tokens
  @number_text = number_label || '<redacted number>'
  @date_text = date_label || '<redacted date>'
  @token_text = token_label || '<redacted>'
  # Flags are stored exactly as passed (nil when the key is absent).
  @ignore_emails, @ignore_dates, @ignore_numbers, @ignore_hyperlinks =
    args.values_at(:ignore_emails, :ignore_dates, :ignore_numbers, :ignore_hyperlinks)
end

Instance Attribute Details

#date_textObject (readonly)

Returns the value of attribute date_text.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @date_text, which the constructor takes from the :date_text
# option and defaults to '<redacted date>'.
#
# @return the stored @date_text value
def date_text = @date_text

#ignore_datesObject (readonly)

Returns the value of attribute ignore_dates.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @ignore_dates, taken from the :ignore_dates constructor option.
# When it is truthy, #redact skips the date redaction stage.
#
# @return the stored @ignore_dates value (nil when the option was not given)
def ignore_dates = @ignore_dates

#ignore_emailsObject (readonly)

Returns the value of attribute ignore_emails.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @ignore_emails, taken from the :ignore_emails constructor option.
# When it is truthy, #redact skips the e-mail redaction stage.
#
# @return the stored @ignore_emails value (nil when the option was not given)
def ignore_emails = @ignore_emails

#ignore_hyperlinksObject (readonly)

Returns the value of attribute ignore_hyperlinks.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @ignore_hyperlinks, taken from the :ignore_hyperlinks constructor
# option. When it is truthy, #redact skips the hyperlink redaction stage.
#
# @return the stored @ignore_hyperlinks value (nil when the option was not given)
def ignore_hyperlinks = @ignore_hyperlinks

#ignore_numbersObject (readonly)

Returns the value of attribute ignore_numbers.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @ignore_numbers, taken from the :ignore_numbers constructor
# option. When it is truthy, #redact skips the number redaction stage.
#
# @return the stored @ignore_numbers value (nil when the option was not given)
def ignore_numbers = @ignore_numbers

#languageObject (readonly)

Returns the value of attribute language.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @language, which the constructor takes from the :language option
# and defaults to 'en'.
#
# @return the stored @language value
def language = @language

#number_textObject (readonly)

Returns the value of attribute number_text.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @number_text, which the constructor takes from the :number_text
# option and defaults to '<redacted number>'.
#
# @return the stored @number_text value
def number_text = @number_text

#textObject (readonly)

Returns the value of attribute text.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @text, the value passed to the constructor as +text:+.
#
# @return the stored @text value
def text = @text

#token_textObject (readonly)

Returns the value of attribute token_text.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @token_text, which the constructor takes from the :token_text
# option and defaults to '<redacted>'.
#
# @return the stored @token_text value
def token_text = @token_text

#tokensObject (readonly)

Returns the value of attribute tokens.



12
13
14
# File 'lib/confidential_info_redactor/redactor.rb', line 12

# Reader for @tokens, taken from the :tokens constructor option.
#
# @return the stored @tokens value (nil when the option was not given)
def tokens = @tokens

Instance Method Details

#datesObject



26
27
28
# File 'lib/confidential_info_redactor/redactor.rb', line 26

# Applies only the date redaction stage to #text.
#
# @return whatever redact_dates returns for the current text
def dates = redact_dates(text)

#emailsObject



34
35
36
# File 'lib/confidential_info_redactor/redactor.rb', line 34

# Applies only the e-mail redaction stage to #text.
#
# @return whatever redact_emails returns for the current text
def emails = redact_emails(text)


#hyperlinksObject



38
39
40
# File 'lib/confidential_info_redactor/redactor.rb', line 38

# Applies only the hyperlink redaction stage to #text.
#
# @return whatever redact_hyperlinks returns for the current text
def hyperlinks = redact_hyperlinks(text)

#numbersObject



30
31
32
# File 'lib/confidential_info_redactor/redactor.rb', line 30

# Applies only the number redaction stage to #text.
#
# @return whatever redact_numbers returns for the current text
def numbers = redact_numbers(text)

#proper_nounsObject



42
43
44
# File 'lib/confidential_info_redactor/redactor.rb', line 42

# Applies only the token redaction stage (redact_tokens) to #text.
#
# @return whatever redact_tokens returns for the current text
def proper_nouns = redact_tokens(text)

#redactObject



46
47
48
49
50
51
52
53
54
55
56
# File 'lib/confidential_info_redactor/redactor.rb', line 46

# Runs the redaction stages over #text and returns the final result.
#
# Stages run in a fixed order: e-mails, hyperlinks, dates, numbers. Each one is
# skipped when its matching ignore_* reader is truthy. redact_tokens always
# runs last, on whatever the earlier stages produced.
#
# @return whatever redact_tokens returns for the processed text
def redact
  scrubbed = ignore_emails ? text : redact_emails(text)
  scrubbed = redact_hyperlinks(scrubbed) unless ignore_hyperlinks
  scrubbed = redact_dates(scrubbed) unless ignore_dates
  scrubbed = redact_numbers(scrubbed) unless ignore_numbers
  redact_tokens(scrubbed)
end