Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/ndr_support/string/cleaning.rb,
lib/ndr_support/string/conversions.rb

Constant Summary collapse

# Matches the control characters in \x00-\x1f other than tab (\x09), line feed
# (\x0a) and carriage return (\x0d). Used by #strip_xml_unsafe_characters and
# #xml_unsafe?.
INVALID_CONTROL_CHARS =
/[\x00-\x08\x0b-\x0c\x0e-\x1f]/
# Lookup used by clean(:roman5) to turn roman numerals one to five into digits.
# Both 'IIII' and 'IV' map to '4'.
ROMAN_ONE_TO_FIVE_MAPPING =
{ 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }
# Shape of a postcode once whitespace has been removed and letters upcased (as
# #postcodeize does before matching): one of six leading letter/digit layouts
# followed by a digit and two letters. Note the anchors are the per-line ^ and $.
POSTCODE_REGEXP =
/
  ^(
    [A-Z][0-9]           |
    [A-Z][0-9][0-9]      |
    [A-Z][0-9][A-Z]      |
    [A-Z][A-Z][0-9]      |
    [A-Z][A-Z][0-9][0-9] |
    [A-Z][A-Z][0-9][A-Z]
  )
  [0-9][A-Z][A-Z]
$/x
# Letters that #soundex encodes as digits ...
SOUNDEX_CHARS =
'BPFVCSKGJQXZDTLMNR'
# ... and the digit for the letter at the same position in SOUNDEX_CHARS
# (the two strings are passed together to String#tr).
SOUNDEX_NUMS =
'111122222222334556'
# Negated character set: passed to String#delete to drop every character that
# is not in SOUNDEX_CHARS.
SOUNDEX_CHARS_EX =
'^' + SOUNDEX_CHARS
# Negated character set: passed to String#delete to drop everything but A-Z.
SOUNDEX_CHARS_DEL =
'^A-Z'

Instance Method Summary collapse

Instance Method Details

#clean(what) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/ndr_support/string/cleaning.rb', line 45

# Returns a cleaned version of the string; +what+ selects the cleaning rule.
# No branch in this method modifies the receiver.
#
#   :nhsnumber               digits only, truncated to the first ten
#   :postcode, :get_postcode postcodeize(:db)
#   :lpi                     upcased, keeping only 0-9 and A-Z
#   :gender                  '1' if it starts with M, '2' if it starts with F
#                            (case-insensitive), otherwise the string itself
#   :sex                     '1', '2' or '0' (see the note in that branch)
#   :sex_c                   as :sex, but 'M', 'F' or ''
#   :name                    upcased; '.' removed; ',' / ';' and runs of
#                            whitespace become one space; '`' becomes "'"
#   :ethniccategory          fixed lookup of single codes, otherwise upcased
#   :code                    separator-split codes with '.' removed
#   :code_icd                deprecated in favour of :icd
#   :icd                     upcased codes; '.' and/or 'X' after a digit removed
#   :code_opcs               clean_code_opcs
#   :hospitalnumber          drops the last character unless it is a digit
#   :xmlsafe, :make_xml_safe strip_xml_unsafe_characters
#   :roman5                  roman numerals I..V replaced by the digits 1..5
#   :tnmcategory             leading T/N/M removed; 'X' upcased, rest downcased
#   :upcase                  upcased
#   anything else            every literal ' ?' replaced by ' '
def clean(what)
  case what
  when :nhsnumber
    delete('^0-9')[0..9]
  when :postcode, :get_postcode
    postcodeize(:db)
  when :lpi
    upcase.delete('^0-9A-Z')
  when :gender
    if self =~ /\AM(ale)?/i
      '1'
    elsif self =~ /\AF(emale)?/i
      '2'
    else
      self
    end
  when :sex
    # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
    # Matches an M/F at the start of any line, or a 1/2 anywhere in the string.
    if self =~ /^M|1/i
      '1'
    elsif self =~ /^F|2/i
      '2'
    else
      '0'
    end
  when :sex_c
    if self =~ /^M|1/i
      'M'
    elsif self =~ /^F|2/i
      'F'
    else
      ''
    end
  when :name
    # Applied in insertion order, so whitespace runs created by removing
    # punctuation are collapsed afterwards.
    substitutions = {
      '.'      => '',
      /,|;/    => ' ',
      /\s{2,}/ => ' ',
      '`'      => '\''
    }
    substitutions.inject(upcase) { |a, e| a.gsub(*e) }.strip
  when :ethniccategory
    replace_ethniccategory = {
      '0' => '0',
      '1' => 'M',
      '2' => 'N',
      '3' => 'H',
      '4' => 'J',
      '5' => 'K',
      '6' => 'R',
      '7' => '8',
      '&' => 'X',
      ' ' => 'X',
      '99' => 'X'
    }
    replace_ethniccategory[self] || upcase
  when :code
    split_on_separators.map do |code|
      code.blank? ? next : code.delete('.')
    end.compact.join(' ')
  when :code_icd
    warn '[DEPRECATION] clean(:code_icd) is deprecated - consider using clean(:icd) instead.'
    # regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
    codes = upcase.split_on_separators.delete_if { |x| x.squash.blank? }
    cleaned_codes = []
    codes.each do |code|
      # A lone 'D' / 'A' is a suffix for the preceding code. When there is no
      # preceding code it is kept as its own entry instead of raising on nil.
      if (code == 'D' || code == 'A') && !cleaned_codes.empty?
        cleaned_codes[-1] += code
      else
        cleaned_codes << code
      end
    end
    cleaned_codes.join(' ')
  when :icd
    codes = upcase.squish.split_on_separators.reject(&:blank?)
    codes.map { |code| code.gsub(/(?<=\d)(\.?X?)/, '') }.join(' ')
  when :code_opcs
    clean_code_opcs
  when :hospitalnumber
    self[-1..-1] =~ /\d/ ? self : self[0..-2]
  when :xmlsafe, :make_xml_safe
    strip_xml_unsafe_characters
  when :roman5
    # This deromanises roman numerals between 1 and 5.
    # Runs of I/V with no entry in the mapping yield nil and so are removed.
    gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
  when :tnmcategory
    # Non-destructive sub: the receiver must not change (and may be frozen).
    category = sub(/\A[tnm]/i, '')
    if category =~ /\Ax\z/i
      category.upcase
    else
      category.downcase
    end
  when :upcase
    upcase
  else
    # The pattern is a String, so this replaces the literal two characters ' ?'.
    gsub(' ?', ' ')
  end
end

#date1 ⇒ Object



41
42
43
# File 'lib/ndr_support/string/conversions.rb', line 41

# Builds a Daterange from this string and returns that range's date1.
def date1
  range = Daterange.new(self)
  range.date1
end

#date2 ⇒ Object



45
46
47
# File 'lib/ndr_support/string/conversions.rb', line 45

# Builds a Daterange from this string and returns that range's date2.
def date2
  range = Daterange.new(self)
  range.date2
end

#nhs_numberize ⇒ Object

Show NHS numbers with spaces



75
76
77
78
# File 'lib/ndr_support/string/conversions.rb', line 75

# Formats a ten-character string as three space-separated groups of 3, 3 and 4
# characters. Strings of any other length are returned unchanged.
def nhs_numberize
  return self if length != 10

  [self[0, 3], self[3, 3], self[6, 4]].join(' ')
end

#postcodeize(option = :user) ⇒ Object

Show postcode in various formats. Parameter “option” can be :user, :compact, :db



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/ndr_support/string/cleaning.rb', line 26

# Formats a postcode. All whitespace is removed and the text upcased first; if
# the result is neither blank nor a POSTCODE_REGEXP match, the original string
# is returned untouched.
#
# option - :compact gives no spaces; :db pads 5- and 6-character postcodes to
#          seven characters by inserting spaces before the final three
#          characters; anything else (including the default :user) inserts a
#          single space before the final three characters.
def postcodeize(option = :user)
  compacted = gsub(/[[:space:]]/, '').upcase

  # Don't change old-style or malformed postcodes
  return self if !compacted.blank? && (POSTCODE_REGEXP =~ compacted).nil?

  case option
  when :compact
    compacted
  when :db
    padding = { 5 => '  ', 6 => ' ' }[compacted.length]
    padding ? compacted.insert(-4, padding) : compacted
  else
    # Only a blank string can be shorter than five characters at this point.
    if compacted.length < 5
      compacted
    else
      compacted.insert(-4, ' ')
    end
  end
end

#soundex(census = true) ⇒ Object



27
28
29
30
31
32
33
34
35
# File 'lib/ndr_support/string/conversions.rb', line 27

# Computes a soundex-style code: the first letter of the string followed by
# digits for the remaining encodable letters.
#
# census - when true (the default) at most three digits are kept; when false
#          every digit is kept. Either way the digit part is right-padded
#          with '0' to at least three characters.
#
# Returns '' when the string contains no A-Z letters after upcasing.
#
# NOTE(review): the digit string is truncated both before and after repeated
# digits are squeezed; this ordering is preserved from the original
# implementation so that previously computed codes stay comparable.
def soundex(census = true)
  letters = upcase.delete(SOUNDEX_CHARS_DEL).squeeze
  # Explicit guard instead of a blanket `rescue ''`: an empty letter string was
  # the only input the old rescue modifier was there to catch.
  return '' if letters.empty?

  last_index = census ? 2 : -1
  digits = letters[1..-1].delete(SOUNDEX_CHARS_EX).tr(SOUNDEX_CHARS, SOUNDEX_NUMS)
  digits = digits[0..last_index].squeeze[0..last_index]

  letters[0..0] + digits.ljust(3, '0')
end

#sounds_like(other) ⇒ Object



37
38
39
# File 'lib/ndr_support/string/conversions.rb', line 37

# True when this string and +other+ produce the same #soundex code.
def sounds_like(other)
  own_code = soundex
  own_code == other.soundex
end

#squash ⇒ Object

Used for comparing addresses



20
21
22
# File 'lib/ndr_support/string/cleaning.rb', line 20

# Upcases the string and removes every character that is not A-Z or 0-9.
def squash
  uppercased = upcase
  uppercased.delete('^A-Z0-9')
end

#strip_xml_unsafe_characters ⇒ Object



144
145
146
# File 'lib/ndr_support/string/cleaning.rb', line 144

# Returns a copy of the string with every character matched by
# String::INVALID_CONTROL_CHARS removed.
def strip_xml_unsafe_characters
  unsafe = String::INVALID_CONTROL_CHARS
  gsub(unsafe, '')
end

#surname_and_initials ⇒ Object

Convert “SMITH JD” into “Smith JD”



58
59
60
61
62
# File 'lib/ndr_support/string/conversions.rb', line 58

# Convert "SMITH JD" into "Smith JD": every whitespace-separated word except
# the last is capitalised, and the last word (the initials) is kept as-is.
#
# Returns '' for an empty or whitespace-only string (previously this raised a
# TypeError because there were no initials to append). A single-word string
# still comes back with a leading space, as before.
def surname_and_initials
  words = split
  return '' if words.empty?

  initials = words.pop
  words.map(&:capitalize).join(' ') + ' ' + initials
end

#surnameize ⇒ Object

Like titleize but copes with Scottish and Irish names.



65
66
67
68
69
70
71
72
# File 'lib/ndr_support/string/conversions.rb', line 65

# Like titleize, but a leading "Mc" or "O'" (matched case-insensitively) is
# titleized separately from the rest of the name, so the letter that follows
# the prefix is treated as the start of a new word.
def surnameize
  prefix = self[0, 2].upcase
  return titleize unless ['MC', "O'"].include?(prefix)

  remainder = self[2..-1]
  prefix.titleize + remainder.titleize
end

#thedate ⇒ Object



49
50
51
# File 'lib/ndr_support/string/conversions.rb', line 49

# Builds an Ourdate from this string and returns its thedate value.
def thedate
  parsed = Ourdate.new(self)
  parsed.thedate
end

#thetime ⇒ Object



53
54
55
# File 'lib/ndr_support/string/conversions.rb', line 53

# Builds an Ourtime from this string and returns its thetime value.
def thetime
  parsed = Ourtime.new(self)
  parsed.thetime
end

#to_boolean ⇒ Object

Try to convert the string value into a boolean.



131
132
133
134
135
136
# File 'lib/ndr_support/string/conversions.rb', line 131

# Converts the string into a boolean.
#
# Returns true for true/t/yes/y/1 and false for false/f/no/n/0, compared
# case-insensitively against the whole string (a single trailing newline is
# tolerated).
#
# Raises ArgumentError for anything else.
def to_boolean
  # Anchored with \A...\Z rather than ^...$ so that only the string as a whole
  # can match; with per-line anchors "yes\nno" was accepted as true.
  # The former `self == true`, `self == false` and `self.nil?` checks could
  # never succeed for a String and have been dropped.
  return true if self =~ /\A(true|t|yes|y|1)\Z/i
  return false if self =~ /\A(false|f|no|n|0)\Z/i

  raise ArgumentError, "invalid value for Boolean: \"#{self}\""
end

#to_date(pattern = nil) ⇒ Object

Try to convert the string value into a date. If a pattern is given, it is used to parse the date; otherwise the default parsing behaviour is used.



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/ndr_support/string/conversions.rb', line 87

# Tries to convert the string into a date.
#
# pattern - optional format hint. The values handled explicitly below are
#           'ddmmyyyy', 'yyyymmdd', 'yyyymmdd_ons', 'yyyy/dd/mm',
#           'mm/dd/yyyy' and any string containing '%' (passed to
#           DateTime.strptime). With any other value, or none, the string is
#           handed to ParseDate.
#
# Returns '' for an empty string; nil for a blank string, or for a
# 'yyyymmdd' / 'yyyymmdd_ons' value whose month or day is 00; the result of
# #date1 for dd.mm.yyyy strings; otherwise whatever Ourdate.build_datetime
# returns.
def to_date(pattern = nil)
  return ''  if empty? # TODO: check if this is used... :/
  return nil if blank?

  # Translate the named 'ddmmyyyy' format into its strptime equivalent.
  pattern = '%d%m%Y' if 'ddmmyyyy' == pattern

  if pattern == 'yyyymmdd' || pattern == 'yyyymmdd_ons'
    # Workaround for ONS dates (with missing day / month): revert to old
    # parsing behaviour. (Instead, EDeathRecord should substitute a Daterange)
    # TODO: Move all death parsing to format 'yyyymmdd_ons'
    # Matches yyyy00dd (zero month) or yyyymm00 (zero day).
    return nil if self =~ /\A([0-9]{4}00[0-9]{2}|[0-9]{6}00)\Z/
    pattern = '%Y%m%d'
  end

  # NOTE: this check runs regardless of the pattern that was supplied.
  if self =~ /\A([0-9][0-9]?)[.]([0-9][0-9]?)[.]([0-9][0-9][0-9][0-9])\Z/ # dd.mm.yyyy
    return date1 # Uses Daterange to consistently parse our displayed date format
  end

  if pattern.to_s.include?('%')
    # Use DateTime.strptime if the pattern contains a percent sign
    parsed_date = DateTime.strptime(self, pattern)
    Ourdate.build_datetime(parsed_date.year, parsed_date.month, parsed_date.day)
  else
    # Use '.' rather than '/' as a separator for more consistent parsing:
    year, month, day, *_ = ParseDate.parsedate(gsub('/', '.'))

    if ['yyyy/dd/mm', 'mm/dd/yyyy'].include?(pattern)
      # These two formats have day and month the other way round.
      month, day = day, month
    elsif 8 == length && self !~ /\d{8}/
      # dd/mm/yy, rather than yyyymmdd
      # NOTE(review): presumably ParseDate put the leading dd into the year
      # slot and the trailing yy into the day slot, hence the swap - confirm.
      year, day = day, year
      # Two-digit year: values up to the two-digit form of
      # Ourdate.today.year become 20xx, later values become 19xx.
      year += 100 if year <= Ourdate.today.year % 100
      year += 1900
    elsif 9 == length
      # dd/mmm/yy, rare case.
      # Same two-digit year expansion as above.
      year += 100 if year <= Ourdate.today.year % 100
      year += 1900
    end

    Ourdate.build_datetime(year, month, day)
  end
end

#truncate_hellip(n) ⇒ Object

Truncate a string, with an HTML &hellip; at the end.



81
82
83
# File 'lib/ndr_support/string/conversions.rb', line 81

# Truncates the string so that it occupies at most +n+ characters plus a
# trailing HTML '&hellip;' entity.
#
# n - maximum length. Strings no longer than n are returned unchanged;
#     longer ones are cut to their first n - 1 characters and '&hellip;' is
#     appended.
#
# For n <= 0 a non-empty string yields just '&hellip;' (previously the
# negative slice length produced nil and raised NoMethodError).
def truncate_hellip(n)
  return self unless length > n

  kept = [n - 1, 0].max
  slice(0, kept) + '&hellip;'
end

#xml_unsafe? ⇒ Boolean

Returns:

  • (Boolean)


148
149
150
# File 'lib/ndr_support/string/cleaning.rb', line 148

# Truthy when the string contains a character matched by
# String::INVALID_CONTROL_CHARS. The value returned is the index of the first
# such character, or nil when there is none.
def xml_unsafe?
  String::INVALID_CONTROL_CHARS =~ self
end