Class: String
- Inherits:
-
Object
- Object
- String
- Defined in:
- lib/ndr_support/string/cleaning.rb,
lib/ndr_support/string/conversions.rb
Constant Summary collapse
- INVALID_CONTROL_CHARS =
/[\x00-\x08\x0b-\x0c\x0e-\x1f]/
- ROMAN_ONE_TO_FIVE_MAPPING =
{ 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }
- POSTCODE_REGEXP =
/ ^( [A-Z][0-9] | [A-Z][0-9][0-9] | [A-Z][0-9][A-Z] | [A-Z][A-Z][0-9] | [A-Z][A-Z][0-9][0-9] | [A-Z][A-Z][0-9][A-Z] ) [0-9][A-Z][A-Z] $/x
- SOUNDEX_CHARS =
'BPFVCSKGJQXZDTLMNR'
- SOUNDEX_NUMS =
'111122222222334556'
- SOUNDEX_CHARS_EX =
'^' + SOUNDEX_CHARS
- SOUNDEX_CHARS_DEL =
'^A-Z'
Instance Method Summary collapse
- #clean(what) ⇒ Object
- #date1 ⇒ Object
- #date2 ⇒ Object
-
#nhs_numberize ⇒ Object
Show NHS numbers with spaces.
-
#postcodeize(option = :user) ⇒ Object
Show postcode in various formats.
- #soundex(census = true) ⇒ Object
- #sounds_like(other) ⇒ Object
-
#squash ⇒ Object
Used for comparing addresses.
- #strip_xml_unsafe_characters ⇒ Object
-
#surname_and_initials ⇒ Object
Convert “SMITH JD” into “Smith JD”.
-
#surnameize ⇒ Object
Like titleize but copes with Scottish and Irish names.
- #thedate ⇒ Object
- #thetime ⇒ Object
-
#to_boolean ⇒ Object
Try to convert the string value into boolean.
-
#to_date(pattern = nil) ⇒ Object
Try to convert the string value into a date.
-
#truncate_hellip(n) ⇒ Object
Truncate a string, with an HTML … at the end.
- #xml_unsafe? ⇒ Boolean
Instance Method Details
#clean(what) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/ndr_support/string/cleaning.rb', line 45
#
# Returns a cleaned-up version of the string; +what+ selects the rule set.
# Unknown values of +what+ fall through to the final +else+ branch.
#
# The receiver is never modified: every branch builds a new string (or
# returns +self+ untouched), so frozen strings are safe to clean.
#
# @param what [Symbol] one of :nhsnumber, :postcode, :get_postcode, :lpi,
#   :gender, :sex, :sex_c, :name, :ethniccategory, :code, :code_icd, :icd,
#   :code_opcs, :hospitalnumber, :xmlsafe, :make_xml_safe, :roman5,
#   :tnmcategory, :upcase
# @return [String] the cleaned value
def clean(what)
  case what
  when :nhsnumber
    # Keep digits only, at most the first ten.
    delete('^0-9')[0..9]
  when :postcode, :get_postcode
    postcodeize(:db)
  when :lpi
    upcase.delete('^0-9A-Z')
  when :gender
    if self =~ /\AM(ale)?/i
      '1'
    elsif self =~ /\AF(emale)?/i
      '2'
    else
      self
    end
  when :sex
    # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
    # NOTE(review): /^M|1/ reads as "line starts with M" OR "contains a 1
    # anywhere"; kept as-is because callers may rely on it - confirm intent.
    if self =~ /^M|1/i
      '1'
    elsif self =~ /^F|2/i
      '2'
    else
      '0'
    end
  when :sex_c
    if self =~ /^M|1/i
      'M'
    elsif self =~ /^F|2/i
      'F'
    else
      ''
    end
  when :name
    # Applied in insertion order against the upcased string.
    substitutions = {
      '.'      => '',
      /,|;/    => ' ',
      /\s{2,}/ => ' ',
      '`'      => '\''
    }
    substitutions.inject(upcase) { |a, e| a.gsub(*e) }.strip
  when :ethniccategory
    replace_ethniccategory = {
      '0' => '0',
      '1' => 'M',
      '2' => 'N',
      '3' => 'H',
      '4' => 'J',
      '5' => 'K',
      '6' => 'R',
      '7' => '8',
      '&' => 'X',
      ' ' => 'X',
      '99' => 'X'
    }
    # Values without a mapping are simply upcased.
    replace_ethniccategory[self] || upcase
  when :code
    split_on_separators.map do |code|
      code.blank? ? next : code.delete('.')
    end.compact.join(' ')
  when :code_icd
    warn '[DEPRECATION] clean(:code_icd) is deprecated - consider using clean(:icd) instead.'
    # regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
    codes = upcase.split_on_separators.delete_if { |x| x.squash.blank? }
    cleaned_codes = []
    codes.each do |code|
      # A lone 'D' or 'A' is glued onto the preceding code.
      if code == 'D' || code == 'A'
        cleaned_codes[-1] += code
      else
        cleaned_codes << code
      end
    end
    cleaned_codes.join(' ')
  when :icd
    codes = upcase.squish.split_on_separators.reject(&:blank?)
    codes.map { |code| code.gsub(/(?<=\d)(\.?X?)/, '') }.join(' ')
  when :code_opcs
    clean_code_opcs
  when :hospitalnumber
    # Drop the final character unless it is a digit.
    self[-1..-1] =~ /\d/ ? self : self[0..-2]
  when :xmlsafe, :make_xml_safe
    strip_xml_unsafe_characters
  when :roman5
    # This deromanises roman numerals between 1 and 5
    gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
  when :tnmcategory
    # Strip a leading T/N/M without touching the receiver (the previous
    # in-place sub! mutated the caller's string and raised on frozen input).
    stripped = sub(/\A[tnm]/i, '')
    stripped =~ /\Ax\z/i ? stripped.upcase : stripped.downcase
  when :upcase
    upcase
  else
    gsub(' ?', ' ')
  end
end
#date1 ⇒ Object
41 42 43 |
# File 'lib/ndr_support/string/conversions.rb', line 41
#
# Wraps the string in a Daterange and returns that object's +date1+.
def date1
  range = Daterange.new(self)
  range.date1
end
#date2 ⇒ Object
45 46 47 |
# File 'lib/ndr_support/string/conversions.rb', line 45
#
# Wraps the string in a Daterange and returns that object's +date2+.
def date2
  range = Daterange.new(self)
  range.date2
end
#nhs_numberize ⇒ Object
Show NHS numbers with spaces
75 76 77 78 |
# File 'lib/ndr_support/string/conversions.rb', line 75
#
# Formats a 10-character string in 3-3-4 groups separated by single spaces.
# Strings of any other length are returned unchanged (the receiver itself).
def nhs_numberize
  return self if length != 10

  "#{self[0, 3]} #{self[3, 3]} #{self[6, 4]}"
end
#postcodeize(option = :user) ⇒ Object
Show postcode in various formats. Parameter “option” can be :user, :compact, :db
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/ndr_support/string/cleaning.rb', line 26
#
# Show postcode in various formats.
#
# Whitespace is removed and the text upcased before matching against
# POSTCODE_REGEXP. A non-blank value that does not match is returned
# untouched (the receiver itself).
#
# @param option [Symbol] :compact gives no space; :db inserts a space before
#   the last three characters only for 5- and 6-character postcodes; anything
#   else (including :user) inserts that space for 5 or more characters
# @return [String]
def postcodeize(option = :user)
  compacted = gsub(/[[:space:]]/, '').upcase

  # Don't change old-style or malformed postcodes
  return self if !compacted.blank? && POSTCODE_REGEXP !~ compacted

  case option
  when :compact
    compacted
  when :db
    [5, 6].include?(compacted.length) ? compacted.insert(-4, ' ') : compacted
  else
    # anything else, including :user --> friendly format
    compacted.length < 5 ? compacted : compacted.insert(-4, ' ')
  end
end
#soundex(census = true) ⇒ Object
27 28 29 30 31 32 33 34 35 |
# File 'lib/ndr_support/string/conversions.rb', line 27
#
# Computes a Soundex-style code: the first letter followed by digit codes for
# the remaining consonants (mapping given by SOUNDEX_CHARS / SOUNDEX_NUMS),
# right-padded with '0' to at least three digits.
#
# @param census [Boolean] when truthy, keep at most three digits; otherwise
#   keep every digit
# @return [String] the code, or '' when the string contains no letters A-Z
def soundex(census = true)
  # Letters only, upcased, with runs of the same letter collapsed.
  str = upcase.delete(SOUNDEX_CHARS_DEL).squeeze

  # Explicit guard for the one input that used to be caught by a blanket
  # inline `rescue ''` (which also hid any unrelated error).
  return '' if str.empty?

  limit = census ? 2 : -1
  digits = str[1..-1].delete(SOUNDEX_CHARS_EX).tr(SOUNDEX_CHARS, SOUNDEX_NUMS)

  # NOTE(review): digits are truncated *before* repeated codes are squeezed,
  # so e.g. "Jackson" gives J200; order kept because stored/compared codes
  # presumably depend on it - confirm before changing.
  str[0..0] + digits[0..limit].squeeze[0..limit].ljust(3, '0')
end
#sounds_like(other) ⇒ Object
37 38 39 |
# File 'lib/ndr_support/string/conversions.rb', line 37
#
# True when this string and +other+ produce the same default +soundex+ code.
# +other+ must respond to +soundex+.
def sounds_like(other)
  own_code = soundex
  other_code = other.soundex
  own_code == other_code
end
#squash ⇒ Object
Used for comparing addresses
20 21 22 |
# File 'lib/ndr_support/string/cleaning.rb', line 20
#
# Used for comparing addresses: upcases the string and drops every character
# that is not A-Z or 0-9.
def squash
  upcase.gsub(/[^A-Z0-9]/, '')
end
#strip_xml_unsafe_characters ⇒ Object
144 145 146 |
# File 'lib/ndr_support/string/cleaning.rb', line 144
#
# Returns a copy of the string with every character matching
# String::INVALID_CONTROL_CHARS removed.
def strip_xml_unsafe_characters
  unsafe = String::INVALID_CONTROL_CHARS
  gsub(unsafe, '')
end
#surname_and_initials ⇒ Object
Convert “SMITH JD” into “Smith JD”
58 59 60 61 62 |
# File 'lib/ndr_support/string/conversions.rb', line 58
#
# Convert "SMITH JD" into "Smith JD": the last whitespace-separated word is
# kept verbatim as the initials, every preceding word is capitalized.
#
# A string with no words at all (empty or whitespace only) is returned
# unchanged; previously this raised TypeError (String + nil).
#
# @return [String]
def surname_and_initials
  words = split
  return self if words.empty?

  initials = words.pop
  "#{words.map(&:capitalize).join(' ')} #{initials}"
end
#surnameize ⇒ Object
Like titleize but copes with Scottish and Irish names.
65 66 67 68 69 70 71 72 |
# File 'lib/ndr_support/string/conversions.rb', line 65
#
# Like titleize but copes with Scottish and Irish names: when the first two
# characters (upcased) are "MC" or "O'", the prefix and the remainder are
# titleized separately and joined; otherwise the whole string is titleized.
def surnameize
  prefix = slice(0, 2).upcase
  return titleize unless ['MC', "O'"].include?(prefix)

  remainder = slice(2..-1)
  prefix.titleize + remainder.titleize
end
#thedate ⇒ Object
49 50 51 |
# File 'lib/ndr_support/string/conversions.rb', line 49
#
# Wraps the string in an Ourdate and returns that object's +thedate+.
def thedate
  parsed = Ourdate.new(self)
  parsed.thedate
end
#thetime ⇒ Object
53 54 55 |
# File 'lib/ndr_support/string/conversions.rb', line 53
#
# Wraps the string in an Ourtime and returns that object's +thetime+.
def thetime
  parsed = Ourtime.new(self)
  parsed.thetime
end
#to_boolean ⇒ Object
Try to convert the string value into boolean
131 132 133 134 135 136 |
# File 'lib/ndr_support/string/conversions.rb', line 131
#
# Try to convert the string value into boolean.
#
# Matching is case-insensitive and applies to the whole string (a single
# trailing newline is tolerated). The previous ^...$ anchors matched any one
# line of a multi-line value, so "no\nyes" was accepted as true.
#
# @return [Boolean] true for true/t/yes/y/1, false for false/f/no/n/0
# @raise [ArgumentError] for any other value, including the empty string
def to_boolean
  return true if self =~ /\A(true|t|yes|y|1)\Z/i
  return false if self =~ /\A(false|f|no|n|0)\Z/i

  raise ArgumentError, "invalid value for Boolean: \"#{self}\""
end
#to_date(pattern = nil) ⇒ Object
Try to convert the string value into a date. If given a pattern, use it to parse the date; otherwise use the default settings to parse it.
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/ndr_support/string/conversions.rb', line 87
#
# Try to convert the string value into a date.
#
# @param pattern [String, nil] either a strptime-style format (anything
#   containing '%'), one of the named layouts handled below ('ddmmyyyy',
#   'yyyymmdd', 'yyyymmdd_ons', 'yyyy/dd/mm', 'mm/dd/yyyy'), or nil to fall
#   back to ParseDate
# @return ['' for an empty string, nil for a blank string or an ONS date with
#   a zero day/month, otherwise whatever date1 / Ourdate.build_datetime
#   returns]
def to_date(pattern = nil)
  return '' if empty? # TODO: check if this is used... :/
  return nil if blank?

  case pattern
  when 'ddmmyyyy'
    pattern = '%d%m%Y'
  when 'yyyymmdd', 'yyyymmdd_ons'
    # Workaround for ONS dates (with missing day / month): revert to old
    # parsing behaviour. (Instead, EDeathRecord should substitute a Daterange)
    # TODO: Move all death parsing to format 'yyyymmdd_ons'
    return nil if self =~ /\A([0-9]{4}00[0-9]{2}|[0-9]{6}00)\Z/
    pattern = '%Y%m%d'
  end

  # dd.mm.yyyy
  # Uses Daterange to consistently parse our displayed date format
  return date1 if self =~ /\A([0-9][0-9]?)[.]([0-9][0-9]?)[.]([0-9][0-9][0-9][0-9])\Z/

  # Use Date.strptime if the pattern contains a percent sign
  if pattern.to_s.include?('%')
    parsed = DateTime.strptime(self, pattern)
    return Ourdate.build_datetime(parsed.year, parsed.month, parsed.day)
  end

  # Use '.' rather than '/' as a separator for more consistent parsing:
  year, month, day = ParseDate.parsedate(gsub('/', '.'))

  if ['yyyy/dd/mm', 'mm/dd/yyyy'].include?(pattern)
    month, day = day, month
  elsif length == 8 && self !~ /\d{8}/
    # dd/mm/yy, rather than yyyymmdd
    year, day = day, year
    year += 100 if year <= Ourdate.today.year % 100
    year += 1900
  elsif length == 9
    # dd/mmm/yy, rare case.
    year += 100 if year <= Ourdate.today.year % 100
    year += 1900
  end

  Ourdate.build_datetime(year, month, day)
end
#truncate_hellip(n) ⇒ Object
Truncate a string, with an HTML … at the end.
81 82 83 |
# File 'lib/ndr_support/string/conversions.rb', line 81
#
# Truncate a string to +n+ characters: the first n - 1 characters followed by
# an ellipsis. Strings no longer than +n+ are returned unchanged (the
# receiver itself).
#
# For n <= 0 on a non-empty string the result is just the ellipsis; the
# previous code called slice with a negative length, got nil and raised
# NoMethodError.
#
# NOTE(review): the rendered page shows a literal ellipsis character; the
# underlying source may use the HTML entity instead - confirm.
#
# @param n [Integer] maximum length of the result
# @return [String]
def truncate_hellip(n)
  return self unless length > n

  kept = [n - 1, 0].max
  slice(0, kept) + '…'
end
#xml_unsafe? ⇒ Boolean
148 149 150 |
# File 'lib/ndr_support/string/cleaning.rb', line 148
#
# Tests the string against String::INVALID_CONTROL_CHARS.
# Returns the index of the first matching character (truthy) or nil when
# there is none, rather than a strict true/false.
def xml_unsafe?
  String::INVALID_CONTROL_CHARS =~ self
end