Class: String

Inherits:

Object

Object
String

show all

Defined in: lib/ndr_support/string/cleaning.rb,
lib/ndr_support/string/conversions.rb

Constant Summary collapse

# Matches the control characters 0x00-0x08, 0x0B-0x0C and 0x0E-0x1F, i.e. every
# character below 0x20 except tab (0x09), line feed (0x0A) and carriage return
# (0x0D). Used by strip_xml_unsafe_characters and xml_unsafe?.
INVALID_CONTROL_CHARS =

/[\x00-\x08\x0b-\x0c\x0e-\x1f]/

# Lookup used by clean(:roman5) to turn Roman numerals one to five into digits.
# Both 'IIII' and 'IV' are accepted spellings of four.
ROMAN_ONE_TO_FIVE_MAPPING =

{ 'I' => '1', 'II' => '2', 'III' => '3', 'IIII' => '4', 'IV' => '4', 'V' => '5' }

# Shape of a postcode once whitespace has been removed and it has been upcased
# (which is how postcodeize prepares its input): one of six outward-code
# letter/digit layouts followed by a digit and two letters.
# NOTE(review): anchored with ^ and $, which match per line rather than per string.
POSTCODE_REGEXP =

/
  ^(
    [A-Z][0-9]           |
    [A-Z][0-9][0-9]      |
    [A-Z][0-9][A-Z]      |
    [A-Z][A-Z][0-9]      |
    [A-Z][A-Z][0-9][0-9] |
    [A-Z][A-Z][0-9][A-Z]
  )
  [0-9][A-Z][A-Z]
$/x

# Letters that soundex encodes. Position-for-position parallel to SOUNDEX_NUMS
# (the pair is passed to String#tr).
SOUNDEX_CHARS =

'BPFVCSKGJQXZDTLMNR'

# Digit assigned to the letter at the same position in SOUNDEX_CHARS.
SOUNDEX_NUMS =

'111122222222334556'

# Negated character set: passed to String#delete by soundex to drop every
# character that is not in SOUNDEX_CHARS.
SOUNDEX_CHARS_EX =

'^' + SOUNDEX_CHARS

# Negated character set: passed to String#delete by soundex to drop everything
# except the letters A-Z.
SOUNDEX_CHARS_DEL =

'^A-Z'

Instance Method Summary collapse

#clean(what) ⇒ Object
#date1 ⇒ Object
#date2 ⇒ Object
#nhs_numberize ⇒ Object

Show NHS numbers with spaces.
#postcodeize(option = :user) ⇒ Object

Show postcode in various formats.
#soundex(census = true) ⇒ Object

Computes a Soundex phonetic code; see en.wikipedia.org/wiki/Soundex.
#sounds_like(other) ⇒ Object
#squash ⇒ Object

Used for comparing addresses.
#strip_xml_unsafe_characters ⇒ Object
#surname_and_initials ⇒ Object

Convert “SMITH JD” into “Smith JD”.
#surnameize ⇒ Object

Like titleize but copes with Scottish and Irish names.
#thedate ⇒ Object
#thetime ⇒ Object
#to_boolean ⇒ Object

Try to convert the string value into boolean.
#to_date(pattern = nil) ⇒ Object

Try to convert the string value into a date.
#truncate_hellip(n) ⇒ Object

Truncate a string, appending an HTML &hellip; entity at the end.
#xml_unsafe? ⇒ Boolean

Instance Method Details

#clean(what) ⇒ `Object`

# File 'lib/ndr_support/string/cleaning.rb', line 45

# Normalises this string according to the named cleaning rule and returns the
# result. The receiver itself is never modified.
#
# Supported values of +what+:
#   :nhsnumber       - removes everything but digits, keeps the first ten
#   :postcode,
#   :get_postcode    - delegates to postcodeize(:db)
#   :lpi             - upcases, then removes everything but 0-9 and A-Z
#   :gender          - '1' for a leading M / Male, '2' for a leading F / Female
#                      (case-insensitive); otherwise the string itself
#   :sex             - '1' if a line starts with M or the string contains a 1,
#                      '2' if a line starts with F or the string contains a 2,
#                      otherwise '0'
#   :sex_c           - same tests as :sex, but answers 'M', 'F' or ''
#   :name            - upcases, removes full stops, turns commas / semicolons
#                      into spaces, collapses runs of whitespace, replaces
#                      backticks with apostrophes and strips the ends
#   :ethniccategory  - maps the whole string through a fixed lookup table,
#                      falling back to the upcased string
#   :code            - splits via split_on_separators, drops blank parts,
#                      removes full stops and rejoins with single spaces
#   :code_icd        - deprecated (emits a warning); upcases, splits, and glues
#                      a standalone 'D' or 'A' onto the preceding code
#   :icd             - upcases, squishes, splits, and removes a '.' and/or 'X'
#                      that directly follows a digit
#   :code_opcs       - delegates to clean_code_opcs
#   :hospitalnumber  - drops the final character unless it is a digit
#   :xmlsafe,
#   :make_xml_safe   - delegates to strip_xml_unsafe_characters
#   :roman5          - replaces each run of I / V characters with its entry in
#                      ROMAN_ONE_TO_FIVE_MAPPING (runs with no entry are removed)
#   :tnmcategory     - removes one leading T, N or M (any case); a lone "x"
#                      is upcased, anything else is downcased
#   :upcase          - upcases
#   anything else    - replaces the literal two-character text " ?" with " "
#
# Helpers such as split_on_separators, clean_code_opcs, blank? and squish are
# defined outside this method.
def clean(what)
  case what
  when :nhsnumber
    delete('^0-9')[0..9]
  when :postcode, :get_postcode
    postcodeize(:db)
  when :lpi
    upcase.delete('^0-9A-Z')
  when :gender
    if self =~ /\AM(ale)?/i
      '1'
    elsif self =~ /\AF(emale)?/i
      '2'
    else
      self
    end
  when :sex
    # SECURE: BNS 2012-10-09: But may behave oddly for multi-line input
    if self =~ /^M|1/i
      '1'
    elsif self =~ /^F|2/i
      '2'
    else
      '0'
    end
  when :sex_c
    if self =~ /^M|1/i
      'M'
    elsif self =~ /^F|2/i
      'F'
    else
      ''
    end
  when :name
    # Applied in insertion order: each pair is splatted into gsub(pattern, replacement).
    substitutions = {
      '.'      => '',
      /,|;/    => ' ',
      /\s{2,}/ => ' ',
      '`'      => '\''
    }
    substitutions.inject(upcase) { |a, e| a.gsub(*e) }.strip
  when :ethniccategory
    replace_ethniccategory = {
      '0' => '0',
      '1' => 'M',
      '2' => 'N',
      '3' => 'H',
      '4' => 'J',
      '5' => 'K',
      '6' => 'R',
      '7' => '8',
      '&' => 'X',
      ' ' => 'X',
      '99' => 'X'
    }
    replace_ethniccategory[self] || upcase
  when :code
    split_on_separators.map do |code|
      code.blank? ? next : code.delete('.')
    end.compact.join(' ')
  when :code_icd
    warn '[DEPRECATION] clean(:code_icd) is deprecated - consider using clean(:icd) instead.'
    # regexp = /[A-Z][0-9]{2}(\.(X|[0-9]{1,2})|[0-9]?)( *(D|A)( |,|;|$))/
    codes = upcase.split_on_separators.delete_if { |x| x.squash.blank? }
    cleaned_codes = []
    codes.each do |code|
      if code == 'D' || code == 'A'
        # A bare dagger / asterisk marker belongs to the code before it.
        cleaned_codes[-1] += code
      else
        cleaned_codes << code
      end
    end
    cleaned_codes.join(' ')
  when :icd
    codes = upcase.squish.split_on_separators.reject(&:blank?)
    codes.map { |code| code.gsub(/(?<=\d)(\.?X?)/, '') }.join(' ')
  when :code_opcs
    clean_code_opcs
  when :hospitalnumber
    self[-1..-1] =~ /\d/ ? self : self[0..-2]
  when :xmlsafe, :make_xml_safe
    strip_xml_unsafe_characters
  when :roman5
    # This deromanises roman numerals between 1 and 5
    gsub(/[IV]+/i) { |match| ROMAN_ONE_TO_FIVE_MAPPING[match.upcase] }
  when :tnmcategory
    # Work on a copy: a destructive sub! here would alter the caller's string
    # and raise on frozen receivers.
    category = sub(/\A[tnm]/i, '')
    if category =~ /\Ax\z/i
      category.upcase
    else
      category.downcase
    end
  when :upcase
    upcase
  else
    # String pattern, so this is a literal " ?" match, not a regular expression.
    gsub(' ?', ' ')
  end
end

#date1 ⇒ `Object`



41
42
43

# File 'lib/ndr_support/string/conversions.rb', line 41

# Wraps this string in a Daterange and returns that object's date1.
# NOTE(review): presumably the start of the parsed range - confirm against Daterange.
def date1
  range = Daterange.new(self)
  range.date1
end

#date2 ⇒ `Object`



45
46
47

# File 'lib/ndr_support/string/conversions.rb', line 45

# Wraps this string in a Daterange and returns that object's date2.
# NOTE(review): presumably the end of the parsed range - confirm against Daterange.
def date2
  range = Daterange.new(self)
  range.date2
end

#nhs_numberize ⇒ `Object`

Show NHS numbers with spaces

# File 'lib/ndr_support/string/conversions.rb', line 75

# Formats a ten-character string in 3-3-4 groups separated by single spaces
# (e.g. "1234567890" becomes "123 456 7890"). Strings of any other length are
# returned untouched.
def nhs_numberize
  if length == 10
    [self[0, 3], self[3, 3], self[6, 4]].join(' ')
  else
    self
  end
end

#postcodeize(option = :user) ⇒ `Object`

Show postcode in various formats. Parameter “option” can be :user, :compact, :db

# File 'lib/ndr_support/string/cleaning.rb', line 26

# Reformats a postcode.
#
# The string is first stripped of all whitespace and upcased. If the result is
# neither blank nor a match for POSTCODE_REGEXP, the original string is
# returned unchanged (old-style or malformed postcodes are left alone).
#
# +option+ selects the output layout:
#   :compact      - no spaces at all
#   :db           - the part before the last three characters is padded with
#                   spaces so that 5- and 6-character postcodes become 7 wide;
#                   other lengths are returned compact
#   anything else - (including the default :user) a single space before the
#                   last three characters, unless shorter than 5 characters
def postcodeize(option = :user)
  compact = gsub(/[[:space:]]/, '').upcase

  # Don't change old-style or malformed postcodes
  return self if !compact.blank? && POSTCODE_REGEXP !~ compact

  if option == :compact
    compact
  elsif option == :db
    padding = { 5 => '  ', 6 => ' ' }[compact.length]
    padding ? compact.insert(-4, padding) : compact
  elsif compact.length < 5
    compact
  else
    # anything else, including :user --> friendly format
    compact.insert(-4, ' ')
  end
end

#soundex(census = true) ⇒ `Object`

Computes a Soundex phonetic code; see en.wikipedia.org/wiki/Soundex

# File 'lib/ndr_support/string/conversions.rb', line 27

# Computes a Soundex-style phonetic code for this string
# (see en.wikipedia.org/wiki/Soundex).
#
# The string is upcased, stripped of everything but A-Z, and adjacent repeated
# letters are collapsed. The first remaining letter is kept verbatim; the rest
# are reduced to the letters in SOUNDEX_CHARS and translated to digits via
# SOUNDEX_NUMS, with adjacent repeated digits collapsed and the result padded
# with '0' to at least three digits.
#
# census - when truthy (the default) the digit part is limited to three
#          digits; when falsy the digit part is not truncated.
#
# Returns the code as a String, or '' when the string contains no letters.
def soundex(census = true)
  letters = upcase.delete(SOUNDEX_CHARS_DEL).squeeze

  # Explicit guard for the only failing input (no letters at all), instead of
  # a blanket inline rescue that would also hide unrelated errors.
  return '' if letters.empty?

  limit = census ? 2 : -1
  digits = letters[1..-1].delete(SOUNDEX_CHARS_EX).tr(SOUNDEX_CHARS, SOUNDEX_NUMS)

  # Truncation is applied both before and after squeezing, as before.
  letters[0..0] + digits[0..limit].squeeze[0..limit].ljust(3, '0')
end

#sounds_like(other) ⇒ `Object`



37
38
39

# File 'lib/ndr_support/string/conversions.rb', line 37

# Reports whether this string and +other+ produce the same soundex code
# (both are encoded with soundex's default argument).
def sounds_like(other)
  own_code = soundex
  own_code == other.soundex
end

#squash ⇒ `Object`

Used for comparing addresses



20
21
22

# File 'lib/ndr_support/string/cleaning.rb', line 20

# Upcases the string and removes every character that is not A-Z or 0-9.
# Used for comparing addresses.
def squash
  shouted = upcase
  shouted.delete('^A-Z0-9')
end

#strip_xml_unsafe_characters ⇒ `Object`



144
145
146

# File 'lib/ndr_support/string/cleaning.rb', line 144

# Returns a copy of the string with every character matching
# String::INVALID_CONTROL_CHARS removed.
def strip_xml_unsafe_characters
  unsafe = String::INVALID_CONTROL_CHARS
  gsub(unsafe, '')
end

#surname_and_initials ⇒ `Object`

Convert “SMITH JD” into “Smith JD”

# File 'lib/ndr_support/string/conversions.rb', line 58

# Converts "SMITH JD" into "Smith JD".
#
# The string is split on whitespace; the last word is treated as the initials
# and kept as-is, every preceding word is capitalised, and the pieces are
# joined with single spaces.
#
# Returns '' for an empty or whitespace-only string (previously this raised a
# TypeError because there were no initials to append). A single-word string
# is treated as initials only, so it comes back unchanged apart from a leading
# space.
def surname_and_initials
  words = split
  return '' if words.empty?

  initials = words.pop
  words.map(&:capitalize).join(' ') + ' ' + initials
end

#surnameize ⇒ `Object`

Like titleize but copes with Scottish and Irish names.

# File 'lib/ndr_support/string/conversions.rb', line 65

# Like titleize, but copes with Scottish and Irish names: when the first two
# characters (case-insensitively) are "MC" or "O'", the prefix and the rest of
# the name are titleized separately and concatenated, so the letter after the
# prefix is capitalised too. Any other string is simply titleized.
def surnameize
  prefix = slice(0, 2).upcase
  return titleize unless ['MC', "O'"].include?(prefix)

  prefix.titleize + slice(2..-1).titleize
end

#thedate ⇒ `Object`



49
50
51

# File 'lib/ndr_support/string/conversions.rb', line 49

# Wraps this string in an Ourdate and returns that object's thedate.
# NOTE(review): presumably the parsed date value - confirm against Ourdate.
def thedate
  wrapper = Ourdate.new(self)
  wrapper.thedate
end

#thetime ⇒ `Object`



53
54
55

# File 'lib/ndr_support/string/conversions.rb', line 53

# Wraps this string in an Ourtime and returns that object's thetime.
# NOTE(review): presumably the parsed time value - confirm against Ourtime.
def thetime
  wrapper = Ourtime.new(self)
  wrapper.thetime
end

#to_boolean ⇒ `Object`

Try to convert the string value into boolean

# File 'lib/ndr_support/string/conversions.rb', line 131

# Tries to convert the string value into a boolean.
#
# Matching is case-insensitive:
#   true, t, yes, y, 1  -> true
#   false, f, no, n, 0  -> false
#
# The whole string must be one of these words; a single trailing newline is
# tolerated. Anchoring with \A / \Z (as to_date does) rather than ^ / $ means
# multi-line input such as "junk\ntrue" is rejected instead of being accepted
# on the strength of one matching line.
#
# Raises ArgumentError for any other value.
def to_boolean
  return true if self =~ /\A(true|t|yes|y|1)\Z/i
  return false if self =~ /\A(false|f|no|n|0)\Z/i

  raise ArgumentError, "invalid value for Boolean: \"#{self}\""
end

#to_date(pattern = nil) ⇒ `Object`

Try to convert the string value into a date. If a pattern is given, it is used to parse the date; otherwise the default parsing rules are applied.

# File 'lib/ndr_support/string/conversions.rb', line 87

# Tries to convert the string value into a date.
#
# pattern - optional format hint. Recognised values visible here:
#             'ddmmyyyy'                   (treated as strptime '%d%m%Y')
#             'yyyymmdd', 'yyyymmdd_ons'   (treated as strptime '%Y%m%d', see below)
#             any string containing '%'    (handed to DateTime.strptime as-is)
#             'yyyy/dd/mm', 'mm/dd/yyyy'   (day and month swapped after parsing)
#           With no pattern (or an unrecognised one) the string is parsed by
#           ParseDate.parsedate.
#
# Returns '' for an empty string, nil for a blank one, nil for a
# 'yyyymmdd'-style value with a zero month or day, the result of date1 for
# dd.mm.yyyy strings, and otherwise whatever Ourdate.build_datetime returns.
# NOTE(review): blank?, ParseDate and Ourdate are defined outside this method;
# their exact behaviour (and any errors they raise on bad input) should be
# confirmed there.
def to_date(pattern = nil)
  return ''  if empty? # TODO: check if this is used... :/
  return nil if blank?

  # Translate the 'ddmmyyyy' shorthand into the equivalent strptime format.
  pattern = '%d%m%Y' if 'ddmmyyyy' == pattern

  if pattern == 'yyyymmdd' || pattern == 'yyyymmdd_ons'
    # Workaround for ONS dates (with missing day / month): revert to old
    # parsing behaviour. (Instead, EDeathRecord should substitute a Daterange)
    # TODO: Move all death parsing to format 'yyyymmdd_ons'
    # Eight digits with a month of 00 (yyyy00dd) or a day of 00 (yyyymm00)
    # cannot be parsed as a full date, so give up with nil.
    return nil if self =~ /\A([0-9]{4}00[0-9]{2}|[0-9]{6}00)\Z/
    pattern = '%Y%m%d'
  end

  # This check runs before any pattern handling, so a dd.mm.yyyy string is
  # parsed via date1 regardless of the pattern supplied.
  if self =~ /\A([0-9][0-9]?)[.]([0-9][0-9]?)[.]([0-9][0-9][0-9][0-9])\Z/ # dd.mm.yyyy
    return date1 # Uses Daterange to consistently parse our displayed date format
  end

  if pattern.to_s.include?('%')
    # Use DateTime.strptime if the pattern contains a percent sign; only the
    # year, month and day of the parsed value are passed on.
    parsed_date = DateTime.strptime(self, pattern)
    Ourdate.build_datetime(parsed_date.year, parsed_date.month, parsed_date.day)
  else
    # Use '.' rather than '/' as a separator for more consistent parsing:
    year, month, day, *_ = ParseDate.parsedate(gsub('/', '.'))

    if ['yyyy/dd/mm', 'mm/dd/yyyy'].include?(pattern)
      # These layouts put the day where the parser expects the month.
      month, day = day, month
    elsif 8 == length && self !~ /\d{8}/
      # dd/mm/yy, rather than yyyymmdd
      # The parser's year and day are swapped back, then the two-digit year is
      # windowed: values up to the current two-digit year become 20xx, later
      # values become 19xx.
      year, day = day, year
      year += 100 if year <= Ourdate.today.year % 100
      year += 1900
    elsif 9 == length
      # dd/mmm/yy, rare case.
      # Same two-digit-year windowing as above, without the year/day swap.
      year += 100 if year <= Ourdate.today.year % 100
      year += 1900
    end

    Ourdate.build_datetime(year, month, day)
  end
end

#truncate_hellip(n) ⇒ `Object`

Truncate a string, appending an HTML &hellip; entity at the end



81
82
83

# File 'lib/ndr_support/string/conversions.rb', line 81

# Truncates the string for HTML display.
#
# When the string is longer than +n+ characters, returns its first n - 1
# characters followed by the '&hellip;' entity; otherwise returns the string
# itself, unchanged.
def truncate_hellip(n)
  return self unless length > n

  slice(0, n - 1) + '&hellip;'
end

#xml_unsafe? ⇒ `Boolean`

Returns:

(Boolean)



148
149
150

# File 'lib/ndr_support/string/cleaning.rb', line 148

# Tests the string against String::INVALID_CONTROL_CHARS.
#
# Returns the index of the first matching character (truthy, including 0) or
# nil when there is none - i.e. a boolean-ish value rather than strictly
# true / false.
def xml_unsafe?
  String::INVALID_CONTROL_CHARS =~ self
end

Class: String

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#clean(what) ⇒ Object

#date1 ⇒ Object

#date2 ⇒ Object

#nhs_numberize ⇒ Object

#postcodeize(option = :user) ⇒ Object

#soundex(census = true) ⇒ Object

#sounds_like(other) ⇒ Object

#squash ⇒ Object

#strip_xml_unsafe_characters ⇒ Object

#surname_and_initials ⇒ Object

#surnameize ⇒ Object

#thedate ⇒ Object

#thetime ⇒ Object

#to_boolean ⇒ Object

#to_date(pattern = nil) ⇒ Object

#truncate_hellip(n) ⇒ Object

#xml_unsafe? ⇒ Boolean