Module: CSVPP::Conversions

Included in:
Parser
Defined in:
lib/csvpp/conversions.rb

Constant Summary collapse

# Pattern for the part of an array type spec that sits between "<" and ">"
# (e.g. "int, ;"). `array_type` captures the element type as a run of word
# characters; `array_delimiter` captures the single non-word character that
# follows the comma and any whitespace.
ARRAY_TYPE_RGX =
/(?<array_type>\w+),\s*(?<array_delimiter>\W)/

Class Method Summary collapse

Class Method Details

.clean_decimal(str) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/csvpp/conversions.rb', line 142

# Strips formatting noise from a numeric string so that it can be handed to a
# numeric constructor afterwards.
#
# The following is removed, in this order:
# * surrounding and embedded whitespace, apostrophes and backticks
#   (thousand separators such as "1'000"),
# * a trailing run of ASCII letters (units such as "mg"),
# * zeros directly after a leading minus sign ("-003" becomes "-3"),
# * leading zeros ("007" becomes "7"; note that "0.5" becomes ".5").
#
# A string made up solely of zeros collapses to "0".
#
# @param str [Object] value to clean; anything that is not a String is
#   returned unchanged
# @return [String, Object] the cleaned string, or +str+ itself when it is not
#   a String
#
# @example
#   clean_decimal("1'000.50 mg") # => "1000.50"
#   clean_decimal("-003")        # => "-3"
#   clean_decimal("000")         # => "0"
def clean_decimal(str)
  return str unless str.is_a?(String)

  without_separators = str.strip.gsub(/['`\s]?/, '')
  without_unit = without_separators.sub(/[\sa-zA-Z]*$/, '')
  normalised = without_unit.sub(/^-0*(.+)$/, '-\1')

  # An all-zero value must keep exactly one digit.
  return '0' if normalised =~ /^0+$/

  normalised.gsub( /^0*/, '')
end

.convert(obj, to:, missings: [], **options) ⇒ Object

Returns parsed value, read from obj, interpreted as type given by to.

Parameters:

  • obj (Object)

    object to parse

  • to (String)

    a type, e.g. “int”

  • missings (Array) (defaults to: [])

    list of values that are treated as missings, e.g. [‘NA’, ‘-’, -999]

  • options (Hash)

    options passed on to parsing methods for specific types

Returns:

  • parsed value, read from obj, interpreted as type given by to



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/csvpp/conversions.rb', line 15

# Converts +obj+ to the type named by +to+ by dispatching to the matching
# `parse_<type>` method.
#
# Type names starting with "array" are expected to look like
# "array<element_type, delimiter>"; the element type and delimiter are
# extracted with ARRAY_TYPE_RGX and passed on as the +type:+ and
# +delimiter:+ options of `parse_array`.
#
# @param obj [Object] object to parse
# @param to [String] a type, e.g. "int"
# @param missings [Array] values that are treated as missing,
#   e.g. ['NA', '-', -999]
# @param options [Hash] options passed on to the type-specific parse method
# @return [Object, nil] the parsed value, or nil when +obj+ counts as missing
def convert(obj, to:, missings: [], **options)
  return nil if missing?(obj, missings)

  parser = to
  if to.start_with?('array')
    parser, element_spec = to.split('<')
    # Drop the closing bracket(s) before picking the spec apart.
    spec = element_spec.tr('>', '').match(ARRAY_TYPE_RGX)
    options = options.merge(type: spec[:array_type],
                            delimiter: spec[:array_delimiter])
  end

  send("parse_#{parser}", obj, **options)
end

.missing?(obj, missings) ⇒ Boolean

Returns:

  • (Boolean)


138
139
140
# File 'lib/csvpp/conversions.rb', line 138

# Tells whether +obj+ is one of the configured "missing" markers. Both sides
# are compared by their string representation, so -999 and "-999" are
# considered equal.
#
# @param obj [Object] value to check
# @param missings [Array] markers that denote a missing value
# @return [Boolean]
def missing?(obj, missings)
  needle = obj.to_s
  missings.any? { |marker| marker.to_s == needle }
end

.parse_array(str, type:, delimiter:, **options) ⇒ Object



31
32
33
# File 'lib/csvpp/conversions.rb', line 31

# Splits +str+ on +delimiter+ and parses every piece with the
# `parse_<type>` method.
#
# @param str [String] delimited list of values
# @param type [String] element type; selects the `parse_<type>` method
# @param delimiter [String] separator between the elements
# @param options [Hash] accepted for a uniform signature; not forwarded to
#   the element parser
# @return [Array] the parsed elements, in input order
def parse_array(str, type:, delimiter:, **options)
  pieces = str.split(delimiter)
  element_parser = "parse_#{type}"

  pieces.map do |piece|
    send(element_parser, piece)
  end
end

.parse_boolean(str, true_values: [], false_values: [], **options) ⇒ Object

Returns true or false, or nil if str doesn’t match any value interpreted as true or false.

Parameters:

  • true_values (Array) (defaults to: [])

    list of values that are interpreted as true

  • false_values (Array) (defaults to: [])

    list of values that are interpreted as false

Returns:

  • true or false, or nil if str doesn’t match any value interpreted as true or false



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/csvpp/conversions.rb', line 114

# Interprets +str+ as a boolean. The comparison ignores case and surrounding
# whitespace of +str+.
#
# @param str [Object] value to interpret
# @param true_values [Array] values that are interpreted as true; when empty,
#   "1", "t" and "true" are used
# @param false_values [Array] values that are interpreted as false; when
#   empty, "0", "f" and "false" are used
# @param options [Hash] ignored
# @return [true, false, nil] nil if +str+ matches neither list
def parse_boolean(str, true_values: [], false_values: [], **options)
  token = str.to_s.strip.downcase

  accepted_true =
    true_values.empty? ? ['1', 't', 'true'] : true_values.map { |v| v.to_s.downcase }
  return true if accepted_true.include?(token)

  accepted_false =
    false_values.empty? ? ['0', 'f', 'false'] : false_values.map { |v| v.to_s.downcase }
  accepted_false.include?(token) ? false : nil
end

.parse_chop(str, delimiter: ':', **options) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/csvpp/conversions.rb', line 35

# Splits a delimited "code:laterality:date" value into its parts.
#
# @param str [String] the raw value
# @param delimiter [String] separator between the parts
# @param options [Hash] ignored
# @return [Hash] with keys +:code+ (String), +:laterality+ (String, or nil
#   when absent or blank) and +:date+ (result of `parse_date`, or nil when
#   absent)
def parse_chop(str, delimiter: ':', **options)
  raw_code, raw_laterality, raw_date = str.split(delimiter)

  code = parse_string(raw_code)

  laterality = raw_laterality && parse_string(raw_laterality)
  # A present-but-blank laterality is reported as nil.
  laterality = nil if laterality && laterality.empty?

  date = raw_date && parse_date(raw_date)

  { code: code, laterality: laterality, date: date }
end

.parse_date(str, **options) ⇒ Object



106
107
108
# File 'lib/csvpp/conversions.rb', line 106

# Parses the string form of +str+ with `Date.parse`.
#
# @param str [Object] value whose `to_s` holds a date
# @param options [Hash] ignored
# @return [Date]
# @raise [ArgumentError] from `Date.parse` when no date can be recognised
def parse_date(str, **options)
  text = str.to_s
  Date.parse(text)
end

.parse_decimal(str, **options) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
# File 'lib/csvpp/conversions.rb', line 94

# Parses +str+ into a BigDecimal after running it through `clean_decimal`.
#
# @param str [Object] value to parse
# @param options [Hash] ignored
# @return [BigDecimal, nil] nil when +str+ is blank or nothing is left after
#   cleaning
def parse_decimal(str, **options)
  return nil if str.to_s.empty?

  digits = clean_decimal(str).to_s
  return nil if digits.empty?

  BigDecimal(digits)
end

.parse_float(str, **options) ⇒ Object



89
90
91
92
# File 'lib/csvpp/conversions.rb', line 89

# Parses +str+ into a Float after running it through `clean_decimal`.
#
# @param str [Object] value to parse
# @param options [Hash] ignored
# @return [Float, nil] nil when +str+ is blank or the conversion fails
def parse_float(str, **options)
  return nil if str.to_s.empty?

  begin
    Float(clean_decimal(str))
  rescue StandardError
    # Best effort: anything that cannot be converted is reported as nil.
    nil
  end
end

.parse_int(str, **options) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/csvpp/conversions.rb', line 73

# Parses +str+ as a base-10 integer.
#
# String input is cleaned first: whitespace, apostrophes and backticks
# (thousand separators) are removed, the decimal point and the digits after
# it are dropped, a trailing run of ASCII letters (units such as "mg") is cut
# off, and leading zeros are stripped. Non-string input is handed to
# `Kernel#Integer` as is.
#
# @param str [Object] value to parse
# @param options [Hash] ignored
# @return [Integer, nil] nil when +str+ is blank or cannot be converted
#
# @example
#   parse_int("1'000")   # => 1000
#   parse_int("12.7 mg") # => 12
#   parse_int("-003")    # => -3
#   parse_int("+010")    # => 10
def parse_int(str, **options)
  return nil if str.to_s.empty?

  if str.is_a?(String)
    digits = str.strip
                .gsub(/['`\s]?/, '')      # remove thousand separators
                .sub(/\.\d*/, '')         # remove decimal point and everything thereafter
                .sub(/[\sa-zA-Z]*$/, '')  # remove trailing words like "mg"
                .sub(/^-0*(.+)$/, '-\1')  # remove 0 after negative sign: -003 => -3
    digits = digits =~ /^0+$/ ? '0' : digits.gsub(/^0*/, '') # remove leading zeros
  end

  begin
    if digits
      # Force base 10. The zero stripping above does not cover input with a
      # leading "+", so without an explicit base "+010" would be read as
      # octal (8) and "+08" would be rejected.
      Integer(digits, 10)
    else
      # A base may only be given for string arguments.
      Integer(str)
    end
  rescue StandardError
    # Best effort: anything that cannot be converted is reported as nil.
    nil
  end
end

.parse_medi(str, delimiter: ':', **options) ⇒ Object

See page 3 in documentation/Technisches_Begleitblatt_2017_d.pdf for more info on the medi data type.



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/csvpp/conversions.rb', line 51

# Splits a delimited "atc_code:annex:application:dose:unit" value into its
# parts.
#
# @param str [String] the raw value
# @param delimiter [String] separator between the parts
# @param options [Hash] ignored
# @return [Hash] with keys +:atc_code+, +:annex+ (nil when absent or blank),
#   +:application+, +:dose+ (result of `parse_decimal`) and +:unit+
def parse_medi(str, delimiter: ':', **options)
  raw_atc, raw_annex, raw_application, raw_dose, raw_unit = str.split(delimiter)

  atc_code = parse_string(raw_atc)

  annex = raw_annex && parse_string(raw_annex)
  # A present-but-blank annex is reported as nil.
  annex = nil if annex && annex.empty?

  {
    atc_code: atc_code,
    annex: annex,
    application: parse_string(raw_application),
    dose: parse_decimal(raw_dose),
    unit: parse_string(raw_unit)
  }
end

.parse_string(str, **options) ⇒ Object



69
70
71
# File 'lib/csvpp/conversions.rb', line 69

# Returns the string form of +str+ without leading and trailing whitespace.
#
# @param str [Object] value to convert; nil yields an empty string
# @param options [Hash] ignored
# @return [String]
def parse_string(str, **options)
  text = str.to_s
  text.strip
end