Module: Dsl

Overview

Methods to be used in customer specific script files

Constant Summary collapse

COMMA_SPLITTER_REGEX =

Splits comma separated strings that contain commas within the value. Such values have to be enclosed between BEGIN and END. Example:

Year,c1+c2,c1=~/[A-Z]{1,2}/,Month
/(?<=,|^)(BEGIN.*?END|\/.*?\/|.*?)(?=,|$)/i
INTEGER_REGEX =

Recognizes a string that represents an integer value

/^\d{1,3}(?:[,\. ]\d{3}|\d)*$/
COMMA_POINT_SPACE_REGEX =
/[,\. ]/
DECIMAL_POINT_REGEX =

Recognizes a string that represents a float value in the form of 1,333.45

/^\d{1,3}(?:[, ]\d{3}|\d)*(?:\.\d*)$/
DECIMAL_COMMA_REGEX =

Recognizes a string that represents a float value in the form of 1.333,45

/^\d{1,3}(?:[\. ]\d{3}|\d)*(?:,\d*)$/
POINT_SPACE_REGEX =

A regex that recognizes ‘.’ and ‘ ’ to be used e.g. in #gsub to optimize performance

/[\. ]/
COMMA_SPACE_REGEX =

A regex that recognizes ‘,’ and ‘ ’ to be used e.g. in #gsub to optimize performance

/[, ]/
POINT =

A point ‘.’ to be used e.g. in #gsub to optimize performance

'.'
COMMA =

A comma ‘,’ to be used e.g. in #gsub to optimize performance

','
SEMICOLON =

A semicolon ‘;’ to be used e.g. in #gsub to optimize performance

';'
EMPTY =

An empty string ‘’ to be used e.g. in #gsub to optimize performance

''

Instance Method Summary collapse

Instance Method Details

#clean_up(files) ⇒ Object

Delete obsolete files :call-seq:

clean_up(%w{ file1 file2 }) -> files


68
69
70
71
72
# File 'lib/sycsvpro/dsl.rb', line 68

# Deletes every file named in +files+ after printing a progress message.
#
# files - enumerable of file paths; each one is passed to File.delete, so a
#         path that cannot be deleted raises the corresponding system error.
#
# Returns the +files+ collection it was given (the result of #each).
def clean_up(files)
  puts
  print "Cleaning up directory..."

  files.each do |obsolete|
    File.delete(obsolete)
  end
end

#is_float?(value, decimal_separator = POINT) ⇒ Boolean

Checks if the string represents a float. If so, returns the string normalized for String#to_f (grouping separators removed, decimal separator converted to ‘.’); otherwise returns nil

"1.5" -> "1.5"
"1."  -> "1."

Returns:

  • (Boolean)


131
132
133
134
135
136
137
138
139
140
# File 'lib/sycsvpro/dsl.rb', line 131

# Checks whether +value+ is written as a float and, if so, returns it as a
# normalized String; otherwise returns nil.
#
# value             - the string to inspect.
# decimal_separator - POINT (default) selects DECIMAL_POINT_REGEX; any other
#                     value selects DECIMAL_COMMA_REGEX.
#
# With the point separator, characters matching COMMA_SPACE_REGEX are
# removed. Otherwise characters matching POINT_SPACE_REGEX are removed and
# COMMA is replaced by POINT. Note the result is a String, not a Float.
def is_float?(value, decimal_separator = POINT)
  if decimal_separator == POINT
    return nil if (value =~ DECIMAL_POINT_REGEX).nil?

    value.gsub(COMMA_SPACE_REGEX, EMPTY)
  else
    return nil if (value =~ DECIMAL_COMMA_REGEX).nil?

    without_grouping = value.gsub(POINT_SPACE_REGEX, EMPTY)
    without_grouping.gsub(COMMA, POINT)
  end
end

#is_integer?(value) ⇒ Boolean

Checks if the string represents an integer. If so, returns the string with the grouping separators (‘,’, ‘.’ and ‘ ’) removed, ready for String#to_i; otherwise returns nil

Returns:

  • (Boolean)


122
123
124
125
# File 'lib/sycsvpro/dsl.rb', line 122

# Checks whether +value+ matches INTEGER_REGEX and, if so, returns it with
# every character matching COMMA_POINT_SPACE_REGEX removed; otherwise
# returns nil. Note the result is a String, not an Integer.
def is_integer?(value)
  return nil if (value =~ INTEGER_REGEX).nil?

  value.gsub(COMMA_POINT_SPACE_REGEX, EMPTY)
end

#params ⇒ Object

read arguments provided at invocation :call-seq:

params => infile, Result, other_params

Result methods are #cols, #col_count, #row_count, #sample_row



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/sycsvpro/dsl.rb', line 36

# Reads the arguments provided at invocation from ARGV.
#
# The first three ARGV entries are consumed as script name, method name and
# input file. When the input file is missing or does not exist, a message is
# written to STDERR and the process exits with status -1. Otherwise the call
# is echoed to stdout, the input file is analyzed with Sycsvpro::Analyzer and
# the column and row counts are printed.
#
# Returns the flattened array [infile, result, *remaining ARGV entries].
def params
  # The script name has to be shifted off ARGV but is not used afterwards.
  _script     = ARGV.shift
  method_name = ARGV.shift
  infile      = ARGV.shift

  if infile.nil?
    STDERR.puts "You must provide an input file"
    exit(-1)
  elsif !File.exist?(infile) # File.exists? was removed in Ruby 3.2
    STDERR.puts "#{infile} does not exist. You must provide a valid input file"
    exit(-1)
  end

  if ARGV.empty?
    print "#{method_name}(#{infile})"
  else
    print "#{method_name}(#{infile}, #{ARGV.join(', ')})"
  end

  puts
  print "Analyzing #{infile}..."

  result = Sycsvpro::Analyzer.new(infile).result
  puts
  print "> #{result.col_count} cols | #{result.row_count} rows"

  [infile, result, ARGV].flatten
end

#rows(options = {}) ⇒ Object

Retrieves rows and columns from the file and returns them to the block provided by the caller



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/sycsvpro/dsl.rb', line 75

# Reads the input file line by line and yields the extracted column values
# of each non-empty line to the caller's block.
#
# options - Hash with
#           :infile     - path of the file to read (expanded with
#                         File.expand_path)
#           :row_filter - optional filter specification handed to
#                         Sycsvpro::RowFilter; lines for which the filter's
#                         #process returns nil are skipped
#           every key ending in "column" or "columns" - handed together with
#                         its value and the line's ';'-separated values to
#                         extract_values; the results are yielded in the
#                         order of the keys in +options+
#
# The file is opened in block form so the handle is closed when iteration
# finishes or is aborted by an exception or a +break+ in the caller's block.
def rows(options={})
  infile     = File.expand_path(options[:infile])
  row_filter = Sycsvpro::RowFilter.new(options[:row_filter]) if options[:row_filter]

  File.open(infile) do |file|
    file.each_with_index do |raw_line, index|
      line = raw_line.chomp
      next if line.empty?
      next if row_filter && row_filter.process(line, row: index).nil?

      values = line.split(';')
      column_specs = options.select { |key, _| key =~ /column$|columns$/ }
      extracted = column_specs.map { |key, spec| extract_values(values, key, spec) }

      yield(*extracted)
    end
  end
end

#split_by_comma_regex(values) ⇒ Object

Retrieves the values scanned by a COMMA_SPLITTER_REGEX



115
116
117
118
# File 'lib/sycsvpro/dsl.rb', line 115

# Splits +values+ into the tokens found by COMMA_SPLITTER_REGEX and removes
# every occurrence of the literal markers "BEGIN" and "END" from each token.
#
# Returns an Array of Strings.
def split_by_comma_regex(values)
  tokens = values.scan(COMMA_SPLITTER_REGEX).flatten
  tokens.map { |token| token.gsub(/BEGIN|END/, "") }
end

#str2num(value, decimal_separator = POINT) ⇒ Object

Converts a string to a numeric if the string represents a numerical value



143
144
145
146
147
148
149
150
151
152
# File 'lib/sycsvpro/dsl.rb', line 143

# Converts +value+ to a number when it is written as one.
#
# value             - the string to convert.
# decimal_separator - passed on to is_float? (defaults to POINT).
#
# is_integer? is consulted first; a truthy result is converted with #to_i.
# Otherwise is_float? is consulted; a truthy result is converted with #to_f.
# If neither applies, +value+ itself is returned unchanged.
def str2num(value, decimal_separator = POINT)
  if (integer_text = is_integer?(value))
    integer_text.to_i
  elsif (float_text = is_float?(value, decimal_separator))
    float_text.to_f
  else
    value
  end
end

#str2utf8(str) ⇒ Object

Remove non-UTF chars from string



110
111
112
# File 'lib/sycsvpro/dsl.rb', line 110

# Re-encodes +str+ from 'binary' to 'UTF-8', substituting EMPTY for every
# byte sequence that is invalid or undefined in that conversion.
#
# NOTE(review): because the source encoding is declared as 'binary', bytes
# outside the ASCII range have no UTF-8 mapping and are dropped as well, so
# valid multi-byte characters are removed too - confirm this is intended.
#
# Returns a new UTF-8 encoded String.
def str2utf8(str)
  replacement_rules = { invalid: :replace, undef: :replace, replace: EMPTY }
  str.encode('UTF-8', 'binary', **replacement_rules)
end

#unstring(line) ⇒ Object

Removes leading and trailing ‘"’ characters and spaces from csv values and collapses runs of two or more whitespace characters into a single space. Replaces ‘;’ with ‘,’ inside quoted values, as ‘;’ is used as the value separator



101
102
103
104
105
106
107
# File 'lib/sycsvpro/dsl.rb', line 101

# Cleans up one ';'-separated csv line.
#
# Steps, in order:
# 1. the line is passed through str2utf8
# 2. every span found by the scan regex (text inside double quotes that
#    contains ';') has its ';' replaced by ',' so it no longer collides with
#    the value separator
# 3. whitespace and an optional '"' at the start/end of each value are
#    removed
# 4. runs of two or more whitespace characters are collapsed to one space
#
# Returns the cleaned String.
def unstring(line)
  utf8_line = str2utf8(line)

  # The spans are collected once from the unmodified line; each replacement
  # is then applied to the progressively rewritten line.
  quoted_spans = utf8_line.scan(/(?<=^"|;")[^"]+(?=;)+[^"]*|;+[^"](?=";|"$)/)
  rewritten = quoted_spans.reduce(utf8_line) do |current, span|
    current.gsub(span, span.tr(';', ','))
  end

  stripped = rewritten.gsub(/(?<=^|;)\s*"?\s*|\s*"?\s*(?=;|$)/, "")
  stripped.gsub(/\s{2,}/, " ")
end

#write_to(file) ⇒ Object

writes values provided by a block to the given file



92
93
94
95
96
# File 'lib/sycsvpro/dsl.rb', line 92

# Opens +file+ for writing (mode 'w') and yields the open File to the
# caller's block; the file is closed when the block returns.
#
# Returns the value of the block.
def write_to(file)
  File.open(file, 'w') { |sink| yield sink }
end