Class: CsvMigration

Inherits:
Object
  • Object
show all
Defined in:
lib/csv_migration.rb

Overview

Description: Parse and test data from a csv file.

Instance Method Summary collapse

Constructor Details

#initialize(file_name:, delimiter: ';') ⇒ CsvMigration



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/csv_migration.rb', line 7

# Prepare a migration run for a single CSV file: derive the report file
# paths, delete reports left over from a previous run and reset all
# parser state.
#
# @param file_name [String] name (or path) of the CSV file to parse
# @param delimiter [String] column separator used in the CSV file
def initialize(file_name:, delimiter: ';')
  # File name for parsing in csv format
  @file_name_csv = file_name
  @delimiter = delimiter

  # Label used inside the report file names. File.basename strips both the
  # directory part and a trailing ".csv", so passing "dir/data.csv" no longer
  # yields report paths that point into a non-existent "v_parser_*_dir/" folder.
  @file_name = File.basename(@file_name_csv, '.csv')

  # File for export correct data from the base file
  @correct_file_data_csv = File.expand_path("v_parser_correct_#{@file_name}.csv")
  @errors_log = File.expand_path("v_parser_errors_#{@file_name}.log")
  @duplicates_log = File.expand_path("v_parser_duplicates_#{@file_name}.log")
  @not_saved_file_data_errors = File.expand_path("v_parser_not_saved_#{@file_name}.log")

  # Parsing file
  @file_for_parsing = File.expand_path(@file_name_csv)

  # Remove old files
  remove_old_files

  # Count rows in the file without header. The count is done in Ruby instead
  # of interpolating the path into a `wc -l` shell command, so paths with
  # spaces or shell metacharacters are safe and no external tool is needed.
  # A missing file counts as zero lines (result -1), as it did before; the
  # error surfaces when the file is actually read.
  total_lines = File.file?(@file_for_parsing) ? File.foreach(@file_for_parsing).count : 0
  @count_file_lines = total_lines - 1

  @line_num = 0
  @counter_good_records = 0
  @counter_duplicates = 0

  # Raw data from a file without header
  @file_raw_data = []

  # Data after parsing
  @parsed_data = []

  # Header fields from csv file
  @parsing_file_header = []

  # Error statuses
  @errors = {}

  # Errors data
  @errors_data = {}

  # Duplicates records
  @duplicates = {}

  # Errors creating records from the file
  @not_saved_records = []

  # Relation of header name from the file with a specific field name of a table
  #
  # Key: column name in the csv file
  # Value:
  #         field: a field name of a table in a DB (symbol)
  #         require: a field should not be empty (true/false)
  #         replace: need to use @replace_dict ( @replace_dict = { 'what need replace' => 'replace to this' } ) (true/false)
  #         prefix: need to add value as a prefix from a field header name (header name from CSV file) (string)
  #         validate: callback method which necessary call for validating a specific format (symbol)
  #         is_empty: array with fields where need to search data if a value is empty (field name from CSV file header) (array of strings)
  #         default: a value which need set by default in any case (any type)
  #         callback: callback method which necessary call for creating a specific format (symbol)
  @ref_csv_head_from_file = {}

  # Dictionary with fields names from the @ref_csv_head_from_file where need to search duplicates
  @duplicates_dict = %i[]

  # Dictionary for replace a key word to a value word: 'hallo' => 'Hello'
  @replace_dict = {}
end

Instance Method Details

#call ⇒ Object

Start parsing



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/csv_migration.rb', line 76

# Start parsing.
#
# Reads @file_for_parsing line by line: the first line becomes the
# (downcased) header, every following line is stored raw, checked for
# required fields and, when valid, converted into a parsed record. After the
# loop it resolves duplicates, writes the report files, prints a summary and
# hands the parsed data over to the DB-creation step.
def call
  puts "Start parse file #{@file_for_parsing}"

  # Read line from csv file
  File.foreach(@file_for_parsing) do |line|
    # Transcode once and reuse the result for both the split fields and the raw copy.
    # NOTE(review): transcoding from 'binary' with replace: '' appears to drop
    # every non-ASCII byte, not only invalid ones - confirm this is intended.
    clean_line = line.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '').chomp
    data = clean_line.split(@delimiter).map(&:strip)

    if @line_num.zero?
      @parsing_file_header = data.map(&:downcase)
      @line_num += 1
      next
    end

    @file_raw_data << clean_line

    check = check_require_fields(data)

    unless check[:status]
      @line_num += 1
      puts "Incorrect data! Required field: #{check[:error]} is empty!"
      next
    end

    records = find_data_from_csv(data, @ref_csv_head_from_file)

    # @line_num counts the header too, so the first data row gets id 0
    @parsed_data << { id: @line_num - 1 }.merge(records)

    puts "Parse left #{@count_file_lines - @line_num} lines"
    @line_num += 1
    @counter_good_records += 1
  end

  duplicates_id_list = check_duplicates
  remove_duplicates(duplicates_id_list) if duplicates_id_list.any?

  save_errors

  create_file_without_errors

  # Number of data rows read (header excluded)
  total_records = @line_num - 1
  bad_records = @errors.values.sum

  # Records counted in more than one category. The total is subtracted as a
  # whole; previously "- @line_num - 1" subtracted two records too many.
  double_duplicates = @counter_good_records + bad_records + @counter_duplicates - total_records

  puts
  puts "Testing data was finished. All records in the file (without header): #{total_records}"
  puts "Good records: #{@counter_good_records}"
  puts "Bad records: #{bad_records}"
  puts "Duplicate records: #{@counter_duplicates}"
  puts "Duplicates more than one field: #{double_duplicates}" if double_duplicates.positive?
  puts "Successfully parsed records: #{@parsed_data.size}"

  error_actions if bad_records.nonzero? || @counter_duplicates.nonzero?

  create_data_to_db

  save_record_errors_to_file if @not_saved_records.any?
end

#remove_old_files ⇒ Object

Remove old files



133
134
135
136
137
138
# File 'lib/csv_migration.rb', line 133

# Delete the report files produced by a previous run (errors log,
# duplicates log, corrected CSV and not-saved log). Paths that do not
# exist are skipped.
def remove_old_files
  stale_reports = [@errors_log, @duplicates_log, @correct_file_data_csv, @not_saved_file_data_errors]

  stale_reports.each do |report_path|
    next unless File.exist?(report_path)

    File.delete(report_path)
  end
end