Class: Datanorm::File

Inherits:
Object
  • Object
show all
Includes:
Logging, Enumerable
Defined in:
lib/datanorm/file.rb

Overview

Parses a Datanorm file line by line and wraps each line in a Ruby object.

Instance Method Summary collapse

Methods included from Logging

included

Constructor Details

#initialize(path:) ⇒ File

Returns a new instance of File.



9
10
11
12
# File 'lib/datanorm/file.rb', line 9

# Prepares a reader for the file at the given location. Nothing is read
# here; the location is only logged and stored in @path.
#
# @param path the location of the file to process, stored as-is
def initialize(path:)
  @path = path
  log { "Loading file `#{path}`" }
end

Instance Method Details

#dateObject



33
34
35
# File 'lib/datanorm/file.rb', line 33

# Delegates to the parsed header.
#
# @return whatever `header.date` returns
def date = header.date

#eachObject



37
38
39
40
41
42
43
44
45
46
47
# File 'lib/datanorm/file.rb', line 37

# Streams the file through CSV and yields one parsed object per data row.
#
# The first line is skipped because the header is parsed separately, and rows
# for which CSV reports no columns (empty lines) are skipped as well. Skipped
# lines still advance the counter, so `source_line_number` is the 1-based
# position of the row within the file.
#
# @yield the result of `Datanorm::Lines::Parse.call` for each data row
# @return [Enumerator] when called without a block, so that external
#   enumeration (`file.each.with_index`, `file.each.next`, ...) works
def each
  # Without this guard a block-less call would fail with LocalJumpError on the first data row.
  return enum_for(:each) unless block_given?

  line_number = 0

  ::CSV.foreach(path, **options) do |columns|
    line_number += 1
    next if line_number == 1 # Skip header, it's parsed separately
    next if columns.empty? # Empty line

    yield ::Datanorm::Lines::Parse.call(version:, columns:, source_line_number: line_number)
  end
end

#headerObject



14
15
16
17
18
19
20
21
22
23
# File 'lib/datanorm/file.rb', line 14

# Lazily builds the header object from the first line of the file and
# memoizes it in @header.
#
# The file is opened with CP850 as its external encoding and only the first
# line is read.
#
# @return the `Datanorm::Header` built from the first line
def header
  return @header if defined?(@header)

  first_line = ::File.open(path, "r:#{Encoding::CP850}", &:gets)
  log { 'Parsing header line...' }
  @header = ::Datanorm::Header.new(line: first_line)
end

#lines_countObject

Counts the total number of lines in the file, so that we can indicate how much progress has been made.



50
51
52
53
54
55
56
57
58
59
# File 'lib/datanorm/file.rb', line 50

# Counts the lines of the file (read as CP850) and memoizes the result in
# @lines_count.
#
# The memo is assigned only after the scan has completed. If reading fails
# (for example because the file does not exist), nothing is cached, so a later
# call scans again instead of returning 0 or a partial count.
#
# @return [Integer] the total number of lines in the file
def lines_count
  return @lines_count if defined?(@lines_count)

  log { 'Scanning number of total lines... (this takes about 2 seconds per GB)' }
  # `foreach` doesn't load the entire file into memory; without a block it
  # returns an enumerator that `count` consumes line by line.
  @lines_count = ::File.foreach(path, encoding: Encoding::CP850).count
  log { "Scan complete, counted #{@lines_count} lines." }
  @lines_count
end

#titleObject



29
30
31
# File 'lib/datanorm/file.rb', line 29

# Delegates to the parsed header.
#
# @return whatever `header.title` returns
def title = header.title

#versionObject



25
26
27
# File 'lib/datanorm/file.rb', line 25

# Delegates to the parsed header.
#
# @return whatever `header.version` returns
def version = header.version