Class: Cure::Extract::Extractor

Inherits:
Object
  • Object
show all
Includes:
Configuration, Helpers::FileHelpers, Log
Defined in:
lib/cure/extract/extractor.rb

Instance Attribute Summary

Instance Method Summary

Methods included from Helpers::FileHelpers

#clean_dir, #read_file, #with_file, #with_temp_dir

Methods included from Configuration

#config, #create_config, #register_config

Methods included from Log

#log_debug, #log_error, #log_info, #log_warn

Constructor Details

#initialize(opts) ⇒ Extractor

Returns a new instance of Extractor.

Parameters:

  • opts (Hash)


19
20
21
# File 'lib/cure/extract/extractor.rb', line 19

# Builds an extractor and keeps the supplied options for later use.
#
# @param opts [Hash] options stored unchanged in @opts
def initialize(opts)
  @opts = opts
end

Instance Attribute Details

#opts ⇒ Object (readonly)

Parameters:

  • opts (Hash)


16
17
18
# File 'lib/cure/extract/extractor.rb', line 16

# Reader for the options given to the constructor.
#
# @return [Object] whatever is currently held in @opts
def opts
  @opts
end

Instance Method Details

#extract_from_contents(file_contents) ⇒ WrappedCSV

Parameters:

  • file_contents (String)

Returns:

  • (WrappedCSV)


32
33
34
35
36
# File 'lib/cure/extract/extractor.rb', line 32

# Parses raw CSV text (with the header: :none option) and logs how many
# sections the parsed result contains.
#
# @param file_contents [String] CSV text handed to parse_csv
# @return [WrappedCSV] the object returned by parse_csv
def extract_from_contents(file_contents)
  parse_csv(file_contents, header: :none).tap do |wrapped|
    log_info("Parsed CSV into #{wrapped.content.length} sections.")
  end
end

#extract_from_file(csv_file_location) ⇒ WrappedCSV

Parameters:

  • csv_file_location (String)

Returns:

  • (WrappedCSV)


25
26
27
28
# File 'lib/cure/extract/extractor.rb', line 25

# Reads the file at the given location and runs the result through
# extract_from_contents.
#
# @param csv_file_location [String] location passed to read_file
# @return [WrappedCSV] the value returned by extract_from_contents
def extract_from_file(csv_file_location)
  extract_from_contents(read_file(csv_file_location))
end

#extract_from_rows(rows, named_range) ⇒ Object

Parameters:

  • rows (Array<Array>)
  • named_range (Object) — range descriptor passed to CsvLookup.array_position_lookup


78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/cure/extract/extractor.rb', line 78

# Cuts the section described by a named range out of the given rows.
#
# The lookup result is read as [first column, last column, first row,
# last row]. A last row of -1 selects every row; otherwise only rows whose
# index lies within first row..last row (inclusive) are kept. Each kept row
# is sliced to first column..last column.
#
# @param rows [Array<Array>] rows to slice
# @param named_range [Object] passed as-is to CsvLookup.array_position_lookup
# @return [Array] one column slice per selected row, in row order
def extract_from_rows(rows, named_range)
  first_col, last_col, first_row, last_row = CsvLookup.array_position_lookup(named_range)

  selected = rows.each_with_index.select do |_row, idx|
    last_row == -1 || idx.between?(first_row, last_row)
  end

  selected.map { |row, _idx| row[first_col..last_col] }
end

#extract_named_ranges(csv_rows) ⇒ Array<Hash>

Parameters:

  • csv_rows (Array<Array>)

Returns:

  • (Array<Hash>)


57
58
59
60
61
62
63
64
65
66
67
# File 'lib/cure/extract/extractor.rb', line 57

# Extracts the rows for every named range that the transformation
# candidates refer to.
#
# The distinct named_range values of the candidates are handed to the
# template's extraction section (required_named_ranges); each entry it
# returns is then cut out of the CSV rows.
#
# @param csv_rows [Array<Array>] parsed CSV rows
# @return [Array<Hash>] one hash per named range with "rows" and "name" keys
def extract_named_ranges(csv_rows)
  candidates = config.template.transformations.candidates
  extraction = config.template.extraction
  referenced_names = candidates.map(&:named_range).uniq

  extraction.required_named_ranges(referenced_names).map do |range|
    section_rows = extract_from_rows(csv_rows, range["section"])
    { "rows" => section_rows, "name" => range["name"] }
  end
end

#extract_variables(csv_rows) ⇒ Hash

Parameters:

  • csv_rows (Array<Array>)

Returns:

  • (Hash)


71
72
73
74
75
# File 'lib/cure/extract/extractor.rb', line 71

# Resolves every variable declared in the template's extraction section
# to the cell value found at its location.
#
# @param csv_rows [Array<Array>] parsed CSV rows
# @return [Hash] variable "name" => value returned by lookup_location
def extract_variables(csv_rows)
  pairs = config.template.extraction.variables.map do |variable|
    [variable["name"], lookup_location(csv_rows, variable["location"])]
  end

  pairs.to_h
end

#handle_row(row_idx, row, psx) ⇒ Array?

Parameters:

  • row_idx (Integer)
  • row (Array)
  • psx (Array)

Returns:

  • (Array, nil)


103
104
105
106
107
# File 'lib/cure/extract/extractor.rb', line 103

# Returns the column slice of a single row when that row falls inside the
# requested range.
#
# psx is read as [first column, last column, first row, last row]. A last
# row of -1 accepts every row index; otherwise row_idx must lie within
# first row..last row (inclusive).
#
# @param row_idx [Integer] index of the row being considered
# @param row [Array] the row's cells
# @param psx [Array] the four range positions described above
# @return [Array, nil] row[first column..last column], or nil when the row
#   is outside the range
def handle_row(row_idx, row, psx)
  first_col, last_col, first_row, last_row = psx
  wanted = last_row == -1 || (first_row <= row_idx && row_idx <= last_row)

  if wanted
    row[first_col..last_col]
  else
    nil
  end
end

#lookup_location(rows, variable_location) ⇒ Object

Parameters:

  • rows (Array<Array>)
  • variable_location (String)


93
94
95
96
97
# File 'lib/cure/extract/extractor.rb', line 93

# Fetches the single cell that a variable location points at.
#
# The column index comes from CsvLookup.position_for_letter and the row
# index from CsvLookup.position_for_digit, both given the same location.
#
# @param rows [Array<Array>] parsed CSV rows
# @param variable_location [String] location resolved through CsvLookup
# @return [Object] the value stored at rows[row index][column index]
def lookup_location(rows, variable_location)
  column = CsvLookup.position_for_letter(variable_location)
  line = CsvLookup.position_for_digit(variable_location)

  rows[line][column]
end

#parse_csv(file_contents, opts = {}) ⇒ WrappedCSV

Parameters:

  • file_contents (String)
  • opts (Hash) (defaults to: {})

Returns:

  • (WrappedCSV)


43
44
45
46
47
48
49
50
51
52
53
# File 'lib/cure/extract/extractor.rb', line 43

# Parses CSV text with Rcsv, collecting every yielded row, then wraps the
# extracted named ranges and variables in a WrappedCSV.
#
# @param file_contents [String] CSV text given to Rcsv.parse
# @param opts [Hash] options forwarded unchanged to Rcsv.parse
# @return [WrappedCSV] content set from extract_named_ranges and variables
#   set from extract_variables, both computed over the collected rows
def parse_csv(file_contents, opts={})
  collected_rows = []
  Rcsv.parse(file_contents, opts) do |row|
    collected_rows.push(row)
  end

  WrappedCSV.new.tap do |wrapped|
    wrapped.content = extract_named_ranges(collected_rows)
    wrapped.variables = extract_variables(collected_rows)
  end
end