Module: Eco::Data::Locations::NodeLevel::Cleaner

Includes:
Convert, Language::AuxiliarLogger
Included in:
Parsing, Serial
Defined in:
lib/eco/data/locations/node_level/cleaner.rb

Instance Attribute Summary

Attributes included from Language::AuxiliarLogger

#logger

Instance Method Summary collapse

Methods included from Convert

#csv_from, #empty_array, #empty_level_tracker_hash, #hash_tree_to_tree_csv, #log_pretty_inspect, #normalize_arrays, #report_repeated_node_ids

Methods included from Language::AuxiliarLogger

#log

Instance Method Details

#done_ids ⇒ Object

Tracker helper (those done)



89
90
91
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 89

# Tracker helper: lazily-created list of the node ids that were already processed.
#
# @return [Array] the memoized tracker (the same object on every call until reset).
def done_ids
  @done_ids = [] unless @done_ids
  @done_ids
end

#fill_in_parents(nodes) ⇒ Object

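Sets the parentId property. Although with normalized nodes the parents are self-contained, we use this method to assign each node's parentId from the most recent node seen one level above it, and to warn when that differs from the expected parent.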



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 58

# Sets the `parentId` of every node to the id of the latest node seen one level above it.
#
# Nodes are walked in the given order while tracking, per level, the last node seen.
# A warning is logged when the assigned parent differs from the node's upcased
# `clean_parent_id`, or when a node that is not at level 1 has no tracked parent.
#
# @param nodes [Enumerable] nodes responding to `id`, `actual_level`,
#   `clean_parent_id` and `parentId=`; they are modified in place.
# @return [Object] the very same `nodes` object that was passed in.
def fill_in_parents(nodes)
  # NOTE(review): 11 is presumably the number of levels to track -- confirm
  # against `empty_level_tracker_hash`.
  last_seen = empty_level_tracker_hash(11)

  nodes.each do |node|
    expected_id = node.clean_parent_id&.upcase
    header      = "Expecting node '#{node.id}' to have parent: '#{expected_id}'\n"
    level       = node.actual_level
    parent      = last_seen[level - 1]

    if parent
      node.parentId = parent.id
      if expected_id != node.parentId
        log(:warn) { header + " * We got '#{parent.id}' instead" }
      end
    elsif level != 1
      # Level 1 nodes are expected to have no parent; any other level should.
      log(:warn) { "#{header} but we did not get parent." }
    end

    last_seen[level] = node
  end

  nodes
end

#repeated_ids ⇒ Object

Tracker helper (those repeated)



84
85
86
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 84

# Tracker helper: lazily-created list of the entries recorded for repeated node ids.
#
# @return [Array] the memoized tracker (the same object on every call until reset).
def repeated_ids
  return @repeated_ids if @repeated_ids

  @repeated_ids = []
end

#reset_trackers! ⇒ Object



93
94
95
96
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 93

# Replaces both trackers (done ids and repeated ids) with brand new empty arrays.
# Previously handed-out tracker arrays are left untouched.
#
# @return [Array] the new, empty repeated-ids tracker (value of the last assignment).
def reset_trackers!
  @done_ids = []
  # Kept last: this assignment is also the method's return value.
  @repeated_ids = []
end

#tidy_nodes(nodes, prev_node: nil, main: true) ⇒ Array<NodeLevel>

Note:
  1. It first discards node ids/tags that have been already pulled (discard repeated)
  2. For non repeated, it identifies if there's a gap (jump of multiple levels)
  3. It covers the gap if present by decoupling merged parent(s) from the same node (see node.decouple)
  4. Then, it delegates the filling in of parents to fill_in_parents function.

Prevents repeated node ids/tags, decouples merged levels, covers gaps (jumping multiple levels)

Returns:

  • (Array<NodeLevel>)

    child to parent relationships solved and no double-ups.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 14

# Prevents repeated node ids/tags, decouples merged levels and covers gaps
# (jumps of multiple levels).
#
# @note
#   1. It first discards node ids/tags that have already been pulled (repeated ones
#      are only recorded in `repeated_ids`).
#   2. For non repeated nodes, it identifies whether there is a gap (a jump of
#      multiple levels) relative to the level shared with the previous node.
#   3. It covers the gap by decoupling the merged parent(s) from the node itself
#      (see `node.decouple`) and tidying those recursively.
#   4. On the main call only, it reports the repeated ids and delegates the
#      filling in of parents to `fill_in_parents`.
# @param nodes [Array<NodeLevel>] the nodes to tidy, in order.
# @param prev_node [NodeLevel, nil] the node preceding the first of `nodes`
#   (passed on recursive calls).
# @param main [Boolean] `true` on the top-level call: trackers are reset first,
#   and repeated ids are reported and parents filled in at the end.
# @return [Array<NodeLevel>] child to parent relationships solved and no double-ups.
def tidy_nodes(nodes, prev_node: nil, main: true) # rubocop:disable Metrics/AbcSize
  reset_trackers! if main

  # NOTE(review): this is the level of the *initial* `prev_node` only. It is used
  # solely as fallback when `common_level_with` yields nothing -- confirm whether
  # that can happen for a non-nil `prev_node`.
  prev_level = prev_node&.actual_level || 0

  tidied = nodes.each_with_object([]) do |node, out|
    if done_ids.include?(node.id)
      row_str = node.row_num ? " - (row: #{node.row_num})" : ''
      repeated_ids << "#{node.id} (level: #{node.level})#{row_str}"
      next
    end

    level        = node.actual_level
    common_level = node.common_level_with(prev_node) || prev_level
    gap          = level - (common_level + 1)

    if gap >= 1
      missing_nodes = node.decouple(gap)

      # Built lazily inside the block. The expected level is derived from
      # `common_level` (the value the gap was measured against) rather than
      # from the level of the initial `prev_node`.
      log(:debug) do
        "(Row: #{node.row_num}) ID/Tag '#{node.id}' (lev #{level}) jumps #{gap} level(s) " \
          "(expected #{common_level + 1}).\n  Adding missing upper node(s): " \
          "#{missing_nodes.map(&:raw_tag).pretty_inspect}"
      end

      # The very top missing node (first in list) should be checked against the
      # previous node, alongside any descendants in missing_nodes (when gap 2+).
      out.concat(tidy_nodes(missing_nodes, prev_node: prev_node, main: false))
    end

    out << node
    done_ids << node.id
    prev_node = node
  end

  return tidied unless main

  report_repeated_node_ids(repeated_ids)
  fill_in_parents(tidied)
  tidied
end