Module: Eco::Data::Locations::NodeLevel::Cleaner

Includes:
Convert, Language::AuxiliarLogger
Included in:
Parsing, Serial
Defined in:
lib/eco/data/locations/node_level/cleaner.rb

Instance Attribute Summary

Attributes included from Language::AuxiliarLogger

#logger

Instance Method Summary collapse

Methods included from Convert

#csv_from, #empty_array, #empty_level_tracker_hash, #hash_tree_to_tree_csv, #log_pretty_inspect, #normalize_arrays, #report_repeated_node_ids

Methods included from Language::AuxiliarLogger

#log

Instance Method Details

#done_ids ⇒ Object

Tracker helper (those done)



84
85
86
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 84

# Tracker helper: memoised list held in `@done_ids`.
# The list is created empty on first access and reused afterwards.
#
# @return [Array] the tracker list (the same object on every call until it is replaced).
def done_ids
  return @done_ids if @done_ids

  @done_ids = []
end

#fill_in_parents(nodes) ⇒ Object

Sets the parentId property of each node. Although parents are self-contained in normalized nodes, we still use this method to set parentId explicitly and to warn when it differs from the expected parent.



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 55

# Sets the `parentId` of each node to the id of the most recent node seen one
# level above it, walking `nodes` in order.
#
# A warning is logged when:
#   * the assigned parent differs from the node's upcased `clean_parent_id`, or
#   * a node that is not at level 1 has no previous node one level above.
#
# @param nodes [Enumerable] the nodes to link; each must respond to `id`,
#   `actual_level`, `clean_parent_id`, `parentId` and `parentId=`.
# @return [Object] the very same `nodes` object that was received.
def fill_in_parents(nodes)
  # NOTE(review): presumably a level-indexed tracker sized for up to 11 levels;
  # confirm against Convert#empty_level_tracker_hash.
  prev_nodes = empty_level_tracker_hash(11)

  nodes.each do |node|
    expected_parent_id = node.clean_parent_id&.upcase
    parent_node        = prev_nodes[node.actual_level - 1]

    if parent_node
      node.parentId = parent_node.id
      unless expected_parent_id == node.parentId
        # The message is only built if the logger actually evaluates the block.
        log(:warn) {
          "Expecting node '#{node.id}' to have parent: '#{expected_parent_id}'\n" \
            " • We got '#{parent_node.id}' instead"
        }
      end
    elsif node.actual_level != 1
      # Level 1 nodes are expected to have no parent; any other level is orphaned.
      log(:warn) {
        "Expecting node '#{node.id}' to have parent: '#{expected_parent_id}'\n" \
          "but we did not get parent."
      }
    end

    # This node becomes the candidate parent for the next level down.
    prev_nodes[node.actual_level] = node
  end

  nodes
end

#repeated_ids ⇒ Object

Tracker helper (those repeated)



79
80
81
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 79

# Tracker helper: memoised list held in `@repeated_ids`.
# The list is created empty on first access and reused afterwards.
#
# @return [Array] the tracker list (the same object on every call until it is replaced).
def repeated_ids
  @repeated_ids = [] unless @repeated_ids
  @repeated_ids
end

#reset_trackers! ⇒ Object



88
89
90
91
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 88

# Replaces both tracker lists (`@done_ids` and `@repeated_ids`) with new,
# independent empty arrays.
#
# @return [Array] the new (empty) `@repeated_ids` list.
def reset_trackers!
  @done_ids, @repeated_ids = [], []
  @repeated_ids
end

#tidy_nodes(nodes, prev_node: nil, main: true) ⇒ Array<NodeLevel>

Note:
  1. It first discards node ids/tags that have already been pulled (i.e. repeated ones).
  2. For non-repeated nodes, it identifies whether there is a gap (a jump of multiple levels).
  3. It covers the gap, if present, by decoupling merged parent(s) from the same node (see node.decouple).
  4. Then, it delegates the filling in of parents to the fill_in_parents method.

Prevents repeated node ids/tags, decouples merged levels, covers gaps (jumping multiple levels)

Returns:

  • (Array<NodeLevel>)

    child to parent relationships solved and no double-ups.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/eco/data/locations/node_level/cleaner.rb', line 14

# Builds a cleaned list of nodes: skips nodes whose id was already processed,
# inserts the nodes returned by `node.decouple(gap)` when a node jumps more than
# one level, and (on the main call only) reports repeated ids and fills in parents.
#
# @param nodes [Enumerable] nodes to process, in order.
# @param prev_node [Object, nil] node that precedes the first element of `nodes`
#   (used for the level comparison); `nil` when there is none.
# @param main [Boolean] `true` for the top-level call; recursive calls pass `false`
#   so that trackers are not reset and the final reporting/parent steps are skipped.
# @return [Array] the accumulated output list.
def tidy_nodes(nodes, prev_node: nil, main: true)
  # Only the top-level call starts from clean trackers; recursive calls share them.
  reset_trackers! if main

  # NOTE(review): computed once from the initial `prev_node`; it is NOT refreshed when
  # `prev_node` is reassigned inside the loop, so both the `common_level` fallback and
  # the "expected" figure in the debug message use the initial level — confirm intended.
  prev_level = prev_node&.actual_level || 0

  nodes.each_with_object([]) do |node, out|
    if done_ids.include?(node.id)
      # Already processed id: record it for the report and leave it out of the output.
      row_str = node.row_num ? " - (row: #{node.row_num})" : ''
      repeated_ids << "#{node.id} (level: #{node.level})#{row_str}"
    else
      level = node.actual_level
      common_level   = node.common_level_with(prev_node)
      common_level ||= prev_level
      # Number of levels skipped between the common level and this node's level.
      gap            = level - (common_level + 1)

      unless gap < 1
        msg = "(Row: #{node.row_num}) ID/Tag '#{node.id}' (lev #{level}) jumps #{gap} level(s) (expected #{prev_level + 1})."
        #puts "  " + node.tags_array.pretty_inspect
        missing_nodes = node.decouple(gap)
        msg << "\n  Adding missing upper node(s): " + missing_nodes.map(&:raw_tag).pretty_inspect
        log(:debug) { msg }
        # The very top missing node (first in list) should be checked against prev_level
        # alongside any descendants in missing_nodes (when gap 2+)
        tidied_nodes = tidy_nodes(missing_nodes, prev_node: prev_node, main: false)
        out.push(*tidied_nodes)
      end
      # The node itself goes after any missing upper nodes that were inserted for it.
      out       << node
      done_ids  << node.id
      prev_node  = node
    end
  end.tap do |out|
    # Reporting and parent assignment run once, over the complete list.
    if main
      report_repeated_node_ids(repeated_ids)
      fill_in_parents(out)
    end
  end
end