Class: DataShift::DataFlowSchema

Inherits: Object
Includes:
Logging
Defined in:
lib/datashift/mapping/data_flow_schema.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Logging

#logdir, #logdir=, #logger, #verbose

Constructor Details

#initialize ⇒ DataFlowSchema

Returns a new instance of DataFlowSchema.



54
55
56
# File 'lib/datashift/mapping/data_flow_schema.rb', line 54

def initialize
  @nodes = DataShift::NodeCollection.new
end

Instance Attribute Details

#nodes ⇒ Object (readonly)

Returns the value of attribute nodes.



52
53
54
# File 'lib/datashift/mapping/data_flow_schema.rb', line 52

def nodes
  @nodes
end

#raw_data ⇒ Object (readonly)

Returns the value of attribute raw_data.



52
53
54
# File 'lib/datashift/mapping/data_flow_schema.rb', line 52

def raw_data
  @raw_data
end

#yaml_data ⇒ Object (readonly)

Returns the value of attribute yaml_data.



52
53
54
# File 'lib/datashift/mapping/data_flow_schema.rb', line 52

def yaml_data
  @yaml_data
end

Instance Method Details

#create_node_collections(klass, doc_context: nil) ⇒ Object



72
73
74
75
76
# File 'lib/datashift/mapping/data_flow_schema.rb', line 72

def create_node_collections(klass, doc_context: nil)
  context = doc_context || DocContext.new(klass)
  @nodes = DataShift::NodeCollection.new(doc_context: context)
  @nodes
end

#headers ⇒ Object

Example stored header (inspect output):

  #<DataShift::Header @presentation="status_str", @source="status_str">


63
64
65
66
# File 'lib/datashift/mapping/data_flow_schema.rb', line 63

def headers
  # TODO: fix doc context so it can be created 'empty' i.e without AR klass, and always has empty headers
  @nodes.doc_context.try(:headers) || []
end

#klass_to_model_methods(klass) ⇒ Object

Helpers for dealing with Active Record models and collections Catalogs the supplied Klass and builds set of expected/valid Headers for Klass



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/datashift/mapping/data_flow_schema.rb', line 100

def klass_to_model_methods(klass)

  op_types_in_scope = DataShift::Configuration.call.op_types_in_scope

  collection = ModelMethods::Manager.catalog_class(klass)

  model_methods = []

  if collection

    collection.each { |mm| model_methods << mm if(op_types_in_scope.include? mm.operator_type) }

    remove = DataShift::Transformation::Remove.new

    remove.unwanted_model_methods model_methods
  end

  model_methods
end

#prepare_from_file(yaml_file, locale_key = 'data_flow_schema') ⇒ Object

Supports YAML with optional ERB snippets

See Config generation or lib/datashift/templates/import_export_config.erb for full syntax

Returns DataShift::NodeCollection



126
127
128
129
130
131
132
# File 'lib/datashift/mapping/data_flow_schema.rb', line 126

def prepare_from_file(yaml_file, locale_key = 'data_flow_schema')
  @raw_data = ERB.new(File.read(yaml_file)).result

  yaml = Configuration.parse_yaml(yaml_file)

  prepare_from_yaml(yaml, locale_key)
end

#prepare_from_klass(klass, doc_context = nil) ⇒ Object

Build the node collection from a Class, that is for each operator in scope create a method binding and a node context, and add to collection.



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/datashift/mapping/data_flow_schema.rb', line 81

def prepare_from_klass( klass, doc_context = nil )

  @nodes = create_node_collections(klass, doc_context: doc_context)

  klass_to_model_methods( klass ).each_with_index do |mm, i|
    @nodes.headers.add(mm.operator) # for a class, the header names, default to the operators (methods)

    binding = MethodBinding.new(mm.operator, mm, idx: i)

    # TODO: - do we really need to pass in the doc context when parent nodes already has it ?
    @nodes << DataShift::NodeContext.new(@nodes.doc_context, binding, i, nil)
  end

  @nodes
end

#prepare_from_string(text, locale_key = 'data_flow_schema') ⇒ Object



134
135
136
137
138
139
# File 'lib/datashift/mapping/data_flow_schema.rb', line 134

def prepare_from_string(text, locale_key = 'data_flow_schema')
  @raw_data = text
  yaml = YAML.safe_load(raw_data)

  prepare_from_yaml(yaml, locale_key)
end

#prepare_from_yaml(yaml, locale_key = 'data_flow_schema') ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/datashift/mapping/data_flow_schema.rb', line 141

def prepare_from_yaml(yaml, locale_key = 'data_flow_schema')

  @yaml_data = yaml

  raise "Bad YAML syntax  - No key #{locale_key} found in #{yaml}" unless yaml[locale_key]

  locale_section = yaml[locale_key]

  if(locale_section.key?('Global'))
    global_nodes = locale_section.delete('Global')

    [*global_nodes].each do |c|

      # TODO: what is c ?   a list or hash ?
      # if DataShift::Configuration.call.respond_to #{c}=
      # Set the global value e.g
      # DataShift::Configuration.call.force_inclusion_of_columns = [:audio]
    end
  end

  unless locale_section.keys.present?
    logger.warn('No class related configuration found in YAML syntax- Nothing to process')
    return DataShift::NodeCollection.new
  end

  class_name = locale_section.keys.first

  klass = MapperUtils.class_from_string_or_raise(class_name)

  klass_section = locale_section[class_name]

  DataShift::Transformation.factory { |f| f.configure_from_yaml(class_name, klass_section) }

  @nodes = create_node_collections(klass)

  if(klass_section && klass_section.key?('nodes'))

    yaml_nodes = klass_section['nodes']

    logger.info("Read Data Schema Nodes: #{yaml_nodes.inspect}")

    unless(yaml_nodes.is_a?(Array))
      Rails.logger.error('Bad syntax in flow schema YAML - Nodes should be a sequence')
      raise 'Bad syntax in flow schema YAML - Nodes should be a sequence'
    end

    # for operator and type
    model_method_mgr = ModelMethods::Manager.catalog_class(klass)

    yaml_nodes.each_with_index do |keyed_node, i|

      unless(keyed_node.keys.size == 1)
        raise ConfigFormatError, "Bad syntax in flow schema YAML - Section #{keyed_node} should be keyed hash"
      end

      # data_flow_schema:
      #   Project:
      #     nodes:
      #       - project:
      #           source: "title"           # source of data, defaults to node name (project) if not specified
      #           presentation: "Title"     # e.g for export headers
      #           operator: title
      #           operator_type: has_many
      #
      logger.info("Node Data: #{keyed_node.inspect}")

      node = keyed_node.keys.first

      section = keyed_node.values.first || {}

      # TODO: - layout with heading is verbose for no benefit - defunct, simply node.source, node.presentation
      source = section.fetch('heading', {}).fetch('source', nil)

      # Unless a specific source mentioned assume the node is the source
      source ||= section.fetch('source', node)

      presentation = section.fetch('presentation', nil)

      @nodes.headers.add(source, presentation: presentation)

      if(section['operator'])
        # Find the domain model method details
        # byebug
        model_method = model_method_mgr.search(section['operator'])

        unless model_method
          operator_type = section['operator_type'] || :method

          # TODO: validate type ? guess we expect one of ModelMethod.supported_types_enum
          model_method = model_method_mgr.insert(section['operator'], operator_type)
          # TODO: - This index could be hard coded by the user in YAML or we try to derive it from the headers
          # byebug
          method_binding = MethodBinding.new(source, model_method, idx: section['index'])
        end
      end

      # Now ensure we bind source/header(and index) to the method tht performs assignment of inbound datum to the model
      #
      # TOFIX - This is a bug waiting to happen right ? i is not coming from the headers
      # so chances are user hasn't made config indexed as per headers
      # index could be hard coded by the user in YAML or we try to derive it from the headers via the binder ???
      method_binding ||= MethodBinding.new(source, model_method, idx: i)

      node_context = DataShift::NodeContext.new(@nodes.doc_context, method_binding, i, nil)

      @nodes << node_context
    end
  end

  @nodes
end

#sources ⇒ Object



68
69
70
# File 'lib/datashift/mapping/data_flow_schema.rb', line 68

def sources
  @nodes.collect(&:method_binding).collect(&:source)
end