Class: ActiveFacts::Generators::Transform::DataVault

Inherits:
Object
  • Object
show all
Defined in:
lib/activefacts/generators/transform/datavault.rb

Instance Method Summary collapse

Constructor Details

#initialize(vocabulary, *options) ⇒ DataVault

Returns a new instance of DataVault.



18
19
20
21
# File 'lib/activefacts/generators/transform/datavault.rb', line 18

# Remember the vocabulary to be transformed, together with its constellation.
#
# vocabulary:: the vocabulary this transform operates on
# options::    accepted but not used by this method
def initialize(vocabulary, *options)
  @constellation = (@vocabulary = vocabulary).constellation
end

Instance Method Details

#assert_date_time ⇒ Object



151
152
153
# File 'lib/activefacts/generators/transform/datavault.rb', line 151

# Ensure the 'Date Time' value type exists, returning whatever
# assert_value_type returns for it.
def assert_date_time
  type_name = 'Date Time'
  assert_value_type(type_name)
end

#assert_record_source ⇒ Object



147
148
149
# File 'lib/activefacts/generators/transform/datavault.rb', line 147

# Ensure the 'Record Source' value type exists, with 'String' as its supertype.
# The 'String' value type is asserted first, so it is looked up (or created)
# on every call, even when 'Record Source' is already present.
def assert_record_source
  string_type = assert_value_type('String')
  assert_value_type('Record Source', string_type)
end

#assert_value_type(name, supertype = nil) ⇒ Object



142
143
144
145
# File 'lib/activefacts/generators/transform/datavault.rb', line 142

# Look up the value type called +name+ in the vocabulary, creating it in the
# constellation when the lookup yields nothing.
#
# name::      name of the value type to find or create
# supertype:: passed as :supertype when a new value type has to be created
#
# Returns the truthy result of valid_value_type_name, otherwise the result of
# the constellation's ValueType call.
def assert_value_type(name, supertype = nil)
  existing = @vocabulary.valid_value_type_name(name)
  return existing if existing

  @constellation.ValueType(
    :vocabulary => @vocabulary,
    :name => name,
    :supertype => supertype,
    :concept => :new
  )
end

#classify_satellite_references(table) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/activefacts/generators/transform/datavault.rb', line 80

# Group the non-identifying references of +table+ by the satellite they
# belong to.
#
# A reference is assigned to a named satellite when its fact type has a
# single-role uniqueness constraint on +table+ whose concept carries a
# mapping annotation starting with "satellite"; the text after "satellite "
# becomes the group name. References without such an annotation are grouped
# under the table's own name.
#
# Returns nil when the table has no non-identifying references, otherwise a
# Hash mapping each satellite name to an Array of references.
def classify_satellite_references(table)
  identifying_references = table.identifier_columns.map { |column| column.references.first }.uniq
  attribute_references = table.columns.map { |column| column.references[0] }.uniq - identifying_references

  # Skip this table if no satellite data is needed
  # REVISIT: Needed anyway for a link?
  return nil if attribute_references.size == 0

  grouped = attribute_references.group_by do |ref|
    declared_names =
      ref.fact_type.internal_presence_constraints.map do |pc|
        limit = pc.max_frequency
        next if !limit || limit > 1     # Not a uniqueness constraint

        role_refs = pc.role_sequence.all_role_ref
        next if role_refs.size > 1      # Covers more than one role
        next if role_refs.single.role.object_type != table  # Not a unique attribute

        pc.concept.all_concept_annotation.map do |ca|
          annotation = ca.mapping_annotation
          annotation.sub(/^satellite +/, '') if annotation =~ /^satellite */
        end
      end

    # The first declared name wins; otherwise fall back to the table's name
    declared_names.flatten.compact.uniq[0] || table.name
  end

  trace :datavault, "#{table.name} satellites are #{grouped.inspect}"
  grouped
end

#classify_tables ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/activefacts/generators/transform/datavault.rb', line 23

# Split the vocabulary's tables into @reference_tables, @hub_tables and
# @link_tables, then trace the outcome.
#
# * Reference tables: annotated 'static', or not an EntityType.
# * Link tables: more than one distinct identifying reference leads (after
#   following any absorption) to something that is itself a table.
# * Hub tables: every other table.
#
# Returns the result of trace_table_classifications.
def classify_tables
  all_tables = @vocabulary.tables
  candidates = all_tables.reject do |table|
    marked_static = table.concept.all_concept_annotation.detect { |ca| ca.mapping_annotation == 'static' }
    marked_static || !table.is_a?(ActiveFacts::Metamodel::EntityType)
  end
  @reference_tables = all_tables - candidates

  @link_tables, @hub_tables = candidates.partition do |table|
    identifying_references = table.identifier_columns.map { |column| column.references.first }.uniq

    # Which identifying references are played by other tables?
    played_by_tables = identifying_references.select do |reference|
      target = reference.to
      # I have no examples of multi-level absorption, but it's possible, so loop
      loop do
        absorption = target.absorbed_via
        break unless absorption
        target = absorption.from
      end
      target.is_table
    end

    played_by_tables.size > 1
  end

  trace_table_classifications
end

#create_one_to_many(one, many, predicate_1 = 'has', predicate_2 = 'is of', one_adj = nil) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/activefacts/generators/transform/datavault.rb', line 113

# Create a binary fact type between +one+ and +many+, with two readings and a
# mandatory uniqueness constraint over the role played by +many+.
#
# one::         object type playing the role with ordinal 0
# many::        object type playing the role with ordinal 1
# predicate_1:: reading text placed between +one+ and +many+
# predicate_2:: reading text placed between +many+ and +one+
# one_adj::     leading adjective applied to +one+'s role in both readings
#
# Returns the role played by +one+.
def create_one_to_many(one, many, predicate_1 = 'has', predicate_2 = 'is of', one_adj = nil)
  fact_type = @constellation.FactType(:concept => :new)
  role_of_one = @constellation.Role(
    :concept => :new, :fact_type => fact_type, :ordinal => 0, :object_type => one
  )
  role_of_many = @constellation.Role(
    :concept => :new, :fact_type => fact_type, :ordinal => 1, :object_type => many
  )

  # First reading (ordinal 0) runs from the "many" side to the "one" side
  many_first = @constellation.Reading(
    :fact_type => fact_type,
    :ordinal => 0,
    :role_sequence => [:new],
    :text => "{0} #{predicate_2} {1}"
  )
  many_first_sequence = many_first.role_sequence
  @constellation.RoleRef(:role_sequence => many_first_sequence, :ordinal => 0, :role => role_of_many)
  @constellation.RoleRef(
    :role_sequence => many_first_sequence, :ordinal => 1, :role => role_of_one, :leading_adjective => one_adj
  )

  # Second reading (ordinal 1) runs from the "one" side to the "many" side
  one_first = @constellation.Reading(
    :fact_type => fact_type,
    :ordinal => 1,
    :role_sequence => [:new],
    :text => "{0} #{predicate_1} {1}"
  )
  one_first_sequence = one_first.role_sequence
  @constellation.RoleRef(
    :role_sequence => one_first_sequence, :ordinal => 0, :role => role_of_one, :leading_adjective => one_adj
  )
  @constellation.RoleRef(:role_sequence => one_first_sequence, :ordinal => 1, :role => role_of_many)

  # Mandatory, non-identifying uniqueness constraint over the "many" role
  uniqueness = @constellation.PresenceConstraint(
    :concept => :new,
    :vocabulary => @vocabulary,
    :name => one.name + 'HasOne' + many.name,
    :role_sequence => [:new],
    :is_mandatory => true,
    :min_frequency => 1,
    :max_frequency => 1,
    :is_preferred_identifier => false
  )
  @constellation.RoleRef(:role_sequence => uniqueness.role_sequence, :ordinal => 0, :role => role_of_many)

  role_of_one
end

#create_satellite(table, satellite_name, references) ⇒ Object



192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/activefacts/generators/transform/datavault.rb', line 192

# Create a satellite entity type for +table+ and move the given references
# onto it.
#
# table::          the hub or link table that owns the satellite
# satellite_name:: base name; it is title-cased and suffixed with ' SAT'
# references::     the references whose roles should move to the satellite
#
# The satellite is identified by its owning table together with a load
# 'Date Time', and also gets a 'Record Source' (whose length is set to 64 on
# every call). A reference leading to a hub table is lifted into a new link
# instead of being moved.
#
# Returns the new satellite entity type.
def create_satellite(table, satellite_name, references)
  satellite_name = (satellite_name.words.titlewords * ' ') + ' SAT'

  # Create a new entity type with record-date fields in its identifier
  trace :datavault, "Creating #{satellite_name} with #{references.size} references"
  satellite = @constellation.EntityType(
    :vocabulary => @vocabulary,
    :name => "#{satellite_name}",
    :concept => [:new, :implication_rule => "datavault"]
  )
  satellite.definitely_table

  owner_role = create_one_to_many(table, satellite)
  load_time_role = create_one_to_many(assert_date_time, satellite, 'is of', 'was loaded at', 'load')
  create_two_role_identifier(owner_role, load_time_role)

  record_source = assert_record_source
  record_source.length = 64
  create_one_to_many(record_source, satellite, 'is of', 'was loaded from')

  # Move all roles across to it from the parent table.
  references.each do |ref|
    trace :datavault, "Moving #{ref} across to #{table.name}_#{satellite_name}" do
      moving_role = ref.fact_type.all_role.detect { |role| role.object_type == table }
      if !moving_role
        #debugger  # Bum, the crappy Reference object bites again.
        $stderr.puts "REVISIT: Can't move the objectified role for #{ref.inspect}. This column will remain in the hub instead of moving to the satellite"
      else
        # Follow any absorption to find the table the reference really leads to
        remote_table = ref.to
        remote_table = remote_table.absorbed_via.from while remote_table.absorbed_via

        if @hub_tables.include?(remote_table)
          lift_role_to_link(ref, moving_role)
        else
          # Reassign the role player to the satellite:
          moving_role.object_type = satellite
        end
      end
    end
  end

  satellite
end

#create_two_role_identifier(r1, r2) ⇒ Object

Create a PresenceConstraint with two roles, marked as preferred_identifier



156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/activefacts/generators/transform/datavault.rb', line 156

# Create a mandatory PresenceConstraint spanning the two roles +r1+ and +r2+,
# marked as the preferred identifier.
#
# r1:: role placed at ordinal 0 of the constraint's role sequence
# r2:: role placed at ordinal 1 of the constraint's role sequence
#
# Returns the RoleRef created for +r2+.
#
# NOTE(review): the constraint name is built from r1's object type twice
# (e.g. "Hub HubPK") and never mentions r2 - confirm whether that is intended.
def create_two_role_identifier(r1, r2)
  player_name = r1.object_type.name
  pc = @constellation.PresenceConstraint(
    :concept => :new,
    :vocabulary => @vocabulary,
    :name => player_name + ' ' + player_name + 'PK',
    :role_sequence => [:new],
    :is_mandatory => true,
    :min_frequency => 1,
    :max_frequency => 1,
    :is_preferred_identifier => true
  )

  role_refs = [r1, r2].each_with_index.map do |role, ordinal|
    @constellation.RoleRef(:role_sequence => pc.role_sequence, :ordinal => ordinal, :role => role)
  end
  role_refs.last
end

#detect_required_surrogates ⇒ Object



62
63
64
65
66
67
68
69
# File 'lib/activefacts/generators/transform/datavault.rb', line 62

# Record in @required_surrogates every hub or link table that reports
# dv_needs_surrogate. Hubs are considered before links.
def detect_required_surrogates
  trace :datavault, "Detecting required surrogates" do
    candidates = @hub_tables + @link_tables
    @required_surrogates = candidates.select(&:dv_needs_surrogate)
  end
end

#generate(out = $stdout) ⇒ Object



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# File 'lib/activefacts/generators/transform/datavault.rb', line 235

# Run the whole Data Vault transformation over the vocabulary.
#
# out:: output stream, stored in @out (defaults to $stdout)
#
# Strategy:
# * Determine the list of ER tables
# * Partition tables into reference tables (annotated), link tables
#   (two+ FKs in PK), and hub tables
# * For each hub and link table:
#   * Apply a surrogate key if needed (all links, hubs lacking a simple surrogate)
#   * Detect references (fact types) leading to all attributes
#     (non-identifying columns)
#   * Group attribute facts into satellites (use the satellite annotation
#     if present)
#   * For each satellite:
#     * Create a new entity type with a (hub-key, record-date key)
#     * Make a new one->many fact type between hub and satellite
#     * Modify all attribute facts in this group to attach to the satellite
# * Compute a fresh relational mapping
# * Exclude reference tables and disable enforcement to them
#
# Returns @reference_tables (the value of the final each).
def generate(out = $stdout)
  @out = out

  classify_tables
  detect_required_surrogates

  @sat_tables = []
  trace :datavault, "Creating satellites" do
    (@hub_tables + @link_tables).each do |table|
      grouped_references = classify_satellite_references(table)
      next unless grouped_references

      trace :datavault, "Creating #{grouped_references.size} satellites for #{table.name}" do
        grouped_references.each do |satellite_name, references|
          @sat_tables << create_satellite(table, satellite_name, references)
        end
      end
    end
  end
  trace :datavault, "#{@sat_tables.size} satellite tables created"

  inject_required_surrogates

  # The table lists are re-read here, so any link tables added while the
  # satellites were being created receive the standard fields as well.
  trace :datavault, "Adding standard fields to hubs and links" do
    (@hub_tables + @link_tables).each do |table|
      create_one_to_many(assert_date_time, table, 'is of', 'was loaded at', 'load')
      create_one_to_many(assert_record_source, table, 'is of', 'was loaded from')
    end
  end

  # Now, redo the E-R mapping using the revised schema:
  @vocabulary.decide_tables

  # Suffix Hub and Link tables with HUB and LINK (hubs first, then links)
  { 'HUB' => @hub_tables, 'LINK' => @link_tables }.each do |suffix, tables|
    tables.each { |table| table.name = "#{table.name} #{suffix}" }
  end

  # Before departing, ensure we don't emit the reference tables!
  @reference_tables.each do |reference_table|
    reference_table.definitely_not_table
    @vocabulary.tables.delete(reference_table)
  end
end

#inject_required_surrogates ⇒ Object



71
72
73
74
75
76
77
78
# File 'lib/activefacts/generators/transform/datavault.rb', line 71

# Call dv_inject_surrogate on every table recorded in @required_surrogates,
# after tracing the names of the tables involved.
def inject_required_surrogates
  trace :datavault, "Injecting any required surrogates" do
    table_names = @required_surrogates.map { |table| table.name }.join(', ')
    trace :datavault, "Need to inject surrogates into #{table_names}"
    @required_surrogates.each(&:dv_inject_surrogate)
  end
end


171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/activefacts/generators/transform/datavault.rb', line 171

# Turn the fact type behind +ref+ into a new link table by objectifying it.
#
# ref::        the reference being broadened
# table_role:: the role in ref's fact type that is played by the owning table
#
# The uniqueness constraint of +table_role+ gains a second RoleRef (ordinal 1)
# for ref's far role, the first other internal presence constraint on the fact
# type (if any) is retracted, and an objectifying entity type is created and
# appended to @link_tables.
#
# Returns @link_tables.
def lift_role_to_link(ref, table_role)
  trace :datavault, "Broaden #{ref} into a new link"
  spanning_constraint = table_role.uniqueness_constraint
  other_constraint = ref.fact_type.internal_presence_constraints.detect do |candidate|
    candidate != spanning_constraint
  end

  # Any query Step or Reading on this fact type should be unaffected

  # Make a new RoleRef for the uniqueness constraint so it spans both roles
  spanning_constraint.constellation.RoleRef(spanning_constraint.role_sequence, 1, :role => ref.to_role)
  other_constraint && other_constraint.retract

  # Add the objectifying entity type:
  objectification = spanning_constraint.constellation.EntityType(
    spanning_constraint.vocabulary,
    "#{ref.from.name} #{ref.to_names*' '}",
    :fact_type => ref.fact_type,
    :concept => :new
  )
  @link_tables.push(objectification)
end

#trace_table_classifications ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/activefacts/generators/transform/datavault.rb', line 48

# Trace the decisions about table types: for each of the reference, hub and
# link groups, one line per table showing what its identifying references
# lead to ('self' where a reference has no target, or the target has no name).
#
# Does nothing, and returns nil, unless trace(:datavault) is truthy.
def trace_table_classifications
  return unless trace(:datavault)

  groups = [
    [@reference_tables, 'Reference'],
    [@hub_tables, 'Hub'],
    [@link_tables, 'Link']
  ]
  groups.each do |tables, kind|
    trace :datavault, "#{kind} tables:" do
      tables.each do |table|
        identifying_references = table.identifier_columns.map { |column| column.references.first }.uniq
        target_names = identifying_references.map do |reference|
          target = reference.to
          (target && target.name) || 'self'
        end
        trace :datavault, "#{table.name}(#{target_names.join(', ')})"
      end
    end
  end
end