Class: Twb::Analysis::CalculatedFieldsAnalyzer

Inherits:
Object
  • Object
show all
Includes:
Graph, TabTool
Defined in:
lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb

Overview

< Twb::Util::Graph

Constant Summary collapse

# Name of this tool's log file (not referenced by the methods shown on this page).
@@ttlogfile =
'CalculatedFieldsAnalyzer.ttlog'
# Path of the Graphviz `dot` executable run by #renderDot; hard-coded to one Windows install location.
@@gvDotLocation =
'C:\\tech\\graphviz\\Graphviz2.38\\bin\\dot.exe'
# Suffix appended to the workbook name when #initDot and #renderDot build output file names.
@@processName =
'.CalculatedFields'
# File name and column headers of the CSV with one row per calculated field (opened in #initialize).
@@calcFieldsCSVFileName =
'TwbCalculatedFields.csv'
@@calcFieldsCSVFileHeader =
['Record #',
 'Workbook',      'Workbook Dir',
 'Data Source',   'Data Source Caption', 'Data Source Name (tech)',
 'Field Name',    'Field Caption',       'Field Name (tech)',
 'Data Source + Field Name (tech)',
 'Data Type',     'Role',  'Type',
 'Class',
 'Scope Isolation',
 'Formula Length',
 'Formula',
 'Formula (tech)',
 'Formula Comments',
 'Formula LOD?'
]
# File name and column headers of the CSV with one row per formula line (opened in #initialize).
@@calcLinesCSVFileName =
'TwbCalculatedFieldFormulaLines.csv'
@@calcLinesCSVFileHeader =
['Calc Field #',
 'Workbook',      'Workbook Dir',
 'Data Source',   'Data Source Caption', 'Data Source Name (tech)',
 'Field Name',    'Field Caption',       'Field Name (tech)',
 'Formula',       'Formula Line #',      'Formula Line'
]
# File name and column headers of the CSV with one row per field referenced by a formula (opened in #initialize).
@@formFieldsCSVFileName =
'TwbFormulaFields.csv'
@@formFieldsCSVFileHeader =
['Rec #',
   'Workbook',   'Workbook Dir',
   'Data Source',
   'Field - Calculated',
   'Data Source - Formula (tech)',
   'Data Source - Formula',
   'Field - Formula (tech)',
   'Field - Formula',
   'Data Source + Field - Calculated',
   'Table'
]
# Opening lines written to every DOT file by #initDot; the graph is closed later by #closeDot.
@@dotHeader =
"  digraph g {\n      graph [rankdir=\"LR\" splines=line];\n      node  [shape=\"box\"  width=\"2\"];\n\n"
# Node types (compared via to_s) for which #rankSame emits no rank=same group.
@@unrankedTypes =
['CalculationField']

Constants included from TabTool

TabTool::TTDOCDIR

Instance Attribute Summary collapse

Attributes included from Graph

#edges, #nodes

Attributes included from TabTool

#docfiles, #funcdoc, #logfilename, #logger, #loglevel, #metrics

Instance Method Summary collapse

Methods included from TabTool

#addDocFile, #docFile, #docFileMaxNameLen, #docfilesdoc, #emit, #initCSV, #initDocDir, #initLogger

Constructor Details

#initializeCalculatedFieldsAnalyzer



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 84

# Sets up a fresh analyzer: tool self-description, empty CSV row collectors,
# zeroed counters, the set of referenced field ids, and the three CSV outputs
# (opened through initCSV with the file names and headers declared on the class).
def initialize
  @funcdoc = {
    class:       self.class,
    blurb:       'Analyzing Calculated Fields from Tableau Workbooks.',
    description: 'Calculated fields can be complex, this tool provides robust coverage.'
  }

  #-- CSV record collectors (three distinct arrays)
  @csvCalculatedFields, @csvFormulaFields, @csvFormulaLines = [], [], []

  #-- counters
  @twbCount = @calculatedFieldsCount = @formulaFieldsCount = 0

  #-- ids of every field referenced by some formula, across data sources
  @referencedFields = SortedSet.new

  #-- CSV outputs
  @csvCF   = initCSV(@@calcFieldsCSVFileName,
                     'Calculated fields and their formulas.',
                     @@calcFieldsCSVFileHeader)
  @csvCFLs = initCSV(@@calcLinesCSVFileName,
                     "Calculated fields and their formulas' individual lines.",
                     @@calcLinesCSVFileHeader)
  @csvFF   = initCSV(@@formFieldsCSVFileName,
                     'Calculated fields and the fields their formulas reference.',
                     @@formFieldsCSVFileHeader)

  @localEmit  = false
  @imageFiles = []
end

Instance Attribute Details

#calculatedFieldsCountObject (readonly)

Returns the value of attribute calculatedFieldsCount.



29
30
31
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 29

# Reader for @calculatedFieldsCount, the running number of calculated-field
# records; #processDataSource increments it for each calculated field that has a formula.
def calculatedFieldsCount; @calculatedFieldsCount; end

#dataFilesObject (readonly)

Returns the value of attribute dataFiles.



29
30
31
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 29

# Reader for @dataFiles. None of the methods shown on this page assign it,
# so it is nil unless set elsewhere.
def dataFiles; @dataFiles; end

#formulaFieldsCountObject (readonly)

Returns the value of attribute formulaFieldsCount.



29
30
31
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 29

# Reader for @formulaFieldsCount, the running number of formula-referenced
# field records; #processDataSource increments it once per referenced field.
def formulaFieldsCount; @formulaFieldsCount; end

#ttdocdirObject

Returns the value of attribute ttdocdir.



30
31
32
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 30

# Reader for @ttdocdir. None of the methods shown on this page assign it,
# so it is nil unless set elsewhere.
def ttdocdir; @ttdocdir; end

Instance Method Details

#closeDot(dotFile, twb) ⇒ Object



435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 435

# Writes the closing section of the DOT graph -- an invisible cluster holding
# a title node stamped with the workbook name and the current time -- then the
# final closing brace, and closes the file.
#
# dotFile - open, writable IO for the DOT file; it is closed on return.
# twb     - workbook name interpolated into the title node.
def closeDot(dotFile, twb)
  footer = [
    ' ',
    '// -------------------------------------------------------------',
    ' ',
    '   subgraph cluster_1 {',
    '     style=invis;',
    '     node [border=blue];',
    ' ',
    '     "" [style=invis]',
    # the \\n sequences stay literal in the file; Graphviz turns them into label line breaks
    "     \"Tableau Tools\\nCalculated Fields Map\\nWorkbook '#{twb}'\\n#{Time.new.ctime}\"  [penwidth=0]",
    ' ',
    '   }',
    ' ',
    '}'
  ]
  footer.each { |line| dotFile.puts line }
  dotFile.close
end

#cypher(twbName) ⇒ Object



317
318
319
320
321
322
323
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 317

# Renders the current graph (@nodes / @edges) through Twb::Util::Cypher,
# using "<twbName>.calcFields" as the output file name.
# Returns whatever the renderer's #render returns.
def cypher(twbName)
  renderer = Twb::Util::Cypher.new.tap do |c|
    c.fileName = "#{twbName}.calcFields"
    c.nodes    = @nodes
    c.edges    = @edges
  end
  renderer.render
end

#cypherPy(twbName) ⇒ Object



325
326
327
328
329
330
331
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 325

# Renders the current graph (@nodes / @edges) through Twb::Util::CypherPython,
# using "<twbName>.calcFields" as the output file name.
# Returns whatever the renderer's #render returns.
def cypherPy(twbName)
  renderer = Twb::Util::CypherPython.new.tap do |c|
    c.fileName = "#{twbName}.calcFields"
    c.nodes    = @nodes
    c.edges    = @edges
  end
  renderer.render
end

#emitCalcfield(calcField) ⇒ Object

Logs a calculated field's caption, technical name, UI name, and formula via emit.



266
267
268
269
270
271
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 266

# Logs the identifying names and the flattened formula of one calculated field.
#
# calcField - object responding to caption, name, uiname and calculation;
#             the calculation must respond to formulaFlat.
def emitCalcfield calcField
  emit "\t FIELD    cap :: #{calcField.caption} "
  emit "\t         tname:: #{calcField.name}"
  emit "\t        uiname:: #{calcField.uiname}"
  # the formula belongs to the field's calculation; there is no bare `calculation` in scope here
  emit "\t       formula:: #{calcField.calculation.formulaFlat}"
end

#emitEdgesObject

Logs every graph edge via emit as two lines: the from-node and the to-node, each with its type.



350
351
352
353
354
355
356
357
358
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 350

# Logs a two-column table (type, node) with both endpoints of every edge in
# @edges, one endpoint per line and a blank separator after each edge.
def emitEdges
  row = "  %-15s    %s"
  emit row % ['type', 'Edge']
  emit row % ['-' * 15, '-' * 35]
  @edges.each do |edge|
    [edge.from, edge.to].each { |endpoint| emit row % [endpoint.type, endpoint] }
    emit "\n "
  end
end

#emitGmlObject



132
133
134
135
136
137
138
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 132

# Renders the current graph (@nodes / @edges) through Twb::Util::GML, using
# the current workbook's name as the output file name.
# Returns whatever the renderer's #render returns.
def emitGml
  writer = Twb::Util::GML.new.tap do |g|
    g.fileName = @twb.name
    g.nodes    = @nodes
    g.edges    = @edges
  end
  writer.render
end

#emitTypes(dotFile) ⇒ Object



360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 360

# Groups the endpoints of every edge in @edges by node type (via loadNodeType)
# and asks rankSame to write a rank=same group for each type, skipping a type
# that is eql? to the string 'CalculatedField'.
#
# dotFile - open, writable IO for the DOT file.
# Returns the type => nodes hash that was built.
def emitTypes(dotFile)
  nodesByType = {}
  dotFile.puts "\n\n  //  2--------------------------------------------------------------------"
  @edges.each do |edge|
    emit  "   EDGE  :: #{edge}"
    [edge.from, edge.to].each { |endpoint| loadNodeType(nodesByType, endpoint) }
  end
  nodesByType.each do |type, members|
    next if type.eql?('CalculatedField')
    rankSame(dotFile, type, members)
  end
end

#initDot(twb) ⇒ Object



428
429
430
431
432
433
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 428

# Opens "<twb><process suffix>.dot" (path resolved through docFile) for
# writing and writes the shared DOT header to it.
#
# twb - workbook name used as the file-name stem.
# Returns a hash with the open file under :file and its path under :name;
# the file stays open for the caller (#closeDot closes it).
def initDot(twb)
  path   = docFile("#{twb}#{@@processName}.dot")
  handle = File.new(path, 'w')
  handle.puts @@dotHeader
  { file: handle, name: path }
end

#labelTypes(dotFile) ⇒ Object



410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 410

# Writes a cluster containing one quoted box3d node per distinct node type
# found on either end of the edges in @edges, in first-seen order.
# Writes nothing when there are no edges.
#
# dotFile - open, writable IO for the DOT file.
# Returns nil.
def labelTypes(dotFile)
  quotedTypes = @edges.flat_map do |edge|
    ["\"#{edge.from.type}\"", "\"#{edge.to.type}\""]
  end.uniq
  return if quotedTypes.empty?

  dotFile.puts "\n  //  3--------------------------------------------------------------------",
               '   subgraph cluster_0 {',
               '     color=white;',
               '     node [shape="box3d"  style="filled" ];'
  quotedTypes.each { |label| dotFile.puts "    #{label}" }
  dotFile.puts '   }'
end

#loadNodeType(set, node) ⇒ Object



378
379
380
381
382
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 378

# Files a node under its type in a type => Set-of-nodes hash, creating the
# Set the first time a type is seen.
#
# set  - Hash keyed by node type (mutated in place).
# node - object responding to type.
# Returns the Set the node was added to.
def loadNodeType(set, node)
  bucket = set.fetch(node.type) { |type| set[type] = Set.new }
  bucket.add node
end

#mapTwbObject



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 273

# Writes the DOT file for the current workbook's calculated-field graph and
# renders it with Graphviz as pdf, png and svg.
#
# Edges are first reduced to a sorted, duplicate-free set of DOT strings.
# An edge from a :TwbDataConnection node to a field that some formula
# references is left out.
#
# Returns the image file name reported by the last (svg) render.
def mapTwb
  twb = @twb.name
  dotFile, dotFileName = initDot(twb).values_at(:file, :name)
  dotFile.puts "\n //  subgraph cluster_1 {",
               "   //    color= grey;",
               ""

  # Collapsing edges to their DOT text in a set avoids duplicate entries in the file.
  uniqueEdges = SortedSet.new
  emit "\n========================\nLoading Edges\n========================\n  From DC?  Referenced?  Edge \n  %s  %s  %s" % ['--------', '-----------', '-'*45]
  @edges.each do |edge|
    fromConnection = edge.from.type == :TwbDataConnection
    referenced     = @referencedFields.include?(edge.to.id)
    next if fromConnection && referenced
    uniqueEdges.add(edge.dot)
  end
  emit "------------------------\n "
  uniqueEdges.each { |edgeText| dotFile.puts "        #{edgeText}" }
  emit "========================\n "

  dotFile.puts ""
  dotFile.puts "   // }"
  dotFile.puts "\n\n  //  4 NODES --------------------------------------------------------------------"
  @nodes.each { |node| dotFile.puts node.dotLabel }
  dotFile.puts "\n\n  //  5--------------------------------------------------------------------"

  emitTypes(dotFile)
  closeDot(dotFile, twb)
  emit "Rendering DOT file - #{twb}"
  %w[pdf png svg].map { |imageFormat| renderDot(twb, dotFileName, imageFormat) }.last
end

#processDataSource(ds) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 140

# Analyzes one data source of the current workbook (@twb).
#
# For the data source and each of its calculated fields this
#   * adds graph nodes/edges to @nodes and @edges (data source -> calculated
#     field -> referenced field -> table, when the field's table is known),
#   * collects one CSV row per calculated field that has a formula, one per
#     formula line, and one per field referenced by the formula,
#   * merges the referenced and unreferenced ("root") field ids into
#     @referencedFields and @twbRootFields,
#   * re-renders the Cypher outputs for the workbook, and
#   * appends the rows collected by THIS call to the three CSV files.
#
# @csvCalculatedFields and @csvFormulaFields keep accumulating for the whole
# life of the analyzer, so only the rows added during this call are written;
# writing the collectors in full would repeat the rows of every previously
# processed data source and workbook in the CSV files.
#
# ds - data source; as used below it must respond to uiname, id, caption,
#      name, calculatedFields and fieldTable(fieldName).
# Returns @imageFiles.
def processDataSource ds
  emit   "=======    DATA SOURCE: #{ds.uiname}    ====== "
  # where this data source's rows start in the analyzer-wide collectors
  firstCalcFieldRow     = @csvCalculatedFields.length
  firstFormulaFieldRow  = @csvFormulaFields.length
  dsFields              = {}
  @twbFields[ds.uiname] = dsFields
  calculatedFields      = SortedSet.new
  fieldFormulaLines     = []
  referencedFields      = SortedSet.new
  dataSourceNode = Twb::Util::Graphnode.new(name: ds.uiname, id: ds.id, type: ds, properties: {workbook: @twb.name})
  @nodes.add dataSourceNode
  #-- process Calculated Fields
  ds.calculatedFields.each do |calcField|
    calculatedFields.add calcField.id
    dsFields[calcField.uiname] = calcField
    calcFieldNode    = Twb::Util::Graphnode.new(name: calcField.uiname, id: calcField.id, type: calcField, properties: {:DataSource => ds.uiname})
    @nodes.add calcFieldNode
    dsFieldEdge     = Twb::Util::Graphedge.new(from: dataSourceNode, to: calcFieldNode, relationship: 'contains')
    @edges.add dsFieldEdge
    calculation     = calcField.calculation
    next unless calculation.has_formula

    #-- collect field formulas as single lines
    @csvCalculatedFields.push [
                                @calculatedFieldsCount += 1,
                                @twb.name,
                                @twbDir,
                                ds.uiname,
                                ds.caption,
                                ds.name,
                                calcField.uiname,
                                calcField.caption,
                                calcField.name,
                                ds.name + '::' + calcField.name,
                                calcField.datatype,
                                calcField.role,
                                calcField.type,
                                calculation.class,
                                calculation.scopeIsolation,
                                calculation.formulaFlat.length,
                                calculation.formulaResolved,
                                calculation.formulaFlat,
                                calculation.comments,
                                calculation.is_lod
                              ]
    #-- collect individual formula lines
    flnum = 0
    emit "@@ FL: #{calcField.uiname}"
    calculation.formulaResolvedLines.each do |fl|
      emit "@@ FL:   => '#{fl}'"
      fieldFormulaLines << [ @calculatedFieldsCount,        # 'Calc Field #',
                             @twb.name,                     # 'Workbook',
                             @twbDir,                       # 'Workbook Dir',
                             ds.uiname,                     # 'Data Source',
                             ds.caption,                    # 'Data Source Caption',
                             ds.name,                       # 'Data Source Name (tech)',
                             calcField.uiname,              # 'Field Name',
                             calcField.caption,             # 'Field Caption',
                             calcField.name,                # 'Field Name (tech)',
                             calcField.calculation.formulaFlatResolved, # 'Formula'
                             flnum += 1,                    # 'Formula Line #',
                             # 'Formula Line' - a leading quote protects leading blanks,
                             # which Tableau strips from CSV values
                             fl.start_with?(" ") ? "'#{fl}" : fl
                           ]
    end
    #-- collect fields referenced in formula
    calculation.calcFields.each do |rf|
      emit "  rf.name  :'#{rf.name}'"
      emit "  rf.uiname:'#{rf.uiname}'"
      properties = {'DataSource' => ds.uiname, 'DataSourceReference' => 'local', :source => rf}
      refFieldNode  = Twb::Util::Graphnode.new(name: rf.uiname, id: rf.id,   type: rf.type, properties: properties)
      @nodes.add refFieldNode
      fieldFieldEdge = Twb::Util::Graphedge.new(from: calcFieldNode,  to: refFieldNode, relationship: 'references')
      @edges.add fieldFieldEdge
      referencedFields.add rf.id
      refFieldTable = ds.fieldTable(rf.name)
      emit "refFieldTable.nil? : #{refFieldTable.nil?}"
      unless refFieldTable.nil?
        # the table id is qualified with the data source name
        tableID        = refFieldTable + ':::' + ds.uiname
        tableName      = "||#{refFieldTable}||"
        tableNode      = Twb::Util::Graphnode.new(name: tableName,      id: tableID,   type: :DBTable, properties: properties)
        @nodes.add tableNode
        fieldFieldEdge = Twb::Util::Graphedge.new(from: refFieldNode,  to: tableNode, relationship: 'is a field in')
        @edges.add fieldFieldEdge
      end
      @csvFormulaFields <<  [
                              @formulaFieldsCount += 1,
                              @twb.name,
                              @twbDir,
                              ds.uiname,
                              calcField.uiname,
                              calculation.formulaFlat,
                              calculation.formulaResolved,
                              rf.name,
                              rf.uiname,
                              rf.id,
                              refFieldTable
                            ]
    end # calculation.calcFields.each
  end # ds.calculatedFields.each

  # root fields: calculated fields of this data source that no formula here references
  dsRootFields = calculatedFields - referencedFields
  @referencedFields.merge referencedFields
  @twbRootFields.merge dsRootFields
  cypher     @twb.name
  cypherPy   @twb.name
  emit "#######################"
  #-- record calculated fields (only the rows added by this call)
  emit "@@ record calculated fields  ds: #{ds.uiname}"
  @csvCalculatedFields.drop(firstCalcFieldRow).each do |r|
    @csvCF << r
  end
  #-- record individual formula lines
  emit "@@ individual formula lines  ds: #{ds.uiname}"
  fieldFormulaLines.each do |ffl|
    @csvCFLs << ffl
  end
  #-- record formula-referenced fields (only the rows added by this call)
  emit "@@ formula-referenced fields ds: #{ds.uiname}"
  @csvFormulaFields.drop(firstFormulaFieldRow).each do |r|
    @csvFF << r
  end
  #--
  return @imageFiles
end

#processTWB(workbook) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 105

# Analyzes one Tableau workbook: resets the per-workbook graph state,
# processes every data source except the Parameters one, writes/renders the
# DOT map and the GML file, and counts the workbook.
#
# workbook - a workbook file name (String, opened with Twb::Workbook.new) or
#            an already loaded Twb::Workbook.
# Returns the updated number of workbooks processed (@twbCount).
# Raises ArgumentError when workbook is neither of the accepted kinds.
def processTWB workbook
  twb = workbook.is_a?(String) ? Twb::Workbook.new(workbook) : workbook
  # validate before touching @twb so a bad argument leaves the analyzer state alone
  unless twb.is_a? Twb::Workbook
    raise ArgumentError, "expected a workbook file name or a Twb::Workbook, got #{workbook.class}"
  end
  @twb = twb
  emit  "- Workbook: #{workbook}"
  emit  "   version: #{@twb.version}"
  @twbDir   = @twb.dir
  #-- fresh per-workbook state
  @edges         = Set.new
  dss            = @twb.datasources
  @twbRootFields = Set.new
  @twbFields     = {}
  @nodes         = Set.new
  #-- processing
  dss.each do |ds|
    # don't process the Parameters data source - Parameters' fields aren't
    # Calculated fields for our purposes
    next if ds.Parameters?
    processDataSource ds
  end
  mapTwb
  emitGml
  @twbCount += 1
end

#rankRootFields(dotFile, dsRootFields) ⇒ Object



399
400
401
402
403
404
405
406
407
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 399

# Writes a rank=same group listing the given root (unreferenced) calculated
# fields, each as a quoted DOT node id, and logs every field.
#
# dotFile      - open, writable IO for the DOT file.
# dsRootFields - enumerable of root fields; each is interpolated via to_s.
# Returns nil.
def rankRootFields(dotFile, dsRootFields)
  dotFile.puts "\n  // Unreferenced (root) Calculated Fields -----------------------------------------",
               "\n  {rank=same "
  dsRootFields.each do |rootField|
    emit "ROOT FIELD: #{rootField.class}  :: #{rootField}"
    dotFile.puts %(     "#{rootField}")
  end
  dotFile.puts '  }'
end

#rankSame(dotFile, type, nodes) ⇒ Object



385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 385

# Writes a rank=same group containing the ids of the given nodes, sorted and
# de-duplicated, so Graphviz places all nodes of one type on the same rank.
# Does nothing for types listed in @@unrankedTypes (compared via to_s).
#
# dotFile - open, writable IO for the DOT file.
# type    - node type, used for the skip check and the section comment.
# nodes   - enumerable of objects responding to id.
# Returns nil. The collected ids are left in @lines.
def rankSame(dotFile, type, nodes)
  return if @@unrankedTypes.include?(type.to_s)

  @lines = SortedSet.new
  nodes.each { |node| @lines.add(node.id) }
  dotFile.puts "\n  // '#{type}' --------------------------------------------------------------------",
               "\n  {rank=same "
  @lines.each { |nodeId| dotFile.puts %(     "#{nodeId}") }
  dotFile.puts '  }'
end

#renderDot(twb, dot, format) ⇒ Object



456
457
458
459
460
461
462
463
464
465
# File 'lib/twb/analysis/CalculatedFields/CalculatedFieldsAnalyzer.rb', line 456

# Runs Graphviz `dot` to render a DOT file into an image under ./ttdoc/.
#
# twb    - workbook name (String), used as the image file-name stem.
# dot    - path of the DOT file to render.
# format - Graphviz output format, e.g. 'pdf', 'png' or 'svg'.
# Returns the image file name, which is also appended to @imageFiles
# whether or not the render succeeded.
def renderDot twb, dot, format
  imageType  = '-T' + format
  imageFile  = './ttdoc/' + twb + @@processName + 'Graph.' + format
  imageParam = '-o"' + imageFile + '"'
  emit  "system  #{@@gvDotLocation} #{imageType} #{imageParam} \"#{dot}\""
  # Pass the arguments as a list so no shell parses them: workbook names with
  # quotes, `$`, backticks and the like reach dot unchanged and cannot inject commands.
  rendered = system(@@gvDotLocation, imageType, "-o#{imageFile}", dot.to_s)
  # system returns false on a non-zero exit and nil when the program could not be run
  emit  "           ! Graphviz failed to render #{imageFile}" unless rendered
  emit  "           - #{imageFile}"
  @imageFiles << imageFile
  return imageFile
end