Class: Dbtools::Database::DbConnection

Inherits:
Object
  • Object
show all
Defined in:
lib/dbtools/database/db_connection.rb

Direct Known Subclasses

MysqlConnection, PostgresqlConnection

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ DbConnection

Creates a connection to a database using the URL.



12
13
14
15
# File 'lib/dbtools/database/db_connection.rb', line 12

def initialize(url)
  @connection = ActiveRecord::Base.establish_connection(url).connection
  @database = get_current_database
end

Instance Attribute Details

#connectionObject

Returns the value of attribute connection.



9
10
11
# File 'lib/dbtools/database/db_connection.rb', line 9

def connection
  @connection
end

Instance Method Details

#check_reserved_keywordsObject

Function to check if reserved keywords occur in the schema/table/column names.



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/dbtools/database/db_connection.rb', line 55

def check_reserved_keywords
  reserved_keywords = "("
  Dbtools::Constants::RESERVED_KEYWORDS.each do |keyword|
    reserved_keywords << "'#{keyword}', "
  end
  reserved_keywords = reserved_keywords[0..-3]
  reserved_keywords << ")"
  sql = %{
select c.TABLE_CATALOG as "table_catalog", c.TABLE_SCHEMA as "table_schema", c.TABLE_NAME as "table_name", c.COLUMN_NAME as "column_name"
from information_schema.COLUMNS as c
where c.TABLE_SCHEMA not in ('information_schema', 'performance_schema', 'mysql', 'sys', 'pg_catalog')
  and (upper(c.COLUMN_NAME) in #{reserved_keywords}
  or upper(c.TABLE_CATALOG) in #{reserved_keywords}
  or upper(c.TABLE_NAME) in #{reserved_keywords})
}
  violations = []
  execute_query(sql).each do |h|
    table_schema = h['table_schema']
    table_name = h['table_name']
    column_name = h['column_name']
    violations << Violation.new(database: database_name,
                                metric: "Reserved keywords",
                                offender: "#{table_schema}.#{table_name}.#{column_name}")
  end
  return violations
end

#check_spellingObject

Checks the spelling of all column names.



204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/dbtools/database/db_connection.rb', line 204

def check_spelling
  database = @database
  violations = []

  database.tables.each do |table_name, table|
    table.columns.each do |col_name, col|
      next if col.name.correct?
      violations << Violation.new(database: database_name,
                                  metric: "Spelling",
                                  offender: "#{col.full_name.delete('"')}")
    end
  end
  return violations
end

#closeObject



17
18
19
# File 'lib/dbtools/database/db_connection.rb', line 17

def close
  @connection.close
end

#create_database(name) ⇒ Object

Creates a new database.



220
221
222
# File 'lib/dbtools/database/db_connection.rb', line 220

def create_database(name)
  @connection.create_database(name)
end

#database_nameObject



21
22
23
# File 'lib/dbtools/database/db_connection.rb', line 21

def database_name
  @connection.current_database
end

#execute_files(directory) ⇒ Object

Runs all sql files in the specified directory



225
226
227
228
229
230
231
232
233
# File 'lib/dbtools/database/db_connection.rb', line 225

def execute_files(directory)
  Dir.foreach(directory) do |file|
    # Skip these files.
    next if file == '.' or file == '..' or not ['.sql'].include?(File.extname(file))
    file_path = File.join(directory, file)
    content = File.read(file_path)
    execute_query(content)
  end
end

#get_all_columnsObject



46
47
48
49
50
51
# File 'lib/dbtools/database/db_connection.rb', line 46

def get_all_columns
  query = %{select c.COLUMN_NAME
from information_schema.COLUMNS as c
where c.TABLE_SCHEMA not in ('information_schema', 'performance_schema', 'mysql', 'sys', 'pg_catalog')}
  execute_query(query)
end

#get_completenessObject

Get the completeness of the columns.



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/dbtools/database/db_connection.rb', line 83

def get_completeness
  database = @database

  database.tables.values.each do |table|
    # This query counts all null entries(or entries containing '' for string columns) in every column.
    query = table.query_empty_records
    execute_query(query).each do |res|
      table.columns.each {|k, col| col.missing_entries = res[k]}
    end
    # This query counts the total values in the table.
    # The query should only return a single hashmap/dictionary as result.
    # Since the total records for all columns should be equal, taking just the first value
    # should be fine.
    query = table.query_total_records
    execute_query(query).each do |res|
      table.columns.each {|k, col| col.total_entries = res.values.first}
    end
  end

  violations = []
  database.tables.each do |table_name, table|
    table.columns.each do |col_name, col|
      next if col.missing_entries.to_i.zero?
      violations << Violation.new(database: database_name,
                                  metric: "Completeness",
                                  offender: "#{col.full_name.delete('"')}",
                                  violating_records: col.missing_entries.to_i,
                                  total_records: col.total_entries.to_i)
    end
  end
  return violations
end

#get_current_databaseObject

Return an object representing the current database structure.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/dbtools/database/db_connection.rb', line 26

def get_current_database
  query = %{select c.table_catalog as "table_catalog", c.table_schema as "table_schema", c.table_name as "table_name", c.column_name as "column_name", c.data_type as "data_type"
from information_schema.columns as c
join information_schema.tables as t
	on c.table_catalog = t.table_catalog
and c.table_schema = t.table_schema
and c.table_name = t.table_name
where c.data_type not like '%text%'
and c.table_schema not in ('information_schema', 'performance_schema', 'mysql', 'sys', 'pg_catalog')}
  database = DatabaseData.new(database_name)
  execute_query(query).each do |h|
    table_name = h['table_name']
    table_schema = h['table_schema']
    column_name = h['column_name']
    data_type = h['data_type']
    database.add_table(table_name, table_schema).add_column(column_name, data_type)
  end
  return database
end

#get_inverse_functional_propertyObject

Calculate the inverse functional property of the database.



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/dbtools/database/db_connection.rb', line 148

def get_inverse_functional_property
  database = @database

  # Run the query on every table.
  database.tables.values.each do |table|
    # This query counts all distinct values in every column.
    # The query should only return a single hashmap/dictionary as result.
    query = table.query_distinct_entries
    execute_query(query).each do |res|
      table.columns.each {|k, col| col.distinct_entries = res[k]}
    end
    # This query counts the total values in the table.
    # The query should only return a single hashmap/dictionary as result.
    # Since the total records for all columns should be equal, taking just the first value
    # should be fine.
    query = table.query_total_records
    execute_query(query).each do |res|
      table.columns.each {|k, col| col.total_entries = res.values.first}
    end
  end

  violations = []
  database.tables.each do |table_name, table|
    #puts "Table: #{table_name}:"
    table.columns.each do |col_name, col|
      #puts "\t #{col_name}: #{col.distinct_entries}/#{col.total_entries}" unless col.distinct_entries.to_i.zero?
      violating_records = col.distinct_entries.to_i
      next if violating_records.zero?
      violations << Violation.new(database: database_name,
                                  metric: "Inverse functional property",
                                  offender: "#{col.full_name.delete('"')}",
                                  violating_records: violating_records,
                                  total_records: col.total_entries.to_i)
    end
  end
  return violations
end

#get_syntax_compressionObject

Get the compression of the entries grouped by columns.



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/dbtools/database/db_connection.rb', line 117

def get_syntax_compression
  database = @database

  database.tables.values.each do |table|
    # This query counts all lowercased distinct values in every column.
    query = table.query_distinct_lowercased_entries
    execute_query(query).each do |res|
      table.columns.each {|k, col| col.distinct_lower_entries = res[k]}
    end
    # This query counts all distinct values in every column.
    query = table.query_distinct_entries
    execute_query(query).each do |res|
      table.columns.each {|k, col| col.distinct_entries = res[k]}
    end
  end

  violations = []
  database.tables.each do |table_name, table|
    table.columns.each do |col_name, col|
      next if col.distinct_lower_entries == col.distinct_entries
      violations << Violation.new(database: database_name,
                                  metric: "Syntax compression",
                                  offender: "#{col.full_name.delete('"')}",
                                  violating_records: col.distinct_entries.to_i - col.distinct_lower_entries.to_i,
                                  total_records: col.distinct_entries.to_i)
    end
  end
  return violations
end

#get_uppercase_columnsObject

Get the columns that are not downcase.



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/dbtools/database/db_connection.rb', line 187

def get_uppercase_columns
  database = @database
  violations = []

  database.tables.each do |table_name, table|
    table.columns.each do |col_name, col|
      next if col.name.downcase.eql?(col.name)
      violations << Violation.new(database: database_name,
                                  metric: "Uppercase column names",
                                  offender: "#{col.full_name.delete('"')}",
                                  solution: "#{table.schema}.#{table.name}.#{col.name.downcase.delete('"')}")
    end
  end
  return violations
end