Class: Flydata::TableDef::RedshiftTableDef

Inherits:
Object
  • Object
show all
Defined in:
lib/flydata/table_def/redshift_table_def.rb

Constant Summary collapse

TYPE_MAP_F2R =
{
  'binary' => {type: 'varchar', use_params: true, default_value: ''},
  'bit' => {type: 'bigint', default_value: '0'},
  'char' => {type: 'char', use_params: true, default_value: ''},
  'date' => {type: 'date', default_value: '0000-01-01'},
  'datetime' => {type: 'timestamp', default_value: '0000-01-01'},
  'enum' => {type: 'varchar encode bytedict', default_value: ''},
  'float4' => {type: 'float4', default_value: '0'},
  'float4 unsigned' => {type: 'float4', default_value: '0'},
  'float8' => {type: 'float8', default_value: '0'},
  'float8 unsigned' => {type: 'float8', default_value: '0'},
  'int1' => {type: 'int2', default_value: '0'},
  'int1 unsigned' => {type: 'int2', unsigned: true, default_value: '0'},
  'int2' => {type: 'int2', default_value: '0'},
  'int2 unsigned' => {type: 'int4', unsigned: true, default_value: '0'},
  'int3' => {type: 'int4', default_value: '0'},
  'int3 unsigned' => {type: 'int4', unsigned: true, default_value: '0'},
  'int4' => {type: 'int4', default_value: '0'},
  'int4 unsigned' => {type: 'int8', unsigned: true, default_value: '0'},
  'int8' => {type: 'int8', default_value: '0'},
  'int8 unsigned' => {type: 'numeric(20,0)', unsigned: true, default_value: '0'},
  'numeric' => {type: 'numeric', use_params: true, max_size: [38,37], default_value: '0'},
  'numeric unsigned' => {type: 'numeric', use_params: true, max_size: [38,37], default_value: '0'},
  'set' => {type: 'varchar encode bytedict', default_value: ''},
  'text' => {type: 'varchar(max)', default_value: ''},
  'time' => {type: 'timestamp', default_value: '0000-01-01'},
  'varbinary' => {type: 'varchar', use_params: true, max_size: 65535, default_value: ''},
  'varchar' => {type: 'varchar', use_params: true, max_size: 65535, default_value: ''},
}
FLYDATA_CTL_COLUMNS_TABLE =
"flydata_ctl_columns"
CREATE_FLYDATA_CTL_TABLE_SQL =
"CREATE TABLE %s(\n  id integer NOT NULL IDENTITY(1,1),\n  table_name varchar(128) NOT NULL,\n  column_name varchar(128) NOT NULL,\n  src_data_type varchar(1024) NOT NULL,\n  revision int NOT NULL DEFAULT 1,\n  ordinal_position int NOT NULL,\n  PRIMARY KEY(id)\n) DISTKEY(table_name) SORTKEY(table_name);\n"
CREATE_TABLE_SQL =
"DROP TABLE %s;\nCREATE TABLE %s (\n%s\n);\n"
NULL_STR =
"NULL"
FLYDATA_CTL_COLUMNS_SQL =
"DELETE FROM %s WHERE table_name = '%s';\nINSERT INTO %s (table_name, column_name, src_data_type, ordinal_position) VALUES\n"
APACHE_TIMESTAMP_REGEXP =
Regexp.new('^(?<apache_time_format>\[[0-3]\d\/\D{3}\/[1-2]\d{3}:[0-2]\d:[0-5]\d:[0-5]\d ?[\+\-]\d{2}:?\d{2}\])$')

Class Method Summary collapse

Class Method Details

.check_and_replace_max(params, max_size_a) ⇒ Object



184
185
186
187
188
189
190
191
# File 'lib/flydata/table_def/redshift_table_def.rb', line 184

def self.check_and_replace_max(params, max_size_a)
    final_params = []
    params.split(",").each_with_index do |param, i|
      final_params << (/\d+/.match(param) && max_size_a[i] && param.to_i > max_size_a[i].to_i ? 
                          max_size_a[i] : param)
    end
    final_params.join(",")
end

.column_def_sql(column, opt = {}) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/flydata/table_def/redshift_table_def.rb', line 96

def self.column_def_sql(column, opt = {})
  type = column[:type]
  if type =~ /\((.*?)\)/
    type = $` + $'
    params = $1
  end

  type_info = TYPE_MAP_F2R[type]
  raise "Unsupported type '#{column[:type]}'" if type_info.nil?

  rs_type = if type_info[:use_params] && params && !params.nil?
              params = check_and_replace_max(params, Array(type_info[:max_size])) if type_info[:max_size]
              type_info[:type] + "(#{params})"
            else
              type_info[:type]
            end
  line = %Q|  "#{column[:column]}" #{rs_type}|
  line += " NOT NULL" if column[:not_null]
  if (column.has_key?(:default))
    val = replace_default_value(type_info[:type], column[:default])
    line += " DEFAULT #{val}"
  elsif column[:not_null] && opt[:for] == :alter_table
    # Redshift doesn't allow adding a not null column without default value
    # Add a defalt value
    line += " DEFAULT '#{type_info[:default_value]}'"
  end
# Commented out because no IDENTITY column must be used for a replicated table.
# Values come from the master.
#    line += " IDENTITY(1, 1)" if (column[:auto_increment])

  line
end

.comment_sql(flydata_tabledef, schema_name) ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/flydata/table_def/redshift_table_def.rb', line 152

def self.comment_sql(flydata_tabledef, schema_name)
  sql = ""
  flydata_tabledef[:columns].each do |col|
    next unless col[:comment]

    sql += "COMMENT ON COLUMN \#{table_name_for_ddl(flydata_tabledef[:table_name], schema_name)}.\"\#{col[:column]}\"\nIS '\#{col[:comment]}';\n"
  end
  sql
end

.create_flydata_ctl_table_sql(schema_name) ⇒ Object



71
72
73
74
75
# File 'lib/flydata/table_def/redshift_table_def.rb', line 71

def self.create_flydata_ctl_table_sql(schema_name)
  # No drop table here intentionally because losing the data is fatal.
  tbl = flydata_ctl_table_for_ddl(schema_name)
  CREATE_FLYDATA_CTL_TABLE_SQL % [tbl, tbl]
end

.create_table_sql(flydata_tabledef, schema_name) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
# File 'lib/flydata/table_def/redshift_table_def.rb', line 84

def self.create_table_sql(flydata_tabledef, schema_name)
  lines = flydata_tabledef[:columns].collect{|column| column_def_sql(column) }
  pk_def = primary_key_sql(flydata_tabledef)
  lines << pk_def if pk_def

  contents = lines.join(",\n")

  table_name = flydata_tabledef[:table_name]
  redshift_tbl = table_name_for_ddl(table_name, schema_name)
  CREATE_TABLE_SQL % [redshift_tbl, redshift_tbl, contents]
end

.escape(text) ⇒ Object



180
181
182
# File 'lib/flydata/table_def/redshift_table_def.rb', line 180

def self.escape(text)
  text.gsub("'", "\\\\'")
end

.flydata_ctl_sql(flydata_tabledef, schema_name) ⇒ Object



169
170
171
172
173
174
175
176
177
178
# File 'lib/flydata/table_def/redshift_table_def.rb', line 169

def self.flydata_ctl_sql(flydata_tabledef, schema_name)
  flydata_ctl_tbl = flydata_ctl_table_for_ddl(schema_name)
  sql = FLYDATA_CTL_COLUMNS_SQL % [ flydata_ctl_tbl, flydata_tabledef[:table_name], flydata_ctl_tbl ]
  values = []
  flydata_tabledef[:columns].each.with_index(1) do |col, i|
    values << "('#{flydata_tabledef[:table_name]}', '#{col[:column]}', '#{escape(col[:type])}', #{i})"
  end
  sql += values.join(",\n") + ';'
  sql
end

.flydata_ctl_table_for_ddl(schema_name) ⇒ Object



67
68
69
# File 'lib/flydata/table_def/redshift_table_def.rb', line 67

def self.flydata_ctl_table_for_ddl(schema_name)
  table_name_for_ddl(FLYDATA_CTL_COLUMNS_TABLE, schema_name)
end

.from_flydata_tabledef(flydata_tabledef, options = {}) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/flydata/table_def/redshift_table_def.rb', line 34

def self.from_flydata_tabledef(flydata_tabledef, options = {})
  options[:flydata_ctl_table] = true unless options.has_key?(:flydata_ctl_table)
  schema_name = options[:schema_name]

  begin
    tabledef = ""
    tabledef += create_flydata_ctl_table_sql(schema_name) if options[:flydata_ctl_table]
    tabledef += create_table_sql(flydata_tabledef, schema_name) unless options[:ctl_only]
    tabledef += comment_sql(flydata_tabledef, schema_name) unless options[:ctl_only]
    tabledef += flydata_ctl_sql(flydata_tabledef, schema_name)
  rescue => e
    # Catch errors from generating schema. Generally an unsupported data type
    raise TableDefError, {error: "errors generating schema. Please contact us for further instructions", table: flydata_tabledef[:table_name]}
  end
end

.parse_date(value) ⇒ Object



216
217
218
219
220
221
222
223
# File 'lib/flydata/table_def/redshift_table_def.rb', line 216

def self.parse_date(value)
  dt = Date.parse(value)
  dt.strftime('%Y-%m-%d')
rescue ArgumentError => ae
  # '0000-00-00' is valid for mysql date column
  return '0001-01-01' if value == '0000-00-00'
  raise ae
end

.parse_timestamp(value) ⇒ Object



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/flydata/table_def/redshift_table_def.rb', line 195

def self.parse_timestamp(value)
  if value.kind_of?(Integer) or /^\d+$/ === value
    # Unix epoch in UTC
    t = DateTime.strptime(value.to_s, '%s')
  elsif APACHE_TIMESTAMP_REGEXP.match(value)
    # apache time format
    t = DateTime.strptime(value, "[%d/%b/%Y:%H:%M:%S %Z]")
  else
    t = DateTime.parse(value)
  end
  t = t.new_offset(0) # Redshift Plug-in uses UTC
  t.strftime('%Y-%m-%d %H:%M:%S.%6N')
rescue ArgumentError => ae
  # '0000-00-00 00:00:00' is valid for mysql datetime column
  if value.start_with?('0000-00-00 00:00:00')
    return '0001-01-01 00:00:00.000000'
  else
    raise ae
  end
end

.primary_key_sql(flydata_tabledef) ⇒ Object



147
148
149
150
# File 'lib/flydata/table_def/redshift_table_def.rb', line 147

def self.primary_key_sql(flydata_tabledef)
  pks = flydata_tabledef[:columns].select{|col| col[:primary_key]}.collect{|col| col[:column]}
  pks.empty? ? nil : "  PRIMARY KEY (#{pks.join(',')})"
end

.replace_default_value(type, default_value) ⇒ Object



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/flydata/table_def/redshift_table_def.rb', line 131

def self.replace_default_value(type, default_value)
  return NULL_STR if default_value.nil?
  case type
  when 'timestamp'
    if default_value.upcase == "CURRENT_TIMESTAMP"
      'SYSDATE'
    else
      "'#{self.parse_timestamp(default_value)}'"
    end
  when 'date'
    "'#{self.parse_date(default_value)}'"
  else
    "'#{default_value}'"
  end
end

.table_name_for_ddl(table_name, schema_name) ⇒ Object



63
64
65
# File 'lib/flydata/table_def/redshift_table_def.rb', line 63

def self.table_name_for_ddl(table_name, schema_name)
  schema_name.to_s.empty? ? "\"#{table_name}\"" : "\"#{schema_name}\".\"#{table_name}\""
end