Class: Flydata::TableDef::RedshiftTableDef

Inherits:
Object
  • Object
show all
Defined in:
lib/flydata/table_def/redshift_table_def.rb

Constant Summary collapse

TYPE_MAP_F2R =
{
  'binary' => {type: 'varchar', use_params: true, default_value: ''},
  'char' => {type: 'char', use_params: true, default_value: ''},
  'date' => {type: 'date', default_value: '0000-01-01'},
  'datetime' => {type: 'timestamp', default_value: '0000-01-01'},
  'enum' => {type: 'varchar encode bytedict', default_value: ''},
  'float4' => {type: 'float4', default_value: '0'},
  'float4 unsigned' => {type: 'float4', default_value: '0'},
  'float8' => {type: 'float8', default_value: '0'},
  'float8 unsigned' => {type: 'float8', default_value: '0'},
  'int1' => {type: 'int2', default_value: '0'},
  'int1 unsigned' => {type: 'int2', unsigned: true, default_value: '0'},
  'int2' => {type: 'int2', default_value: '0'},
  'int2 unsigned' => {type: 'int4', unsigned: true, default_value: '0'},
  'int3' => {type: 'int4', default_value: '0'},
  'int3 unsigned' => {type: 'int4', unsigned: true, default_value: '0'},
  'int4' => {type: 'int4', default_value: '0'},
  'int4 unsigned' => {type: 'int8', unsigned: true, default_value: '0'},
  'int8' => {type: 'int8', default_value: '0'},
  'int8 unsigned' => {type: 'numeric(20,0)', unsigned: true, default_value: '0'},
  'numeric' => {type: 'numeric', use_params: true, max_size: [38,37], default_value: '0'},
  'numeric unsigned' => {type: 'numeric', use_params: true, max_size: [38,37], default_value: '0'},
  'text' => {type: 'varchar(max)', default_value: ''},
  'time' => {type: 'timestamp', default_value: '0000-01-01'},
  'varbinary' => {type: 'varchar', use_params: true, max_size: 65535, default_value: ''},
  'varchar' => {type: 'varchar', use_params: true, max_size: 65535, default_value: ''},
}
FLYDATA_CTL_COLUMNS_TABLE =
"flydata_ctl_columns"
CREATE_FLYDATA_CTL_TABLE_SQL =
"CREATE TABLE %s(\n  id integer NOT NULL IDENTITY(1,1),\n  table_name varchar(128) NOT NULL,\n  column_name varchar(128) NOT NULL,\n  src_data_type varchar(1024) NOT NULL,\n  revision int NOT NULL DEFAULT 1,\n  ordinal_position int NOT NULL,\n  PRIMARY KEY(id)\n) DISTKEY(table_name) SORTKEY(table_name);\n"
CREATE_TABLE_SQL =
"DROP TABLE %s;\nCREATE TABLE %s (\n%s\n);\n"
NULL_STR =
"NULL"
FLYDATA_CTL_COLUMNS_SQL =
"DELETE FROM %s WHERE table_name = '%s';\nINSERT INTO %s (table_name, column_name, src_data_type, ordinal_position) VALUES\n"
APACHE_TIMESTAMP_REGEXP =
Regexp.new('^(?<apache_time_format>\[[0-3]\d\/\D{3}\/[1-2]\d{3}:[0-2]\d:[0-5]\d:[0-5]\d ?[\+\-]\d{2}:?\d{2}\])$')

Class Method Summary collapse

Class Method Details

.check_and_replace_max(params, max_size_a) ⇒ Object



177
178
179
180
181
182
183
184
# File 'lib/flydata/table_def/redshift_table_def.rb', line 177

def self.check_and_replace_max(params, max_size_a)
    final_params = []
    params.split(",").each_with_index do |param, i|
      final_params << (/\d+/.match(param) && max_size_a[i] && param.to_i > max_size_a[i].to_i ? 
                          max_size_a[i] : param)
    end
    final_params.join(",")
end

.column_def_sql(column, opt = {}) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/flydata/table_def/redshift_table_def.rb', line 89

def self.column_def_sql(column, opt = {})
  type = column[:type]
  if type =~ /\((.*?)\)/
    type = $` + $'
    params = $1
  end

  type_info = TYPE_MAP_F2R[type]
  raise "Unsupported type '#{column[:type]}'" if type_info.nil?

  rs_type = if type_info[:use_params] && params && !params.nil?
              params = check_and_replace_max(params, Array(type_info[:max_size])) if type_info[:max_size]
              type_info[:type] + "(#{params})"
            else
              type_info[:type]
            end
  line = %Q|  "#{column[:column]}" #{rs_type}|
  line += " NOT NULL" if column[:not_null]
  if (column.has_key?(:default))
    val = replace_default_value(type_info[:type], column[:default])
    line += " DEFAULT #{val}"
  elsif column[:not_null] && opt[:for] == :alter_table
    # Redshift doesn't allow adding a not null column without default value
    # Add a defalt value
    line += " DEFAULT '#{type_info[:default_value]}'"
  end
# Commented out because no IDENTITY column must be used for a replicated table.
# Values come from the master.
#    line += " IDENTITY(1, 1)" if (column[:auto_increment])

  line
end

.comment_sql(flydata_tabledef, schema_name) ⇒ Object



145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/flydata/table_def/redshift_table_def.rb', line 145

def self.comment_sql(flydata_tabledef, schema_name)
  sql = ""
  flydata_tabledef[:columns].each do |col|
    next unless col[:comment]

    sql += "COMMENT ON COLUMN \#{table_name_for_ddl(flydata_tabledef[:table_name], schema_name)}.\"\#{col[:column]}\"\nIS '\#{col[:comment]}';\n"
  end
  sql
end

.create_flydata_ctl_table_sql(schema_name) ⇒ Object



64
65
66
67
68
# File 'lib/flydata/table_def/redshift_table_def.rb', line 64

def self.create_flydata_ctl_table_sql(schema_name)
  # No drop table here intentionally because losing the data is fatal.
  tbl = flydata_ctl_table_for_ddl(schema_name)
  CREATE_FLYDATA_CTL_TABLE_SQL % [tbl, tbl]
end

.create_table_sql(flydata_tabledef, schema_name) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
# File 'lib/flydata/table_def/redshift_table_def.rb', line 77

def self.create_table_sql(flydata_tabledef, schema_name)
  lines = flydata_tabledef[:columns].collect{|column| column_def_sql(column) }
  pk_def = primary_key_sql(flydata_tabledef)
  lines << pk_def if pk_def

  contents = lines.join(",\n")

  table_name = flydata_tabledef[:table_name]
  redshift_tbl = table_name_for_ddl(table_name, schema_name)
  CREATE_TABLE_SQL % [redshift_tbl, redshift_tbl, contents]
end

.escape(text) ⇒ Object



173
174
175
# File 'lib/flydata/table_def/redshift_table_def.rb', line 173

def self.escape(text)
  text.gsub("'", "\\\\'")
end

.flydata_ctl_sql(flydata_tabledef, schema_name) ⇒ Object



162
163
164
165
166
167
168
169
170
171
# File 'lib/flydata/table_def/redshift_table_def.rb', line 162

def self.flydata_ctl_sql(flydata_tabledef, schema_name)
  flydata_ctl_tbl = flydata_ctl_table_for_ddl(schema_name)
  sql = FLYDATA_CTL_COLUMNS_SQL % [ flydata_ctl_tbl, flydata_tabledef[:table_name], flydata_ctl_tbl ]
  values = []
  flydata_tabledef[:columns].each.with_index(1) do |col, i|
    values << "('#{flydata_tabledef[:table_name]}', '#{col[:column]}', '#{escape(col[:type])}', #{i})"
  end
  sql += values.join(",\n") + ';'
  sql
end

.flydata_ctl_table_for_ddl(schema_name) ⇒ Object



60
61
62
# File 'lib/flydata/table_def/redshift_table_def.rb', line 60

def self.flydata_ctl_table_for_ddl(schema_name)
  table_name_for_ddl(FLYDATA_CTL_COLUMNS_TABLE, schema_name)
end

.from_flydata_tabledef(flydata_tabledef, options = {}) ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/flydata/table_def/redshift_table_def.rb', line 32

def self.from_flydata_tabledef(flydata_tabledef, options = {})
  options[:flydata_ctl_table] = true unless options.has_key?(:flydata_ctl_table)
  schema_name = options[:schema_name]

  tabledef = ""
  tabledef += create_flydata_ctl_table_sql(schema_name) if options[:flydata_ctl_table]
  tabledef += create_table_sql(flydata_tabledef, schema_name) unless options[:ctl_only]
  tabledef += comment_sql(flydata_tabledef, schema_name) unless options[:ctl_only]
  tabledef += flydata_ctl_sql(flydata_tabledef, schema_name)
end

.parse_date(value) ⇒ Object



209
210
211
212
213
214
215
216
# File 'lib/flydata/table_def/redshift_table_def.rb', line 209

def self.parse_date(value)
  dt = Date.parse(value)
  dt.strftime('%Y-%m-%d')
rescue ArgumentError => ae
  # '0000-00-00' is valid for mysql date column
  return '0001-01-01' if value == '0000-00-00'
  raise ae
end

.parse_timestamp(value) ⇒ Object



188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'lib/flydata/table_def/redshift_table_def.rb', line 188

def self.parse_timestamp(value)
  if value.kind_of?(Integer) or /^\d+$/ === value
    # Unix epoch in UTC
    t = DateTime.strptime(value.to_s, '%s')
  elsif APACHE_TIMESTAMP_REGEXP.match(value)
    # apache time format
    t = DateTime.strptime(value, "[%d/%b/%Y:%H:%M:%S %Z]")
  else
    t = DateTime.parse(value)
  end
  t = t.new_offset(0) # Redshift Plug-in uses UTC
  t.strftime('%Y-%m-%d %H:%M:%S.%6N')
rescue ArgumentError => ae
  # '0000-00-00 00:00:00' is valid for mysql datetime column
  if value.start_with?('0000-00-00 00:00:00')
    return '0001-01-01 00:00:00.000000'
  else
    raise ae
  end
end

.primary_key_sql(flydata_tabledef) ⇒ Object



140
141
142
143
# File 'lib/flydata/table_def/redshift_table_def.rb', line 140

def self.primary_key_sql(flydata_tabledef)
  pks = flydata_tabledef[:columns].select{|col| col[:primary_key]}.collect{|col| col[:column]}
  pks.empty? ? nil : "  PRIMARY KEY (#{pks.join(',')})"
end

.replace_default_value(type, default_value) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/flydata/table_def/redshift_table_def.rb', line 124

def self.replace_default_value(type, default_value)
  return NULL_STR if default_value.nil?
  case type
  when 'timestamp'
    if default_value.upcase == "CURRENT_TIMESTAMP"
      'SYSDATE'
    else
      "'#{self.parse_timestamp(default_value)}'"
    end
  when 'date'
    "'#{self.parse_date(default_value)}'"
  else
    "'#{default_value}'"
  end
end

.table_name_for_ddl(table_name, schema_name) ⇒ Object



56
57
58
# File 'lib/flydata/table_def/redshift_table_def.rb', line 56

def self.table_name_for_ddl(table_name, schema_name)
  schema_name.to_s.empty? ? "\"#{table_name}\"" : "\"#{schema_name}\".\"#{table_name}\""
end