Class: Fluent::RedshiftOutput

Inherits:

BufferedOutput

Object
BufferedOutput
Fluent::RedshiftOutput

show all

Defined in: lib/fluent/plugin/out_redshift_auto.rb

Constant Summary collapse

IGNORE_REDSHIFT_ERROR_REGEXP = Matches Redshift "load into table failed" errors (invalid data format); #write logs these errors instead of re-raising them.

/^ERROR:  Load into table '[^']+' failed\./

Instance Method Summary collapse

#configure(conf) ⇒ Object
#format(tag, time, record) ⇒ Object
#initialize ⇒ RedshiftOutput constructor

A new instance of RedshiftOutput.
#start ⇒ Object
#write(chunk) ⇒ Object

Constructor Details

#initialize ⇒ `RedshiftOutput`

Returns a new instance of RedshiftOutput.

# File 'lib/fluent/plugin/out_redshift_auto.rb', line 10

# Builds the output plugin instance.
#
# After the parent constructor has run, the libraries this plugin relies on
# are loaded lazily, in the order listed below.
def initialize
  super
  %w[aws-sdk zlib time tempfile pg json csv].each { |library| require library }
end

Instance Method Details

#configure(conf) ⇒ `Object`

# File 'lib/fluent/plugin/out_redshift_auto.rb', line 47

# Post-processes the plugin configuration after the parent class has handled it.
#
# Normalises @path, records the utc flag, assembles the PG connection
# settings, picks a delimiter when none was configured, and prepares the
# COPY statement template used by #write.
#
# @param conf [#[]] plugin configuration; only conf['utc'] is read directly here
def configure(conf)
  super

  # A multi-character prefix must end with "/"; a bare "/" means "no prefix".
  needs_trailing_slash = /.+[^\/]$/ =~ @path
  @path = "#{@path}/" if needs_trailing_slash
  @path = "" if @path == "/"

  if conf['utc']
    @utc = true
  end

  # Connection settings handed to PG.connect in #write.
  @db_conf = {
    host: @redshift_host,
    port: @redshift_port,
    dbname: @redshift_dbname,
    user: @redshift_user,
    password: @redshift_password
  }

  # Fall back to a delimiter derived from the file type when none is set.
  if @delimiter.nil? || @delimiter.empty?
    @delimiter = determine_delimiter(@file_type)
  end

  $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")

  # The %s placeholders are, in order: table name, S3 URI, AWS secret key.
  @copy_sql_template = "copy #{@redshift_schemaname}.%s from '%s' CREDENTIALS 'aws_access_key_id=#{@aws_key_id};aws_secret_access_key=%s' delimiter '#{@delimiter}' GZIP TRUNCATECOLUMNS ESCAPE FILLRECORD ACCEPTANYDATE;"
end

#format(tag, time, record) ⇒ `Object`

# File 'lib/fluent/plugin/out_redshift_auto.rb', line 77

# Serializes one event for the buffer chunk.
#
# In JSON mode (json? is true) the entry is the MessagePack encoding of the
# record's JSON text; when @make_auto_table is 1 the record's JSON keys are
# first handed to make_table_from_tag_name together with the tag.
# In every other mode the entry is the value stored under @record_log_tag,
# followed by a newline.
#
# @param tag [String] event tag, forwarded to make_table_from_tag_name
# @param time [Object] event time (not used by this method)
# @param record [Hash] event record
# @return [String] the formatted chunk entry
def format(tag, time, record)
  # Keep the JSON text in its own variable so the original Hash stays usable.
  json_text = JSON.generate(record)
  json_mode = json?

  if @make_auto_table == 1 && json_mode
    # Column names are the record keys exactly as they appear in the JSON text.
    make_table_from_tag_name(tag, JSON.parse(json_text).keys)
  end

  return json_text.to_msgpack if json_mode

  # Look the log field up in the Hash. Indexing the JSON String instead would
  # call String#[] and yield the tag name itself (or nil), not the field value.
  "#{record[@record_log_tag]}\n"
end

#start ⇒ `Object`

# File 'lib/fluent/plugin/out_redshift_auto.rb', line 64

# Starts the plugin: after the parent class has started, creates the S3
# client from the configured credentials and looks up the target bucket.
def start
  super
  $log.debug format_log("redshift file_type:#{@file_type} delimiter:'#{@delimiter}'")

  # S3 client settings; the endpoint is only passed when one is configured.
  s3_settings = { access_key_id: @aws_key_id, secret_access_key: @aws_sec_key }
  if @s3_endpoint
    s3_settings[:s3_endpoint] = @s3_endpoint
  end

  @s3 = AWS::S3.new(s3_settings)
  @bucket = @s3.buckets[@s3_bucket]
end

#write(chunk) ⇒ `Object`

# File 'lib/fluent/plugin/out_redshift_auto.rb', line 91

# Flushes one buffer chunk: gzips it into a temporary file, uploads that file
# to S3 and runs a Redshift COPY from the uploaded object.
#
# The target table name is the chunk file's basename up to its first dot.
#
# @param chunk [#path] buffer chunk; it is also passed to the gz helpers
# @return [Boolean] true after a successful COPY; false when the chunk
#   produced no data or Redshift reported an ignorable load error
# @raise [PG::Error] any database error that does not match
#   IGNORE_REDSHIFT_ERROR_REGEXP
def write(chunk)
  $log.debug format_log("start creating gz.")
  file_name = File.basename(chunk.path)
  table_name = file_name.sub(/\..*/, "")

  # create a gz file; keep a handle on the Tempfile so it can be removed below
  tmpfile = Tempfile.new("s3-")
  tmp = if json?
          create_gz_file_from_json(tmpfile, chunk, @delimiter)
        else
          create_gz_file_from_msgpack(tmpfile, chunk)
        end

  # no data -> skip
  unless tmp
    $log.debug format_log("received no valid data. ")
    return false # for debug
  end

  # create a file path with time format
  s3path = create_s3path(@bucket, @path)

  # upload gz to s3
  @bucket.objects[s3path].write(Pathname.new(tmp.path),
                                :acl => :bucket_owner_full_control)

  # copy gz on s3 to redshift
  s3_uri = "s3://#{@s3_bucket}/#{s3path}"
  sql = @copy_sql_template % [table_name, s3_uri, @aws_sec_key]
  # Never log the real statement: it embeds the AWS secret access key.
  $log.debug format_log(@copy_sql_template % [table_name, s3_uri, "****"])
  $log.debug format_log("start copying. s3_uri=#{s3_uri}")

  conn = nil
  begin
    conn = PG.connect(@db_conf)
    conn.exec(sql)
    $log.info format_log("completed copying to redshift. s3_uri=#{s3_uri}")
  rescue PG::Error => e
    $log.error format_log("failed to copy data into redshift. s3_uri=#{s3_uri}"), :error=>e.to_s
    # Load errors caused by invalid data are logged and dropped, not retried.
    raise unless e.to_s =~ IGNORE_REDSHIFT_ERROR_REGEXP
    return false # for debug
  ensure
    # Best-effort close; a failure here must not mask the COPY result.
    conn.close rescue nil if conn
  end
  true # for debug
ensure
  # Close and unlink the temporary gz file on every exit path.
  tmpfile.close(true) if tmpfile
end

Class: Fluent::RedshiftOutput

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ RedshiftOutput

Instance Method Details

#configure(conf) ⇒ Object

#format(tag, time, record) ⇒ Object

#start ⇒ Object

#write(chunk) ⇒ Object

#initialize ⇒ `RedshiftOutput`

#configure(conf) ⇒ `Object`

#format(tag, time, record) ⇒ `Object`

#start ⇒ `Object`

#write(chunk) ⇒ `Object`