Class: Embulk::Output::Vertica::OutputThread
- Inherits:
-
Object
- Object
- Embulk::Output::Vertica::OutputThread
- Defined in:
- lib/embulk/output/vertica/output_thread.rb
Instance Method Summary
- #abort_on_error ⇒ Object
- #commit ⇒ Object
- #compress ⇒ Object
-
#copy(conn, sql, &block) ⇒ Object
private.
- #copy_mode ⇒ Object
- #copy_sql ⇒ Object
- #enqueue(page) ⇒ Object
- #fjsonparser ⇒ Object
-
#initialize(task, schema, converters) ⇒ OutputThread
constructor
A new instance of OutputThread.
- #quoted_schema ⇒ Object
- #quoted_table ⇒ Object
- #quoted_temp_table ⇒ Object
- #reject_on_materialized_type_error ⇒ Object
- #run ⇒ Object
- #start ⇒ Object
- #to_json(record) ⇒ Object
- #write_buf(buf, page, &block) ⇒ Object
- #write_gzip(io, page, &block) ⇒ Object
- #write_uncompressed(io, page, &block) ⇒ Object
Constructor Details
#initialize(task, schema, converters) ⇒ OutputThread
Returns a new instance of OutputThread.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 32

# Stores the collaborators, resets the row counters and selects the page
# writer that matches task['compress'].
#
# @param task [#[]] task settings; only the 'compress' key is read here
# @param schema [Object] kept as @schema for later record conversion
# @param converters [#[]] kept as @converters for later record conversion
def initialize(task, schema, converters)
  @task       = task
  @schema     = schema
  @converters = converters

  # Capacity of one: a producer blocks while a page is still waiting.
  @queue = SizedQueue.new(1)

  @num_input_rows    = 0
  @num_output_rows   = 0
  @num_rejected_rows = 0

  # The thread that builds this object; #run re-raises worker errors on it.
  @outer_thread  = Thread.current
  @thread_active = false

  writer_name =
    case task['compress']
    when 'GZIP' then :write_gzip
    else :write_uncompressed
    end
  @write_proc = method(writer_name)
end
Instance Method Details
#abort_on_error ⇒ Object
193 194 195 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 193

# COPY statement fragment controlled by task['abort_on_error'].
#
# @return [String] ' ABORT ON ERROR' when the option is truthy, otherwise ''
def abort_on_error
  return '' unless @task['abort_on_error']

  ' ABORT ON ERROR'
end
#commit ⇒ Object
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 138

# Stops accepting pages, tells the worker thread to finish, waits for it and
# returns the row counters.
#
# @return [Hash] 'num_input_rows', 'num_output_rows' and 'num_rejected_rows'
# @raise [RuntimeError] when the worker thread is no longer alive
def commit
  @thread_active = false
  unless @thread.alive?
    raise RuntimeError, "embulk-output-vertica: thread died accidently"
  end

  # 'finish' is the sentinel that #run's dequeue loop breaks on.
  @queue.push('finish')
  Embulk.logger.trace { "embulk-output-vertica: pushed finish" }
  Thread.pass
  @thread.join

  {
    'num_input_rows'    => @num_input_rows,
    'num_output_rows'   => @num_output_rows,
    'num_rejected_rows' => @num_rejected_rows,
  }
end
#compress ⇒ Object
185 186 187 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 185

# COPY statement fragment naming the compression setting.
#
# @return [String] task['compress'] prefixed with a single space
def compress
  codec = @task['compress']
  " #{codec}"
end
#copy(conn, sql, &block) ⇒ Object
private
158 159 160 161 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 158

# Logs the statement at debug level and hands it to the connection's #copy
# together with the caller's block.
#
# @param conn [#copy] connection object
# @param sql [String] COPY statement to execute
# @return [Object] whatever conn.copy returns (#run destructures it into a
#   row count and a list of rejects)
def copy(conn, sql, &block)
  Embulk.logger.debug("embulk-output-vertica: #{sql}")
  conn.copy(sql, &block)
end
#copy_mode ⇒ Object
189 190 191 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 189

# COPY statement fragment naming the load mode.
#
# @return [String] task['copy_mode'] prefixed with a single space
def copy_mode
  mode = @task['copy_mode']
  " #{mode}"
end
#copy_sql ⇒ Object
163 164 165 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 163

# Builds the COPY ... FROM STDIN statement for the temp table and memoizes
# it in @copy_sql.
#
# @return [String] the COPY statement, ending in ' NO COMMIT'
def copy_sql
  return @copy_sql if @copy_sql

  target  = "#{quoted_schema}.#{quoted_temp_table}"
  options = "#{compress}#{fjsonparser}#{copy_mode}#{abort_on_error}"
  @copy_sql = "COPY #{target} FROM STDIN#{options} NO COMMIT"
end
#enqueue(page) ⇒ Object
51 52 53 54 55 56 57 58 59 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 51

# Hands a page to the worker thread through the queue.
#
# @param page [Object] page to be written by the worker
# @return [Object] the result of @queue.push
# @raise [RuntimeError] when the worker is flagged inactive or is not alive
def enqueue(page)
  unless @thread_active && @thread.alive?
    Embulk.logger.info { "embulk-output-vertica: thread is dead, but still trying to enqueue" }
    raise RuntimeError, "embulk-output-vertica: thread is died, but still trying to enqueue"
  end

  Embulk.logger.trace { "embulk-output-vertica: enqueue" }
  @queue.push(page)
end
#fjsonparser ⇒ Object
197 198 199 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 197

# COPY statement fragment selecting the fjsonparser parser, with the
# reject_on_materialized_type_error argument when that option is set.
#
# @return [String] ' PARSER fjsonparser(...)'
def fjsonparser
  parser_args = reject_on_materialized_type_error
  " PARSER fjsonparser(#{parser_args})"
end
#quoted_schema ⇒ Object
173 174 175 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 173

# Schema name from the task, passed through Jvertica.quote_identifier.
#
# @return [Object] the result of ::Jvertica.quote_identifier
def quoted_schema
  schema_name = @task['schema']
  ::Jvertica.quote_identifier(schema_name)
end
#quoted_table ⇒ Object
177 178 179 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 177

# Target table name from the task, passed through Jvertica.quote_identifier.
#
# @return [Object] the result of ::Jvertica.quote_identifier
def quoted_table
  table_name = @task['table']
  ::Jvertica.quote_identifier(table_name)
end
#quoted_temp_table ⇒ Object
181 182 183 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 181

# Temp table name from the task, passed through Jvertica.quote_identifier.
#
# @return [Object] the result of ::Jvertica.quote_identifier
def quoted_temp_table
  temp_table_name = @task['temp_table']
  ::Jvertica.quote_identifier(temp_table_name)
end
#reject_on_materialized_type_error ⇒ Object
201 202 203 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 201

# Parser argument controlled by task['reject_on_materialized_type_error'].
#
# @return [String] 'reject_on_materialized_type_error=true' when the option
#   is truthy, otherwise ''
def reject_on_materialized_type_error
  if @task['reject_on_materialized_type_error']
    'reject_on_materialized_type_error=true'
  else
    ''
  end
end
#run ⇒ Object
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 84

# Worker body: opens a connection, streams every queued page into one COPY
# statement until the 'finish' sentinel is dequeued, then commits and adds
# the COPY result to the output/rejected row counters.
#
# On failure the transaction is rolled back. Any error is then handled by
# the method-level rescue: the worker is flagged inactive, the queue is
# drained, and the error is re-raised on the thread that created this object.
def run
  Embulk.logger.debug { "embulk-output-vertica: thread started" }
  Vertica.connect(@task) do |jv|
    begin
      last_record = nil # remembered for error logging only
      num_output_rows, rejects = copy(jv, copy_sql) do |stdin|
        while page = @queue.pop
          if page == 'finish'
            Embulk.logger.trace { "embulk-output-vertica: popped finish" }
            break
          end
          Embulk.logger.trace { "embulk-output-vertica: dequeued" }
          @write_proc.call(stdin, page) do |record|
            last_record = record
          end
        end
      end
      Embulk.logger.debug { "embulk-output-vertica: thread finished" }
      num_rejected_rows = rejects.size
      @num_output_rows += num_output_rows
      @num_rejected_rows += num_rejected_rows
      Embulk.logger.info { "embulk-output-vertica: COMMIT!" }
      jv.commit
      Embulk.logger.debug { "embulk-output-vertica: COMMITTED!" }
    rescue java.sql.SQLDataException => e
      # COPY may fail before any record was seen; converting nil would raise
      # here and skip the rollback below.
      json = last_record ? to_json(last_record) : nil
      # Match on the exception's message, not on the exception object.
      if @task['reject_on_materialized_type_error'] && e.message =~ /Rejected by user-defined parser/
        Embulk.logger.warn "embulk-output-vertica: ROLLBACK! some of column types and values types do not fit #{json}"
      else
        Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
      end
      Embulk.logger.info { "embulk-output-vertica: last_record: #{json}" }
      jv.rollback
      raise e # die transaction
    rescue => e
      Embulk.logger.warn "embulk-output-vertica: ROLLBACK!"
      jv.rollback
      raise e
    end
  end
rescue => e
  @thread_active = false # not to be enqueued any more
  while @queue.size > 0
    @queue.pop # dequeue all because some might be still trying @queue.push and get blocked, need to release
  end
  @outer_thread.raise e.class.new("#{e.message}\n  #{e.backtrace.join("\n  ")}")
end
#start ⇒ Object
133 134 135 136 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 133

# Launches the worker thread running #run and marks it as accepting pages.
#
# @return [true]
def start
  @thread = Thread.new { run }
  @thread_active = true
end
#to_json(record) ⇒ Object
167 168 169 170 171 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 167

# Serializes one record as a JSON object keyed by column name, passing each
# value through the converter registered for its column.
#
# @param record [Array] values in the same order as @schema.names
# @return [String] JSON text
def to_json(record)
  pairs = @schema.names.zip(record).map do |name, raw|
    [name, @converters[name].call(raw)]
  end
  Hash[pairs].to_json
end
#write_buf(buf, page, &block) ⇒ Object
73 74 75 76 77 78 79 80 81 82 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 73

# Appends every record of the page to buf as one JSON line and counts it in
# @num_input_rows. The optional block receives each record before it is
# converted.
#
# @param buf [#<<] sink for the JSON lines
# @param page [#each] records to write
# @return [Object] the result of page.each
def write_buf(buf, page, &block)
  page.each do |row|
    block.call(row) if block
    Embulk.logger.trace { "embulk-output-vertica: record #{row}" }
    line = to_json(row)
    Embulk.logger.trace { "embulk-output-vertica: to_json #{line}" }
    buf << line << "\n"
    @num_input_rows += 1
  end
end
#write_gzip(io, page, &block) ⇒ Object
61 62 63 64 65 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 61

# Writes the page to io as one finished Zlib::Deflate stream.
#
# @param io [#<<] destination for the compressed bytes
# @param page [#each] records to write
# @return [Object] the result of io << compressed_bytes
def write_gzip(io, page, &block)
  deflater = Zlib::Deflate.new
  write_buf(deflater, page, &block)
  io << deflater.finish
ensure
  # Release the zlib stream right away, also when writing the page failed,
  # instead of leaving it to the garbage collector.
  deflater.close if deflater && !deflater.closed?
end
#write_uncompressed(io, page, &block) ⇒ Object
67 68 69 70 71 |
# File 'lib/embulk/output/vertica/output_thread.rb', line 67

# Writes the page to io as plain text, buffering the whole page in a String
# first.
#
# @param io [#<<] destination for the buffered text
# @param page [#each] records to write
# @return [Object] the result of io << text
def write_uncompressed(io, page, &block)
  io << ''.tap { |payload| write_buf(payload, page, &block) }
end