Module: TensorStream::OpenCLHelpers::MathOps
- Included in:
- Evaluator::OpenclEvaluator
- Defined in:
- lib/tensor_stream/opencl/math_ops.rb
Overview
Collection of math functions for interfacing with OpenCL kernels
Class Method Summary
Class Method Details
.included(klass) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/tensor_stream/opencl/math_ops.rb', line 5

# Mixin hook: registers the math operations on the including class by
# evaluating a series of +register_op+ calls in that class's context.
#
# NOTE(review): the operation attributes (+:transpose_a+, +:transpose_b+,
# +:axis+) are read through +tensor.options+; confirm this accessor name
# against the tensor/operation class.
#
# @param klass [Class] the including class; it must provide +register_op+
#   and the helper methods called inside the registered blocks
# @return [Object] whatever +klass.class_eval+ returns
def MathOps.included(klass)
  klass.class_eval do
    # Two-operand ops: all delegate to the helper named after the op itself.
    %i[max min add real_div div sub floor_mod mod mul pow sigmoid_grad squared_difference].each do |op|
      register_op op, noop: true do |context, tensor, inputs|
        execute_2_operand_func(op.to_s, tensor, inputs[0], inputs[1], context)
      end
    end

    # Sums all inputs by repeatedly invoking the 'add' program, accumulating
    # into a single result buffer. A single input is returned untouched.
    register_op :add_n do |_context, tensor, inputs|
      if inputs.size == 1
        inputs[0]
      else
        m, n = inputs[0].shape
        # Missing dimensions (rank < 2) fall back to a size of 1.
        work_group = [m || 1, n || 1]
        cl_m = OpenCL::Int1.new(m || 1)
        cl_n = OpenCL::Int1.new(n || 1)
        cl_switch = OpenCL::Int1.new(0)
        dtype = tensor.data_type

        output_buffer = _create_result_buffer(dtype, inputs[0].shape, "out_#{tensor.name}")
        inputs_queue = inputs.dup
        a = inputs_queue.pop
        until inputs_queue.empty?
          b = inputs_queue.pop
          event_wait_list = build_event_wait_list([a, b])
          method_call = :"add_#{a.data_type}_#{b.data_type}"
          event = _cl_program('add', a: a.data_type, b: b.data_type, dtype: dtype)
                  .send(method_call, _opencl_queue, work_group, cl_m, cl_n, cl_switch,
                        a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer,
                        event_wait_list: event_wait_list)
          # The running total becomes the left operand of the next addition.
          a = output_buffer
          a.op = event
        end

        output_buffer.op = a.op
        output_buffer
      end
    end

    # Floating-point tensors use the 'floor_div' function; every other data
    # type uses 'div'.
    register_op :floor_div, noop: true do |context, tensor, inputs|
      func = fp_type?(tensor.data_type) ? 'floor_div' : 'div'
      execute_2_operand_func(func, tensor, inputs[0], inputs[1], context)
    end

    # Matrix multiplication through the 'gemm' program, with optional
    # transposition of either operand.
    #
    # Raises RuntimeError when an operand has rank < 2 or when the inner
    # dimensions do not match.
    register_op :mat_mul do |_context, tensor, inputs|
      a, b = inputs

      # Validate ranks before reading the second dimension of either shape.
      raise "#{tensor.inputs[0].name} rank must be greater than 1" if a.shape.size < 2
      raise "#{tensor.inputs[1].name} rank must be greater than 1" if b.shape.size < 2

      transpose_a = tensor.options[:transpose_a]
      transpose_b = tensor.options[:transpose_b]

      # m x k times v x n; a transpose flag swaps that operand's dimensions.
      m, k = transpose_a ? [a.shape[1], a.shape[0]] : [a.shape[0], a.shape[1]]
      v, n = transpose_b ? [b.shape[1], b.shape[0]] : [b.shape[0], b.shape[1]]
      result_shape = [m, n]

      # Report the dimensions that were actually compared, so the message is
      # also accurate when a transpose flag is set.
      raise "incompatible shape sizes for matrix multiplication (#{k} != #{v}) #{a.shape} vs #{b.shape}" if k != v

      dtype = tensor.data_type
      a, b = auto_type_cast(a, b, name: "#{tensor.name}/cast_#{a.name}_#{b.data_type}")
      output_buffer = _create_result_buffer(a.data_type, result_shape, tensor.name)

      cl_m = OpenCL::Int1.new(m)
      cl_n = OpenCL::Int1.new(n)
      cl_k = OpenCL::Int1.new(k)
      cl_transpose_a = OpenCL::Int1.new(transpose_a ? 1 : 0)
      cl_transpose_b = OpenCL::Int1.new(transpose_b ? 1 : 0)

      event_wait_list = build_event_wait_list([a, b])
      output_buffer.op = _cl_program('gemm', dtype: dtype)
                         .send(:"gemm_#{dtype}", _opencl_queue, result_shape, cl_m, cl_n, cl_k,
                               cl_transpose_a, cl_transpose_b,
                               a.cl_buffer, b.cl_buffer, output_buffer.cl_buffer,
                               event_wait_list: event_wait_list)
      output_buffer
    end

    # Single-operand ops: all delegate to the helper named after the op itself.
    %i[sign exp tan acos asin sin cos abs sqrt negate square reciprocal tanh tanh_grad sigmoid log1p round floor ceil log].each do |op|
      register_op op, noop: true do |context, tensor, inputs|
        execute_func(op.to_s, tensor, inputs[0], context)
      end
    end

    # Reductions; inputs[1] is forwarded to +reduction+ unchanged.
    %i[sum mean].each do |op|
      register_op op, noop: true do |context, tensor, inputs|
        reduction(context, tensor, inputs[0], inputs[1], op)
      end
    end

    # Product reduction. An empty input buffer short-circuits to a scalar 1.0.
    register_op :prod, noop: true do |context, tensor, inputs|
      input_a = complete_eval(inputs[0], context)
      if input_a.buffer.empty?
        convert_to_opencl([1.0], [], data_type: inputs[0].data_type, name: tensor.name)
      else
        reduction(context, tensor, inputs[0], inputs[1], :prod)
      end
    end

    # argmin / argmax differ only in the comparator handed to
    # +get_op_with_axis+.
    #
    # Raises TensorStream::InvalidArgumentError when the axis lies outside
    # [-rank, rank).
    { argmin: ->(a, b) { a < b }, argmax: ->(a, b) { a > b } }.each do |op, comparator|
      register_op op, buffer: true do |_context, tensor, inputs|
        axis = tensor.options[:axis] || 0
        rank = inputs[0].shape.size
        raise TensorStream::InvalidArgumentError, "Expected dimension in the range [#{-rank},#{rank}) but got #{axis}" if axis < -rank || axis >= rank

        # The buffer is reshaped with the dimensions reversed before being
        # converted to a nested array.
        arr = inputs[0].buffer.reshape(*inputs[0].shape.reverse).to_a
        result = get_op_with_axis(arr, axis, 0, inputs[0].data_type, comparator)
        convert_to_opencl(result, shape_eval(result), data_type: tensor.data_type, name: tensor.name)
      end
    end
  end
end