Class: TensorStream::Evaluator::OpenclEvaluator

Inherits:
BaseEvaluator
  • Object
show all
Includes:
ArrayOpsHelper, CLEventHelpers, MathHelper, OpHelper, OpenCLHelpers::ArrayOps, OpenCLHelpers::ImagesOps, OpenCLHelpers::MathOps, OpenCLHelpers::NNOps, OpenCLHelpers::RandomOps
Defined in:
lib/tensor_stream/opencl/opencl_evaluator.rb

Overview

OpenCL hardware accelerated evaluator

Constant Summary

Constants included from OpenCLHelpers::RandomOps

OpenCLHelpers::RandomOps::RAND_TABLE_SIZE

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from CLEventHelpers

#build_event_wait_list

Methods included from OpenCLHelpers::RandomOps

included

Methods included from OpenCLHelpers::ArrayOps

included

Methods included from OpenCLHelpers::ImagesOps

included

Methods included from OpenCLHelpers::NNOps

included

Methods included from OpenCLHelpers::MathOps

included

Constructor Details

#initialize(session, device, thread_pool: nil, log_intermediates: false) ⇒ OpenclEvaluator

Returns a new instance of OpenclEvaluator.



56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 56

# Builds an OpenCL-backed evaluator bound to a specific device.
#
# @param session the TensorStream session this evaluator runs under (forwarded to super)
# @param device a device wrapper whose #native_device exposes the raw OpenCL device
# @param thread_pool [Object, nil] optional worker pool (handled by super)
# @param log_intermediates [Boolean] whether to log intermediate results (handled by super)
def initialize(session, device, thread_pool: nil, log_intermediates: false)
  super
  _create_opencl_context
  @opencl_device = device.native_device

  # Cache capability limits reported by the OpenCL runtime — presumably used
  # elsewhere when sizing kernel work groups (not visible in this file).
  @max_work_item_dimensions = @opencl_device.max_work_item_dimensions
  @max_work_item_sizes = @opencl_device.max_work_item_sizes
  @max_work_group_size = @opencl_device.max_work_group_size

  @local_mem_size = @opencl_device.local_mem_size
  # e.g. "gpu" / "cpu", derived from the native device type
  @device_type = @opencl_device.type.to_s.downcase

  # Command queue creation must come after the context exists.
  create_command_queue
end

Instance Attribute Details

#context=(value) ⇒ Object (writeonly)

Sets the attribute context

Parameters:

  • value

    the value to set the attribute context to.



44
45
46
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 44

# Write-only accessor: installs the execution-context hash used while
# evaluating tensors (also assigned internally by #run_with_buffer).
def context=(value)
  @context = value
end

#opencl_contextObject (readonly)

Returns the value of attribute opencl_context.



43
44
45
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 43

# Read-only accessor for the underlying OpenCL context handle.
def opencl_context
  @opencl_context
end

#opencl_deviceObject (readonly)

Returns the value of attribute opencl_device.



43
44
45
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 43

# Read-only accessor for the native OpenCL device this evaluator is bound to
# (assigned in #initialize from device.native_device).
def opencl_device
  @opencl_device
end

#retainObject

Returns the value of attribute retain.



42
43
44
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 42

# Reader for the retain flag. Its semantics are not visible in this file —
# presumably marks resources that should not be released. TODO confirm.
def retain
  @retain
end

Class Method Details

.default_deviceObject

Select the best device available in the system for this evaluator



100
101
102
103
104
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 100

# Select the best device available in the system for this evaluator.
#
# Devices are ranked by .query_devices_with_score; the highest-scoring
# entry wins.
#
# @return [OpenclDevice] wrapper around the winning native device
def default_device
  devices = OpenclEvaluator.query_devices_with_score
  # max_by on the score field is the idiomatic (and clearer) form of
  # max { |a, b| a[1] <=> b[1] }.
  best = devices.max_by { |entry| entry[1] }
  opencl_to_device(best)
end

.fetch_device(query = []) ⇒ Object



79
80
81
82
83
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 79

# Resolve a device from a query of the form [platform_substring, index].
# The index is clamped to the last matching device so an out-of-range
# request still returns a device from the requested platform.
def fetch_device(query = [])
  platform_pattern = /#{query[0].downcase}/
  candidates = query_devices_with_score.select do |entry|
    entry[0].platform.to_s.tr(' ', '_').downcase =~ platform_pattern
  end
  requested_index = [query[1].to_i, candidates.size - 1].min
  opencl_to_device(candidates[requested_index])
end

.getset_global_opencl_context(platform) ⇒ Object



106
107
108
109
110
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 106

# Lazily builds and memoizes one OpenCL context per platform.
# The block is invoked only the first time a platform is requested;
# later calls return the cached context without yielding.
def getset_global_opencl_context(platform)
  @global_opencl_context = {} if @global_opencl_context.nil?
  @global_opencl_context[platform] = yield unless @global_opencl_context[platform]
  @global_opencl_context[platform]
end

.opencl_to_device(dev) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 85

# Wrap a scored-device tuple (as produced by .query_devices_with_score)
# in an OpenclDevice addressed by a "platform_name:index" URI.
def opencl_to_device(dev)
  native = dev[0]
  index = dev[3]
  platform_label = native.platform.name.tr(' ', '_').downcase
  device_uri = [platform_label, index].join(':')

  kind = native.type.to_s == 'GPU' ? :gpu : :cpu

  wrapper = OpenclDevice.new(device_uri, kind, self)
  wrapper.native_device = native
  wrapper
end

.query_devices_with_scoreObject



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 191

# Enumerate every available OpenCL device across all platforms, scoring
# each one (GPUs over CPUs, NVIDIA CUDA heavily favored, then raw
# compute capacity). Returns tuples of [device, score, platform_name, index].
def self.query_devices_with_score
  OpenCL.platforms.flat_map do |platform|
    usable = platform.devices.select { |dev| dev.available > 0 }
    usable.each_with_index.map do |dev, idx|
      score = case dev.type.to_s
              when 'CPU' then 1
              when 'GPU' then 4
              else 0
              end

      # Strong bias toward NVIDIA CUDA platforms.
      score += 1000 if dev.platform.name == 'NVIDIA CUDA'

      # Rough throughput estimate: compute units x clock frequency.
      score += dev.max_compute_units * dev.max_clock_frequency

      [dev, score, platform.name, idx]
    end
  end
end

.query_supported_devicesObject



72
73
74
75
76
77
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 72

# All usable devices wrapped as OpenclDevice, ordered from lowest to
# highest score.
def query_supported_devices
  scored = query_devices_with_score
  ranked = scored.sort_by { |entry| entry[1] }
  ranked.map { |entry| opencl_to_device(entry) }
end

Instance Method Details

#complete_eval(tensor, context) ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 173

# Evaluate a tensor (or array of tensors), enqueue device-to-host reads
# for the results, and block until those reads complete. Returns the
# read buffer(s) — an array when given an array, a single buffer otherwise.
def complete_eval(tensor, context)
  return nil if tensor.nil?

  multiple = tensor.is_a?(Array)
  buffers =
    if multiple
      tensor.map { |t| enqueue_buffer_read(t, context) }
    else
      [enqueue_buffer_read(tensor, context)]
    end

  pending = build_event_wait_list(buffers)
  OpenCL.wait_for_events(pending) unless pending.empty?

  multiple ? buffers : buffers.first
end

#convert_from_buffer(tensor, result) ⇒ Object

Converts a buffer that comes from a non-OpenCL evaluator.



138
139
140
141
142
143
144
145
146
147
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 138

# Converts a result produced by a non-OpenCL evaluator into OpenCL
# buffer(s). A plain buffer becomes a single OpenCL buffer; an
# OutputGroup is converted output-by-output and re-wrapped.
def convert_from_buffer(tensor, result)
  unless result.buffer.is_a?(TensorStream::Evaluator::OutputGroup)
    return convert_to_opencl([result.buffer].flatten, shape_eval(result.buffer), data_type: result.data_type, name: tensor.name)
  end

  group = result.buffer
  converted = group.outputs.zip(group.data_types).map do |output, data_type|
    convert_to_opencl([output].flatten, shape_eval(output), data_type: data_type, name: tensor.name)
  end
  TensorStream::Evaluator::OutputGroup.new(converted, group.data_types)
end

#enqueue_buffer_read(tensor, context) ⇒ Object

Generates an OpenCL instruction to read a tensor back from GPU memory into host memory.



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 150

# Generates OpenCL instruction(s) to read a tensor's data back from device
# (GPU) memory into host memory.
#
# The tensor is evaluated via _run first; a non-blocking read is then
# enqueued for each resulting buffer, with the read event stored on the
# buffer's +op+ so callers can wait on it before touching the host data.
def enqueue_buffer_read(tensor, context)
  buffer = _run(tensor, context)
  if buffer.is_a?(Array)
    # NOTE(review): unlike the scalar path below, this branch performs no
    # lazy allocation — confirm array elements are never LazyBuffer.
    buffer.collect do |b|
      # Zero-sized buffers have nothing to read.
      next b if b.buffer.size.zero?

      b.op = _opencl_queue.enqueue_read_buffer(b.cl_buffer, b.buffer, event_wait_list: build_event_wait_list([b]))
      b
    end
  else
    # Early exits: unwrap an OutputGroup to its first output, pass nil
    # through, and skip reads for missing or zero-sized host buffers.
    return buffer.outputs[0] if buffer.is_a?(OutputGroup)
    return buffer if buffer.nil?
    return [] if buffer.buffer.nil?
    return buffer if buffer.buffer.size.zero?

    # lazy allocate: replace the LazyBuffer placeholder with a real narray
    # sized to receive the device data
    buffer.buffer = OpenCLBuffer.allocate_narray_for_type(buffer.buffer.data_type, buffer.buffer.size) if buffer.buffer.is_a?(OpenCLBuffer::LazyBuffer)

    buffer.op = _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: build_event_wait_list([buffer]))
    buffer
  end
end

#run(tensor, execution_context) ⇒ Object

OpenCL evaluator main entry point.



114
115
116
117
118
119
120
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 114

# OpenCL evaluator main entry point: evaluate the tensor, drain the
# command queue, then materialize the final host-side result.
def run(tensor, execution_context)
  evaluated = complete_eval(tensor, execution_context)
  # Block until every queued OpenCL command has finished executing.
  _opencl_queue.finish
  read_final_result(evaluated)
end

#run_with_buffer(tensor, context, execution_context) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 122

# Evaluate a tensor (or array of tensors) under the given context and
# wrap each result in a Buffer carrying its data type.
def run_with_buffer(tensor, context, execution_context)
  @context = context
  @context[:_cache][:_cl_buffers] ||= {} if context[:_cache]

  to_buffer = lambda do |t|
    Buffer.new(data_type: t.data_type, buffer: run(t, execution_context))
  end

  tensor.is_a?(Array) ? tensor.map(&to_buffer) : to_buffer.call(tensor)
end