Class: TensorStream::Evaluator::OpenclEvaluator

Inherits:
  BaseEvaluator < Object
Includes:
ArrayOpsHelper, MathHelper, OpHelper, OpenCLHelpers::ArrayOps, OpenCLHelpers::ImagesOps, OpenCLHelpers::MathOps, OpenCLHelpers::NNOps
Defined in:
lib/tensor_stream/opencl/opencl_evaluator.rb

Overview

OpenCL hardware accelerated evaluator

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from OpenCLHelpers::ArrayOps

included

Methods included from OpenCLHelpers::ImagesOps

included

Methods included from OpenCLHelpers::NNOps

included

Methods included from OpenCLHelpers::MathOps

included

Constructor Details

#initialize(session, device, thread_pool: nil, log_intermediates: false) ⇒ OpenclEvaluator

Returns a new instance of OpenclEvaluator.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 53

# Returns a new instance of OpenclEvaluator.
#
# Caches the native device's limits (work-item dimensions/sizes, work-group
# size, local memory) up front so they don't have to be queried repeatedly
# during kernel dispatch.
#
# NOTE(review): _create_opencl_context runs before create_command_queue —
# presumably the queue must be created inside the context; preserve this order.
def initialize(session, device, thread_pool: nil, log_intermediates: false)
  super
  _create_opencl_context
  @opencl_device = device.native_device

  @max_work_item_dimensions = @opencl_device.max_work_item_dimensions
  @max_work_item_sizes = @opencl_device.max_work_item_sizes
  @max_work_group_size = @opencl_device.max_work_group_size

  @local_mem_size = @opencl_device.local_mem_size
  # e.g. "gpu" or "cpu" (lowercased native device type)
  @device_type = @opencl_device.type.to_s.downcase

  create_command_queue
end

Instance Attribute Details

#context=(value) ⇒ Object (writeonly)

Sets the attribute context

Parameters:

  • value

    the value to set the attribute context to.



43
44
45
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 43

# Sets the current execution context (a hash — see #run_with_buffer,
# which reads context[:_cache]).
def context=(value)
  @context = value
end

#opencl_context ⇒ Object (readonly)

Returns the value of attribute opencl_context.



42
43
44
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 42

# Returns the value of attribute opencl_context (read-only).
def opencl_context
  @opencl_context
end

#opencl_device ⇒ Object (readonly)

Returns the value of attribute opencl_device.



42
43
44
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 42

# Returns the native OpenCL device assigned in #initialize (read-only).
def opencl_device
  @opencl_device
end

#retain ⇒ Object

Returns the value of attribute retain.



41
42
43
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 41

# Returns the value of attribute retain.
def retain
  @retain
end

Class Method Details

.default_device ⇒ Object

Select the best device available in the system for this evaluator



97
98
99
100
101
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 97

# Select the best device available in the system for this evaluator.
# "Best" is the entry with the highest score as computed by
# query_devices_with_score (entries are [device, score, platform, index]).
def default_device
  devices = OpenclEvaluator.query_devices_with_score
  # max_by is clearer (and avoids a comparator block) vs. max { |a, b| ... }
  best = devices.max_by { |d| d[1] }
  opencl_to_device(best)
end

.fetch_device(query = []) ⇒ Object



76
77
78
79
80
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 76

# Looks up a device from a parsed device-uri query, e.g. ['nvidia_cuda', '1']
# selects device 1 on the nvidia_cuda platform. Falls back to the last
# matching device when the requested index is out of range.
def fetch_device(query = [])
  devices = query_devices_with_score
  # to_s makes the default empty query safe: query[0] is nil, and the
  # resulting empty regex // matches every platform instead of raising
  # NoMethodError on nil.downcase.
  platform_filter = query[0].to_s.downcase
  platform_devices = devices.select { |d| d[0].platform.to_s.tr(' ', '_').downcase =~ /#{platform_filter}/ }
  # query[1].to_i is nil-safe (nil.to_i == 0); clamp to the last device.
  opencl_to_device(platform_devices[[query[1].to_i, platform_devices.size - 1].min])
end

.getset_global_opencl_context(platform) ⇒ Object



103
104
105
106
107
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 103

# Returns the memoized global OpenCL context for +platform+, computing it
# with the given block on first access. Subsequent calls for the same
# platform ignore the block and return the cached value.
def getset_global_opencl_context(platform)
  @global_opencl_context ||= {}
  # ||= already evaluates to the (existing or newly stored) value, so no
  # separate read-back line is needed.
  @global_opencl_context[platform] ||= yield
end

.opencl_to_device(dev) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 82

# Wraps a scored device tuple ([device, score, platform_name, index],
# as produced by query_devices_with_score) in an OpenclDevice whose uri
# has the form "platform_name:index".
def opencl_to_device(dev)
  native = dev[0]
  index = dev[3]
  platform_name = native.platform.name.tr(' ', '_').downcase
  uri = "#{platform_name}:#{index}"

  device_type = native.type.to_s == 'GPU' ? :gpu : :cpu

  wrapped = OpenclDevice.new(uri, device_type, self)
  wrapped.native_device = native
  wrapped
end

.query_devices_with_score ⇒ Object



178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 178

# Enumerates every available OpenCL device across all platforms and scores
# it: GPUs beat CPUs, the "NVIDIA CUDA" platform gets a large bonus, and
# compute units * clock frequency breaks ties.
# Returns an array of [device, score, platform_name, index] tuples.
def self.query_devices_with_score
  OpenCL.platforms.flat_map do |platform|
    available_devices = platform.devices.select { |device| device.available > 0 }
    available_devices.each_with_index.map do |device, index|
      type_score = case device.type.to_s
                   when 'CPU' then 1
                   when 'GPU' then 4
                   else 0
                   end
      vendor_bonus = device.platform.name == 'NVIDIA CUDA' ? 1000 : 0
      perf_score = device.max_compute_units * device.max_clock_frequency

      [device, type_score + vendor_bonus + perf_score, platform.name, index]
    end
  end
end

.query_supported_devices ⇒ Object



69
70
71
72
73
74
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 69

# Returns all available devices wrapped as OpenclDevice instances,
# ordered by ascending score (best device last).
def query_supported_devices
  query_devices_with_score
    .sort_by { |scored| scored[1] }
    .map { |scored| opencl_to_device(scored) }
end

Instance Method Details

#complete_eval(tensor, context) ⇒ Object



167
168
169
170
171
172
173
174
175
176
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 167

# Fully evaluates +tensor+: runs it, enqueues a device-to-host read of the
# resulting buffer(s), then blocks until the read events complete.
# Returns nil for a nil tensor, otherwise the buffer(s) from
# #enqueue_buffer_read.
def complete_eval(tensor, context)
  return nil if tensor.nil?

  buffer = enqueue_buffer_read(tensor, context)
  events = build_event_wait_list([buffer])
  # puts "** wait #{tensor.name} **"
  OpenCL.wait_for_events(events) unless events.empty?
  # puts "** done #{tensor.name} **"
  buffer
end

#convert_from_buffer(tensor, result) ⇒ Object

buffer comes from non-opencl evaluator



135
136
137
138
139
140
141
142
143
144
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 135

# Converts a result produced by a non-opencl evaluator into OpenCL
# buffer(s). An OutputGroup is converted element-wise and re-wrapped;
# anything else becomes a single OpenCL buffer.
def convert_from_buffer(tensor, result)
  host_data = result.buffer

  unless host_data.is_a?(TensorStream::Evaluator::OutputGroup)
    return convert_to_opencl([host_data].flatten, shape_eval(host_data), data_type: result.data_type, name: tensor.name)
  end

  converted = host_data.outputs.zip(host_data.data_types).map do |output, dtype|
    convert_to_opencl([output].flatten, shape_eval(output), data_type: dtype, name: tensor.name)
  end
  TensorStream::Evaluator::OutputGroup.new(converted, host_data.data_types)
end

#enqueue_buffer_read(tensor, context) ⇒ Object

Generate OpenCL instruction to read back from GPU memory to Host memory for a tensor



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 147

# Generate OpenCL instruction to read back from GPU memory to Host memory for a tensor
#
# Evaluates +tensor+ via _run, then enqueues (non-blocking) read-buffer
# commands for the resulting buffer(s), recording each read's event on the
# buffer's +op+. Callers must wait on those events before using the host
# data (see #complete_eval).
def enqueue_buffer_read(tensor, context)
  buffer = _run(tensor, context)
  if buffer.is_a?(Array)
    buffer.collect do |b|
      # zero-sized buffers have nothing to transfer
      next b if b.buffer.size.zero?

      b.op = _opencl_queue.enqueue_read_buffer(b.cl_buffer, b.buffer, event_wait_list: build_event_wait_list([b]))
      b
    end
  else
    # NOTE(review): only the first output of an OutputGroup is read back here
    return buffer.outputs[0] if buffer.is_a?(OutputGroup)
    return buffer if buffer.nil?
    return [] if buffer.buffer.nil?
    return buffer if buffer.buffer.size.zero?

    buffer.op = _opencl_queue.enqueue_read_buffer(buffer.cl_buffer, buffer.buffer, event_wait_list: build_event_wait_list([buffer]))
    buffer
  end
end

#run(tensor, execution_context) ⇒ Object

opencl evaluator main entrypoint



111
112
113
114
115
116
117
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 111

# opencl evaluator main entrypoint
#
# Evaluates +tensor+, waits for the command queue to drain so all device
# work is complete, then converts the result buffer(s) into plain Ruby
# values via read_final_result.
def run(tensor, execution_context)
  result = complete_eval(tensor, execution_context)
  # puts "-------------------wait finish------------------------"
  _opencl_queue.finish
  # puts "-------------------done finish------------------------"
  read_final_result(result)
end

#run_with_buffer(tensor, context, execution_context) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/tensor_stream/opencl/opencl_evaluator.rb', line 119

# Runs +tensor+ (a single tensor or an array of tensors) and wraps each
# evaluated value in a Buffer carrying its data type.
def run_with_buffer(tensor, context, execution_context)
  @context = context
  @context[:_cache][:_cl_buffers] ||= {} if context[:_cache]

  as_buffer = lambda do |t|
    Buffer.new(data_type: t.data_type, buffer: run(t, execution_context))
  end

  if tensor.is_a?(Array)
    tensor.map { |t| as_buffer.call(t) }
  else
    as_buffer.call(tensor)
  end
end