Class: ComputeUnit::NvidiaGpu
- Inherits:
-
Gpu
show all
- Defined in:
- lib/compute_unit/gpus/nvidia_gpu.rb
Constant Summary
collapse
- VENDOR_ID =
'10de'
- MAKE =
'Nvidia'
- SUBTYPE =
'nvidia'
- NVIDIA_SMI =
'/usr/bin/nvidia-smi'
- NVIDIA_PROC_PATH =
ENV['NVIDIA_PROC_PATH'] || File.join(ComputeUnit::Device::PROC_PATH, 'driver', 'nvidia', 'gpus')
Constants inherited
from Gpu
Gpu::DEVICE_CLASS, Gpu::DEVICE_CLASS_NAME
Constants inherited
from ComputeBase
ComputeBase::CACHE_TIMEOUT
Constants inherited
from Device
Device::PROC_PATH, Device::SYSFS_DEVICES_PATH
Instance Attribute Summary
Attributes inherited from Gpu
#bios, #name, #pci_loc, #use_opencl
Attributes inherited from ComputeBase
#compute_type, #index, #power_offset, #serial, #timestamp, #type, #uuid
Attributes inherited from Device
#device_class_id, #device_id, #device_path, #device_vendor_id, #make, #model, #subsystem_device_id, #subsystem_vendor_id, #vendor
Class Method Summary
collapse
-
.blank_data ⇒ Object
-
.create_from_path(device_path, index, use_opencl = false) ⇒ Object
-
.devices ⇒ Array
-
.find_all(use_opencl = false) ⇒ Array
-
.read_information_file(device_path) ⇒ Hash
GTX 1070", :irq=>"130", :gpu_uuid=>"GPU-0116fb5c-66f4-1cba-c216-97f4600a8152", :video_bios=>"86.04.50.40.4a", :bus_type=>"PCIe", :dma_size=>"47 bits", :dma_mask=>"0x7fffffffffff", :bus_location=>"0000:0d:00.0", :device_minor=>"7".
Instance Method Summary
collapse
-
#core_clock ⇒ Integer
The current core clock speed.
-
#fan ⇒ Integer
-
#information_file ⇒ Object
-
#initialize(device_path, opts = {}) ⇒ NvidiaGpu
constructor
A new instance of NvidiaGpu.
-
#memory_clock ⇒ Integer
The current memory clock speed.
-
#memory_free ⇒ Object
-
#memory_total ⇒ Object
-
#memory_used ⇒ Object
-
#meta ⇒ Hash
Returns the cached data, or fetches new data once the cache has expired.
-
#metadata ⇒ Hash
"memory.used [MiB]": "2578 MiB", "memory.free [MiB]": "5534 MiB", "memory.total [MiB]": "8112 MiB", "utilization.gpu [%]": "100", "temperature.gpu": "53", "power.draw [W]": "129.21", "power.limit [W]": "130.00", "power.max_limit [W]": "217.00", "pstate": 2, "fan.speed [%]": "75".
-
#power ⇒ Float
The power being used by the gpu.
-
#power_limit ⇒ Object
-
#power_limit=(value) ⇒ Object
-
#power_max_limit ⇒ Object
-
#pstate ⇒ Object
-
#reset_metadata ⇒ Object
-
#set_fan_limit(_value, _type = 'current') ⇒ Numeric
-
#set_mem_clock_and_vddc(_mem_clock, _mem_volt) ⇒ Object
-
#subtype ⇒ Object
-
#temp ⇒ Object
-
#utilization ⇒ Object
Methods inherited from Gpu
#asic_temp, attached_processes, #compute_type, #configured_core_voltage, #core_voltage, #fan_limit, #fan_max_limit, #fan_min_limit, found_devices, #hardware_info, #mem_info, #mem_temp, #memory_volt, #opencl_board_name, opencl_cache, #opencl_device, opencl_devices, opencl_devices_from_cache, opencl_devices_from_platform, #opencl_name, #opencl_units, #status, #status_info, #to_h, #vddgfx, #voltage_table
Methods inherited from ComputeBase
#attached_processes, compute_classes, #device_class_name, #expired_metadata?, #top_processes
Methods included from Logger
color, log_file, log_level, logger, #logger
Methods inherited from Device
#base_hwmon_path, device, device_class, device_lookup, device_vendor, #expired_metadata?, #generic_model, #hwmon_path, #lock_rom, logger, manual_device_database, manual_device_lookup, manual_vendor_lookup, manual_vendors, name_map, name_translation, pci_database, #read_file, #read_hwmon_data, #read_kernel_setting, read_kernel_setting, #rom_data, #rom_path, subsystem_device, subsystem_device_lookup, subsystem_vendor, subsystem_vendor_lookup, #sysfs_model_name, system_checksum, #to_h, #to_json, #unlock_rom, vendor_lookup, #write_hwmon_data, #write_kernel_setting, write_kernel_setting
Methods included from Utils
check_for_root, #root?, root?
Constructor Details
#initialize(device_path, opts = {}) ⇒ NvidiaGpu
13
14
15
16
17
18
19
20
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 13
# Builds an Nvidia GPU from the driver's information file plus caller options.
#
# @param device_path [String] path of the device; also stored as :pci_loc
# @param opts [Hash] extra attributes; they override values read from the
#   information file
def initialize(device_path, opts = {})
  attrs = self.class.read_information_file(device_path).merge(opts)
  vbios = attrs[:video_bios]
  attrs[:pci_loc] = device_path
  attrs[:busid] = attrs[:bus_location]
  # :bios is only added when a video BIOS version is present
  attrs[:bios] = vbios.upcase if vbios
  attrs[:uuid] = attrs[:gpu_uuid]
  super(device_path, attrs)
end
|
Class Method Details
.blank_data ⇒ Object
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 41
# Placeholder readings used when nvidia-smi data is unavailable.
#
# The hash is built once and memoized, so every call returns the same object.
#
# @return [Hash{String => String, Integer}] zeroed readings keyed by the
#   nvidia-smi column names ('pstate' is the Integer 7)
def self.blank_data
  return @blank_data if @blank_data

  @blank_data = [
    ['memory.used [MiB]', '0'],
    ['memory.free [MiB]', '0'],
    ['memory.total [MiB]', '0'],
    ['utilization.gpu [%]', '0'],
    ['temperature.gpu', '0'],
    ['power.draw [W]', '0'],
    ['power.limit [W]', '0'],
    ['power.max_limit [W]', '0'],
    ['pstate', 7],
    ['fan.speed [%]', '0'],
    ['clocks.current.memory [MHz]', '0'],
    ['clocks.current.sm [MHz]', '0']
  ].to_h
end
|
.create_from_path(device_path, index, use_opencl = false) ⇒ Object
174
175
176
177
178
179
180
181
182
183
184
185
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 174
# Instantiates a GPU for the given device path, gathering its PCI ids first.
#
# @param device_path [String] path of the device
# @param index [Integer, nil] index stored on the new object as :index
# @param use_opencl [Boolean] stored on the new object as :use_opencl
# @return [Object] whatever `new` returns for this class
def self.create_from_path(device_path, index, use_opencl = false)
  class_id = device_class(device_path)
  dev_id = device(device_path)
  vendor_id = device_vendor(device_path)
  sub_vendor_id = subsystem_vendor(device_path)
  sub_device_id = subsystem_device(device_path)

  new(device_path, {
        device_class_id: class_id,
        device_id: dev_id,
        device_vendor_id: vendor_id,
        subsystem_vendor_id: sub_vendor_id,
        subsystem_device_id: sub_device_id,
        use_opencl: use_opencl,
        index: index
      })
end
|
.devices ⇒ Array
168
169
170
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 168
# Lists the GPU device paths whose vendor id matches VENDOR_ID.
#
# @return [Array] the entries of ComputeUnit::Gpu.devices that belong to
#   this vendor
def self.devices
  all_gpus = ComputeUnit::Gpu.devices
  all_gpus.find_all do |path|
    device_vendor(path) == VENDOR_ID
  end
end
|
.find_all(use_opencl = false) ⇒ Array
188
189
190
191
192
193
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 188
# Creates a GPU object for every device path returned by .devices.
#
# Each object's index is the position of its path within
# ComputeUnit::Gpu.found_devices, not its position within .devices.
#
# @param use_opencl [Boolean] forwarded to .create_from_path
# @return [Array] one created object per device path
def self.find_all(use_opencl = false)
  devices.map do |path|
    position = ComputeUnit::Gpu.found_devices.index(path)
    create_from_path(path, position, use_opencl)
  end
end
|
GTX 1070",
:irq=>"130",
:gpu_uuid=>"GPU-0116fb5c-66f4-1cba-c216-97f4600a8152",
:video_bios=>"86.04.50.40.4a",
:bus_type=>"PCIe",
:dma_size=>"47 bits",
:dma_mask=>"0x7fffffffffff",
:bus_location=>"0000:0d:00.0",
:device_minor=>"7"
211
212
213
214
215
216
217
218
219
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 211
# Parses the driver's per-GPU `information` file into a Hash.
#
# The file is located at NVIDIA_PROC_PATH/<basename of device_path>/information
# and is expected to contain one "Key: value" pair per line. Keys are
# downcased, spaces become underscores, and the result is symbolized
# (e.g. "GPU UUID" becomes :gpu_uuid). Values are returned as Strings with
# surrounding whitespace removed.
#
# Lines are parsed one at a time so that blank lines cannot leak into a key
# and a key with an empty value cannot swallow the following line.
#
# @param device_path [String] path of the device; only its basename is used
# @return [Hash{Symbol => String}] the parsed key/value pairs
# @raise [Errno::ENOENT] if the information file does not exist
def self.read_information_file(device_path)
  device_name = File.basename(device_path)
  information_file = File.join(NVIDIA_PROC_PATH, device_name, 'information')
  File.foreach(information_file).each_with_object({}) do |line, info|
    # key: everything before the first colon; value: the rest of the line
    match = line.match(/\A\s*([\w ]+?)\s*:[ \t]*(.*?)\s*\z/)
    next unless match

    info[match[1].downcase.tr(' ', '_').to_sym] = match[2]
  end
end
|
Instance Method Details
#core_clock ⇒ Integer
94
95
96
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 94
# @return [Integer] the 'clocks.current.sm [MHz]' reading from #meta,
#   converted with to_i
def core_clock
  sm_mhz = meta['clocks.current.sm [MHz]']
  sm_mhz.to_i
end
|
#fan ⇒ Integer
99
100
101
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 99
# @return [Integer] the 'fan.speed [%]' reading from #meta, converted with to_i
def fan
  speed_percent = meta['fan.speed [%]']
  speed_percent.to_i
end
|
160
161
162
163
164
165
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 160
# Path of this GPU's `information` file below NVIDIA_PROC_PATH.
#
# The path is computed on first use and memoized.
#
# @return [String] NVIDIA_PROC_PATH/<basename of device_path>/information
def information_file
  return @information_file if @information_file

  gpu_dir = File.basename(device_path)
  @information_file = File.join(NVIDIA_PROC_PATH, gpu_dir, 'information')
end
|
#memory_clock ⇒ Integer
89
90
91
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 89
# @return [Integer] the 'clocks.current.memory [MHz]' reading from #meta,
#   converted with to_i
def memory_clock
  mem_mhz = meta['clocks.current.memory [MHz]']
  mem_mhz.to_i
end
|
#memory_free ⇒ Object
152
153
154
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 152
# @return [Object] the raw 'memory.free [MiB]' entry from #meta (not
#   converted to a number)
def memory_free
  readings = meta
  readings['memory.free [MiB]']
end
|
#memory_total ⇒ Object
144
145
146
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 144
# @return [Object] the raw 'memory.total [MiB]' entry from #meta (not
#   converted to a number)
def memory_total
  readings = meta
  readings['memory.total [MiB]']
end
|
#memory_used ⇒ Object
148
149
150
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 148
# @return [Object] the raw 'memory.used [MiB]' entry from #meta (not
#   converted to a number)
def memory_used
  readings = meta
  readings['memory.used [MiB]']
end
|
Returns the cached data, or fetches new data once the cache has expired
32
33
34
35
36
37
38
39
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 32
# Readings for this GPU, served from the @meta cache when possible.
#
# While expired_metadata? is false the cached value is reused (and fetched
# once if nothing is cached yet). When it is true, a debug line is logged
# and the cache is replaced with a fresh #metadata result.
#
# @return [Hash] the cached or freshly fetched readings
def meta
  return @meta ||= metadata unless expired_metadata?

  logger.debug("Expired Nvidia Data for #{uuid} ")
  @meta = metadata
end
|
Note:
data returned from nvidia-smi
"memory.used [MiB]": "2578 MiB",
"memory.free [MiB]": "5534 MiB",
"memory.total [MiB]": "8112 MiB",
"utilization.gpu [%]": "100",
"temperature.gpu": "53",
"power.draw [W]": "129.21",
"power.limit [W]": "130.00",
"power.max_limit [W]": "217.00",
"pstate": 2,
"fan.speed [%]": "75"
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 73
# Runs nvidia-smi for this GPU (selected with `-i index`) and returns its
# readings.
#
# The CSV output is parsed with stripped headers and stripped values, and the
# first data row is returned as a Hash keyed by the CSV column headers.
#
# The class-level blank_data is returned instead when:
# * the command exits unsuccessfully (its output is logged as an error), or
# * the command succeeds but prints no data row, which would otherwise make
#   this method return nil and break every reader built on #meta.
#
# @return [Hash] the first CSV row as a Hash, or blank_data as a fallback
def metadata
  logger.debug("Calling #{NVIDIA_SMI}")
  data = `#{NVIDIA_SMI} --query-gpu=gpu_name,vbios_version,uuid,memory.used,memory.free,memory.total,utilization.gpu,temperature.gpu,power.draw,power.limit,power.max_limit,fan.speed,pstate,clocks.current.memory,clocks.current.sm -i #{index} --format=csv,nounits 2>&1`
  unless $CHILD_STATUS.success?
    logger.error(data.delete("\n"))
    return self.class.blank_data
  end

  # Backticks always return a String, so no nil check on data is needed.
  first_row = CSV.parse(data, headers: true, header_converters: ->(f) { f.strip },
                              converters: ->(f) { f ? f.strip : nil }).first
  first_row ? first_row.to_h : self.class.blank_data
end
|
#power ⇒ Float
104
105
106
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 104
# @return [Float] the 'power.draw [W]' reading from #meta (stripped and
#   converted with to_f) plus power_offset
def power
  draw_watts = meta['power.draw [W]'].strip.to_f
  draw_watts + power_offset
end
|
#power_limit ⇒ Object
116
117
118
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 116
# @return [Float] the 'power.limit [W]' reading from #meta, stripped and
#   converted with to_f
def power_limit
  limit_watts = meta['power.limit [W]']
  limit_watts.strip.to_f
end
|
#power_limit=(value) ⇒ Object
125
126
127
128
129
130
131
132
133
134
135
136
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 125
# Sets the GPU power limit through `nvidia-smi -i <index> -pl <value>`.
#
# The command is started from an argument list, so no shell parses `value`.
# With the previous shell-string form, a value such as "5; some-command"
# passed the range check (its to_i is 5) and then ran the extra command.
#
# Success is logged at info level and failure at warn level together with
# the command output. A failed command does not raise.
#
# @param value [#to_i, #to_s] requested limit in watts; value.to_i must lie
#   between 1 and power_max_limit.to_i
# @return [Integer] value.to_i
# @raise [ArgumentError] when value.to_i is outside the allowed range
def power_limit=(value)
  unless value.to_i.between?(1, power_max_limit.to_i)
    raise ArgumentError, "Power value #{value.to_i} cannot exceed #{power_max_limit}"
  end

  # argv form: each element reaches nvidia-smi as a single literal argument
  output = IO.popen([NVIDIA_SMI, '-i', index.to_s, '-pl', value.to_s], &:read)
  if $CHILD_STATUS.success?
    logger.info("GPU#{index} power set to #{value} Watts")
  else
    logger.warn("GPU#{index} failed setting power to #{value}\n#{output}")
  end
  value.to_i
end
|
#power_max_limit ⇒ Object
120
121
122
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 120
# @return [Float] the 'power.max_limit [W]' reading from #meta, stripped and
#   converted with to_f
def power_max_limit
  max_watts = meta['power.max_limit [W]']
  max_watts.strip.to_f
end
|
#pstate ⇒ Object
112
113
114
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 112
# @return [Integer] the 'pstate' reading from #meta, converted with to_i
def pstate
  state = meta['pstate']
  state.to_i
end
|
26
27
28
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 26
# Clears the memoized readings held in @meta.
#
# @return [nil]
def reset_metadata
@meta = nil
end
|
#set_fan_limit(_value, _type = 'current') ⇒ Numeric
Returns - original passed in value after being set.
140
141
142
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 140
# Fan limits cannot be set through this class; the call always raises.
#
# @param _value [Object] ignored
# @param _type [String] ignored
# @raise [NotImplementedError] always
def set_fan_limit(_value, _type = 'current')
  raise NotImplementedError, 'Not implemented for Nvidia'
end
|
#set_mem_clock_and_vddc(_mem_clock, _mem_volt) ⇒ Object
195
196
197
198
199
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 195
# No-op for Nvidia: nothing is changed on the device.
#
# When experimental_on? is true a warning is logged; otherwise the method
# returns nil without doing anything.
#
# @param _mem_clock [Object] ignored
# @param _mem_volt [Object] ignored
# @return [Object, nil] the result of logger.warn, or nil
def set_mem_clock_and_vddc(_mem_clock, _mem_volt)
  logger.warn('Feature not enabled for nvidia') if experimental_on?
end
|
#subtype ⇒ Object
22
23
24
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 22
# @return [Object] the value of the SUBTYPE constant
def subtype
SUBTYPE
end
|
#temp ⇒ Object
108
109
110
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 108
# @return [Integer] the 'temperature.gpu' reading from #meta, converted
#   with to_i
def temp
  gpu_temp = meta['temperature.gpu']
  gpu_temp.to_i
end
|
#utilization ⇒ Object
156
157
158
|
# File 'lib/compute_unit/gpus/nvidia_gpu.rb', line 156
# @return [Integer] the 'utilization.gpu [%]' reading from #meta with the
#   first '%' character removed, converted with to_i
def utilization
  raw_percent = meta['utilization.gpu [%]']
  without_sign = raw_percent.sub(/%/, '')
  without_sign.to_i
end
|