Class: LightGBM::Dataset

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/lightgbm/dataset.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_name: nil, feature_names: nil) ⇒ Dataset

Returns a new instance of Dataset.



7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/lightgbm/dataset.rb', line 7

# Records every constructor argument on the instance and then delegates to
# +construct+.
#
# +feature_name+ wins over its alias +feature_names+; when neither is given
# the stored value is the string "auto".
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_name: nil, feature_names: nil)
  # the input plus the label/weight/group values supplied with it
  @data, @label, @weight, @group = data, label, weight, group

  # options kept for construct
  @params, @reference, @used_indices = params, reference, used_indices
  @categorical_feature = categorical_feature
  @feature_name = feature_name || feature_names || "auto"

  construct
end

Instance Attribute Details

#data ⇒ Object (readonly)

Returns the value of attribute data.



5
6
7
# File 'lib/lightgbm/dataset.rb', line 5

# Read-only accessor for +@data+, the instance variable in which the
# constructor stores its +data+ argument.
def data
  @data
end

#params ⇒ Object (readonly)

Returns the value of attribute params.



5
6
7
# File 'lib/lightgbm/dataset.rb', line 5

# Read-only accessor for +@params+, the instance variable in which the
# constructor stores its +params+ keyword argument (nil by default).
def params
  @params
end

Instance Method Details

#feature_name ⇒ Object Also known as: feature_names



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/lightgbm/dataset.rb', line 29

# Fetches the feature names through LGBM_DatasetGetFeatureNames and returns
# them as an Array of Ruby strings.
#
# The first call uses fixed-size buffers (1000 name slots of 255 chars each).
# If the call reports more names than slots, or a length larger than the
# per-name buffer, the buffers are re-allocated with the reported sizes and
# the call is made a second time.
def feature_name
  # must preallocate space
  # out-parameters filled in by the FFI call
  num_feature_names = ::FFI::MemoryPointer.new(:int)
  out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
  # initial guess: number of name slots
  len = 1000
  out_strs = ::FFI::MemoryPointer.new(:pointer, len)
  # initial guess: chars reserved per name
  buffer_len = 255
  # str_ptrs keeps the per-name buffers referenced from Ruby while out_strs
  # only holds their raw addresses
  str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
  out_strs.write_array_of_pointer(str_ptrs)
  safe_call FFI.LGBM_DatasetGetFeatureNames(@handle, len, num_feature_names, buffer_len, out_buffer_len, out_strs)

  num_features = num_feature_names.read_int
  # value written to out_buffer_len; presumably the per-name buffer size the
  # library needs -- confirm against the LightGBM C API
  actual_len = out_buffer_len.read(:size_t)
  if num_features > len || actual_len > buffer_len
    # retry with the reported sizes; the pointer array is only replaced when
    # it has too few slots, the per-name buffers are always rebuilt
    out_strs = ::FFI::MemoryPointer.new(:pointer, num_features) if num_features > len
    str_ptrs = num_features.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
    out_strs.write_array_of_pointer(str_ptrs)
    safe_call FFI.LGBM_DatasetGetFeatureNames(@handle, num_features, num_feature_names, actual_len, out_buffer_len, out_strs)
  end

  # should be the same, but get number of features
  # from most recent call (instead of num_features)
  str_ptrs[0, num_feature_names.read_int].map(&:read_string)
end

#feature_name=(feature_names) ⇒ Object Also known as: feature_names=



70
71
72
73
74
75
76
77
78
# File 'lib/lightgbm/dataset.rb', line 70

# Replaces the dataset's feature names.
#
# Every element of +feature_names+ is converted with +to_s+; the resulting
# strings are handed to LGBM_DatasetSetFeatureNames as an array of C string
# pointers, and the status it returns is checked by +safe_call+.
def feature_name=(feature_names)
  feature_names = feature_names.map(&:to_s)
  # Remember the names under @feature_name, the variable the constructor
  # fills, just as label=/weight=/group= update the variable the constructor
  # set for them. @feature_names is still assigned for backward compatibility.
  @feature_name = @feature_names = feature_names
  c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
  # keep reference to string pointers
  str_ptrs = feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) }
  c_feature_names.write_array_of_pointer(str_ptrs)
  safe_call FFI.LGBM_DatasetSetFeatureNames(@handle, c_feature_names, feature_names.size)
end

#group=(group) ⇒ Object



65
66
67
68
# File 'lib/lightgbm/dataset.rb', line 65

# Remembers +group+ on the instance, then writes it to the "group" field via
# +set_field+, requesting the :int32 type.
def group=(group)
  @group = group
  set_field("group", @group, type: :int32)
end

#handle ⇒ Object



115
116
117
# File 'lib/lightgbm/dataset.rb', line 115

# Returns +@handle+, the value this class passes as the first argument to
# its LGBM_Dataset* FFI calls.
def handle
  @handle
end

#label ⇒ Object



21
22
23
# File 'lib/lightgbm/dataset.rb', line 21

# Returns whatever the +field+ helper yields for the "label" field.
# The value is read through +field+ on every call rather than taken from
# the +@label+ instance variable.
def label
  field("label")
end

#label=(label) ⇒ Object



55
56
57
58
# File 'lib/lightgbm/dataset.rb', line 55

# Remembers +label+ on the instance, then writes it to the "label" field via
# +set_field+.
def label=(label)
  @label = label
  set_field("label", @label)
end

#num_data ⇒ Object



89
90
91
92
93
# File 'lib/lightgbm/dataset.rb', line 89

# Returns the Integer that LGBM_DatasetGetNumData writes for this dataset's
# handle. The call's status is checked by +safe_call+ before the value is
# read back.
def num_data
  count_ptr = ::FFI::MemoryPointer.new(:int)
  status = FFI.LGBM_DatasetGetNumData(@handle, count_ptr)
  safe_call(status)
  count_ptr.read_int
end

#num_feature ⇒ Object



95
96
97
98
99
# File 'lib/lightgbm/dataset.rb', line 95

# Returns the Integer that LGBM_DatasetGetNumFeature writes for this
# dataset's handle. The call's status is checked by +safe_call+ before the
# value is read back.
def num_feature
  count_ptr = ::FFI::MemoryPointer.new(:int)
  status = FFI.LGBM_DatasetGetNumFeature(@handle, count_ptr)
  safe_call(status)
  count_ptr.read_int
end

#reference=(reference) ⇒ Object

TODO: only update the reference if it is not in the chain.



82
83
84
85
86
87
# File 'lib/lightgbm/dataset.rb', line 82

# TODO only update reference if not in chain
#
# Stores a new reference and re-runs +construct+. Does nothing (and returns
# nil) when the given reference does not differ from the current one.
def reference=(reference)
  return unless reference != @reference

  @reference = reference
  construct
end

#save_binary(filename) ⇒ Object



101
102
103
# File 'lib/lightgbm/dataset.rb', line 101

# Passes this dataset's handle and +filename+ to LGBM_DatasetSaveBinary and
# checks the returned status with +safe_call+.
def save_binary(filename)
  status = FFI.LGBM_DatasetSaveBinary(@handle, filename)
  safe_call(status)
end

#subset(used_indices, params: nil) ⇒ Object



105
106
107
108
109
110
111
112
113
# File 'lib/lightgbm/dataset.rb', line 105

# Creates a new Dataset with no data of its own that points back at this
# one as its reference and carries +used_indices+.
#
# +params+ falls back to this dataset's own params when omitted.
def subset(used_indices, params: nil)
  # categorical_feature passed via params
  Dataset.new(
    nil,
    params: params || self.params,
    reference: self,
    used_indices: used_indices
  )
end

#weight ⇒ Object



25
26
27
# File 'lib/lightgbm/dataset.rb', line 25

# Returns whatever the +field+ helper yields for the "weight" field.
# The value is read through +field+ on every call rather than taken from
# the +@weight+ instance variable.
def weight
  field("weight")
end

#weight=(weight) ⇒ Object



60
61
62
63
# File 'lib/lightgbm/dataset.rb', line 60

# Remembers +weight+ on the instance, then writes it to the "weight" field
# via +set_field+.
def weight=(weight)
  @weight = weight
  set_field("weight", @weight)
end