Class: XGBoost::Booster

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/xgboost/booster.rb

Instance Method Summary collapse

Constructor Details

#initialize(params: nil, cache: nil, model_file: nil) ⇒ Booster

Returns a new instance of Booster.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/xgboost/booster.rb', line 5

# Creates the native booster via XGBoosterCreate, optionally loads a saved
# model into it, and finally applies +params+ through #set_param.
#
# @param params [Object, nil] forwarded unchanged to #set_param
# @param cache [Array<DMatrix>, nil] matrices handed to XGBoosterCreate; nil is treated as an empty list
# @param model_file [String, nil] when truthy, passed to XGBoosterLoadModel
# @raise [TypeError] if any cache entry is not a DMatrix
def initialize(params: nil, cache: nil, model_file: nil)
  matrices = cache || []
  matrices.each do |item|
    raise TypeError, "invalid cache item: #{item.class.name}" unless item.is_a?(DMatrix)
  end

  handles = array_of_pointers(matrices.map(&:handle))
  booster_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterCreate(handles, matrices.length, booster_ptr)
  # XGBoosterFree is registered as the release callback of the auto pointer.
  @handle = ::FFI::AutoPointer.new(booster_ptr.read_pointer, FFI.method(:XGBoosterFree))

  matrices.each { |item| assign_dmatrix_features(item) }

  check_call FFI.XGBoosterLoadModel(handle, model_file) if model_file

  set_param(params)
end

Instance Method Details

#[](key_name) ⇒ Object

Raises:

  • (TypeError)


29
30
31
32
33
34
35
36
37
# File 'lib/xgboost/booster.rb', line 29

# Looks up a booster attribute by name.
#
# @param key_name [String] attribute name, handed to #attr
# @return [Object] whatever #attr returns for +key_name+
# @raise [TypeError] when +key_name+ is not a String
def [](key_name)
  # TODO slice
  raise TypeError, "expected string" unless key_name.is_a?(String)

  attr(key_name)
end

#[]=(key_name, raw_value) ⇒ Object



39
40
41
# File 'lib/xgboost/booster.rb', line 39

# Stores a single attribute by delegating to #set_attr with a
# one-entry keyword hash.
#
# @param key_name [Object] attribute name
# @param raw_value [Object] attribute value
def []=(key_name, raw_value)
  attribute = {key_name => raw_value}
  set_attr(**attribute)
end

#attr(key) ⇒ Object



50
51
52
53
54
55
# File 'lib/xgboost/booster.rb', line 50

# Reads one attribute through XGBoosterGetAttr.
#
# @param key [#to_s] attribute name
# @return [String, nil] the attribute string, or nil when the native call
#   sets its success flag to 0
def attr(key)
  value_ptr = ::FFI::MemoryPointer.new(:pointer)
  found = ::FFI::MemoryPointer.new(:int)
  check_call FFI.XGBoosterGetAttr(handle, key.to_s, value_ptr, found)
  return nil if found.read_int == 0

  value_ptr.read_pointer.read_string
end

#attributes ⇒ Object



57
58
59
60
61
62
63
# File 'lib/xgboost/booster.rb', line 57

# Collects every attribute into a hash.
#
# The names come from XGBoosterGetAttrNames (decoded by
# from_cstr_to_rbstr); each value is then fetched with #attr.
#
# @return [Hash] attribute name => value returned by #attr
def attributes
  count = ::FFI::MemoryPointer.new(:uint64)
  names_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterGetAttrNames(handle, count, names_ptr)

  from_cstr_to_rbstr(names_ptr, count).each_with_object({}) do |name, result|
    result[name] = attr(name)
  end
end

#best_iteration ⇒ Object



127
128
129
# File 'lib/xgboost/booster.rb', line 127

# Reads the :best_iteration attribute.
#
# @return [Integer, nil] the attribute converted with to_i, or nil when
#   #attr returns nil
def best_iteration
  stored = attr(:best_iteration)
  stored.nil? ? nil : stored.to_i
end

#best_iteration=(iteration) ⇒ Object



131
132
133
# File 'lib/xgboost/booster.rb', line 131

# Stores +iteration+ under the :best_iteration attribute via #set_attr.
#
# @param iteration [Object] value to record
def best_iteration=(iteration)
  attribute = {best_iteration: iteration}
  set_attr(**attribute)
end

#best_score ⇒ Object



135
136
137
# File 'lib/xgboost/booster.rb', line 135

# Reads the :best_score attribute.
#
# @return [Float, nil] the attribute converted with to_f, or nil when
#   #attr returns nil
def best_score
  stored = attr(:best_score)
  stored.nil? ? nil : stored.to_f
end

#best_score=(score) ⇒ Object



139
140
141
# File 'lib/xgboost/booster.rb', line 139

# Stores +score+ under the :best_score attribute via #set_attr.
#
# @param score [Object] value to record
def best_score=(score)
  attribute = {best_score: score}
  set_attr(**attribute)
end

#dump(fmap: "", with_stats: false, dump_format: "text") ⇒ Object

Returns an array of strings.



175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/xgboost/booster.rb', line 175

# Dumps the model through XGBoosterDumpModelExWithFeatures.
#
# Feature names come from #feature_names (an empty list when it returns
# nil). Feature types come from #feature_types, falling back to "float"
# for every feature when it returns nil.
#
# @param fmap [String] accepted but not used by this method
# @param with_stats [Boolean] sent to the native call as 1 when truthy, 0 otherwise
# @param dump_format [String] passed through to the native call
# @return [Array<String>] the strings read from the native result array
def dump(fmap: "", with_stats: false, dump_format: "text")
  out_len = ::FFI::MemoryPointer.new(:uint64)
  out_result = ::FFI::MemoryPointer.new(:pointer)

  names = feature_names || []
  types = feature_types || Array.new(names.size, "float")

  # feature_types is read by the same helper as feature_names, so its
  # entries are wrapped with string_pointer exactly like the names are,
  # instead of being handed to array_of_pointers unconverted.
  fnames = array_of_pointers(names.map { |name| string_pointer(name) })
  ftypes = array_of_pointers(types.map { |type| string_pointer(type) })

  check_call FFI.XGBoosterDumpModelExWithFeatures(handle, names.size, fnames, ftypes, with_stats ? 1 : 0, dump_format, out_len, out_result)

  out_result.read_pointer.get_array_of_string(0, out_len.read_uint64)
end

#dump_model(fout, fmap: "", with_stats: false, dump_format: "text") ⇒ Object



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/xgboost/booster.rb', line 155

# Writes the result of #dump to the file +fout+ (opened in "wb" mode).
#
# With dump_format "json" the entries are written between "[\n" and
# "\n]", separated by ",\n". For any other format each entry is preceded
# by a "booster[i]:\n" header line.
#
# @param fout [String] destination path
# @param fmap [String] forwarded to #dump
# @param with_stats [Boolean] forwarded to #dump
# @param dump_format [String] forwarded to #dump; also selects the file layout
def dump_model(fout, fmap: "", with_stats: false, dump_format: "text")
  trees = dump(fmap: fmap, with_stats: with_stats, dump_format: dump_format)
  json_layout = dump_format == "json"

  File.open(fout, "wb") do |io|
    if json_layout
      last_index = trees.size - 1
      io.print("[\n")
      trees.each_with_index do |tree, index|
        io.print(tree)
        # no separator after the final entry
        io.print(",\n") unless index >= last_index
      end
      io.print("\n]")
    else
      trees.each_with_index do |tree, index|
        io.print("booster[#{index}]:\n")
        io.print(tree)
      end
    end
  end
end

#eval_set(evals, iteration) ⇒ Object



101
102
103
104
105
106
107
108
109
110
# File 'lib/xgboost/booster.rb', line 101

# Evaluates the given sets for one iteration via XGBoosterEvalOneIter.
#
# @param evals [Array] entries indexed as [matrix, name]; the matrix must
#   respond to +handle+ and the name is wrapped with string_pointer
# @param iteration [Integer] passed to the native call
# @return [String] the string read from the native result pointer
def eval_set(evals, iteration)
  matrix_handles = evals.map { |entry| entry[0].handle }
  dmats = array_of_pointers(matrix_handles)

  name_pointers = evals.map { |entry| string_pointer(entry[1]) }
  evnames = array_of_pointers(name_pointers)

  message_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterEvalOneIter(handle, iteration, dmats, evnames, evals.size, message_ptr)

  message_ptr.read_pointer.read_string
end

#feature_names ⇒ Object



79
80
81
# File 'lib/xgboost/booster.rb', line 79

# Returns the result of get_feature_info for the "feature_name" field.
def feature_names
  field = "feature_name"
  get_feature_info(field)
end

#feature_names=(features) ⇒ Object



83
84
85
# File 'lib/xgboost/booster.rb', line 83

# Stores +features+ under the "feature_name" field via set_feature_info.
#
# @param features [Object] passed through unchanged
def feature_names=(features)
  field = "feature_name"
  set_feature_info(features, field)
end

#feature_types ⇒ Object



71
72
73
# File 'lib/xgboost/booster.rb', line 71

# Returns the result of get_feature_info for the "feature_type" field.
def feature_types
  field = "feature_type"
  get_feature_info(field)
end

#feature_types=(features) ⇒ Object



75
76
77
# File 'lib/xgboost/booster.rb', line 75

# Stores +features+ under the "feature_type" field via set_feature_info.
#
# @param features [Object] passed through unchanged
def feature_types=(features)
  field = "feature_type"
  set_feature_info(features, field)
end

#fscore(fmap: "") ⇒ Object



188
189
190
191
# File 'lib/xgboost/booster.rb', line 188

# Shortcut for #score that always uses the "weight" importance type.
#
# @param fmap [String] forwarded to #score
# @return [Object] whatever #score returns
def fscore(fmap: "")
  options = {fmap: fmap, importance_type: "weight"}
  score(**options)
end

#num_boosted_rounds ⇒ Object



143
144
145
146
147
# File 'lib/xgboost/booster.rb', line 143

# Asks XGBoosterBoostedRounds for the round count.
#
# @return [Integer] the int written by the native call
def num_boosted_rounds
  ::FFI::MemoryPointer.new(:int).then do |counter|
    check_call FFI.XGBoosterBoostedRounds(handle, counter)
    counter.read_int
  end
end

#num_features ⇒ Object



149
150
151
152
153
# File 'lib/xgboost/booster.rb', line 149

# Asks XGBoosterGetNumFeature for the feature count.
#
# @return [Integer] the uint64 written by the native call
def num_features
  ::FFI::MemoryPointer.new(:uint64).then do |counter|
    check_call FFI.XGBoosterGetNumFeature(handle, counter)
    counter.read_uint64
  end
end

#predict(data, ntree_limit: nil) ⇒ Object



112
113
114
115
116
117
118
119
120
121
# File 'lib/xgboost/booster.rb', line 112

# Runs XGBoosterPredict on +data+ and returns the predictions.
#
# The flat float output is divided by +data.num_row+; when that yields
# more than one value per row the result is returned as one array per
# row, otherwise as a flat array.
#
# @param data [#handle, #num_row] the matrix to predict on
# @param ntree_limit [Integer, nil] passed to the native call; nil becomes 0
# @return [Array<Float>, Array<Array<Float>>] predictions; [] when the
#   native call reports no outputs
def predict(data, ntree_limit: nil)
  ntree_limit ||= 0
  out_len = ::FFI::MemoryPointer.new(:uint64)
  out_result = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterPredict(handle, data.handle, 0, ntree_limit, 0, out_len, out_result)

  length = out_len.read_uint64
  # Nothing was predicted: skip reading the result buffer and avoid the
  # 0 / 0 division below when the input has no rows.
  return [] if length.zero?

  out = out_result.read_pointer.read_array_of_float(length)
  num_class = out.size / data.num_row
  num_class > 1 ? out.each_slice(num_class).to_a : out
end

#save_config ⇒ Object



43
44
45
46
47
48
# File 'lib/xgboost/booster.rb', line 43

# Fetches the booster configuration via XGBoosterSaveJsonConfig.
#
# @return [String] the bytes reported by the native call, tagged as UTF-8
def save_config
  size_ptr = ::FFI::MemoryPointer.new(:uint64)
  config_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGBoosterSaveJsonConfig(handle, size_ptr, config_ptr)

  # Read exactly the reported number of bytes, then mark them as UTF-8.
  raw = config_ptr.read_pointer.read_string(size_ptr.read_uint64)
  raw.force_encoding(Encoding::UTF_8)
end

#save_model(fname) ⇒ Object



123
124
125
# File 'lib/xgboost/booster.rb', line 123

# Saves the model by calling XGBoosterSaveModel with +fname+.
#
# @param fname [String] passed to the native call
def save_model(fname)
  status = FFI.XGBoosterSaveModel(handle, fname)
  check_call status
end

#score(fmap: "", importance_type: "weight") ⇒ Object



193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/xgboost/booster.rb', line 193

# Computes per-feature importance by parsing the text output of #dump.
#
# Only lines containing "[" are considered; the feature id is the text
# between "[" and the first "<".
#
# * "weight": number of such lines per feature.
# * "total_gain" / "total_cover": sum of the "gain=" / "cover=" values.
# * anything else: the "<importance_type>=" values averaged over the
#   number of lines seen for the feature.
#
# @param fmap [String] forwarded to #dump
# @param importance_type [String] see above
# @return [Hash] feature id => Integer count or Float score
def score(fmap: "", importance_type: "weight")
  if importance_type == "weight"
    split_counts = {}
    dump(fmap: fmap, with_stats: false).each do |tree|
      tree.split("\n").each do |line|
        pieces = line.split("[")
        next if pieces.size == 1

        feature = pieces[1].split("]")[0].split("<")[0]
        split_counts[feature] = (split_counts[feature] || 0) + 1
      end
    end
    return split_counts
  end

  # "total_*" variants read the same statistic but skip the averaging.
  stat, averaged =
    case importance_type
    when "total_gain" then ["gain", false]
    when "total_cover" then ["cover", false]
    else [importance_type, true]
    end

  trees = dump(fmap: fmap, with_stats: true)
  marker = stat + "="

  split_counts = {}
  totals = {}
  trees.each do |tree|
    tree.split("\n").each do |line|
      pieces = line.split("[")
      next if pieces.size == 1

      condition, stats = pieces[1].split("]")
      # value follows the marker and ends at the next comma
      amount = stats.split(marker)[1].split(",")[0].to_f
      feature = condition.split("<")[0]

      split_counts[feature] = (split_counts[feature] || 0) + 1
      totals[feature] = (totals[feature] || 0) + amount
    end
  end

  return totals unless averaged

  totals.to_h { |feature, total| [feature, total / split_counts[feature]] }
end

#set_attr(**kwargs) ⇒ Object



65
66
67
68
69
# File 'lib/xgboost/booster.rb', line 65

# Writes each keyword pair through XGBoosterSetAttr.
#
# Keys are stringified; values are stringified unless nil, in which case
# nil itself is handed to the native call.
#
# @param kwargs [Hash] attribute name => value
def set_attr(**kwargs)
  kwargs.each_pair do |name, raw|
    encoded = raw.nil? ? nil : raw.to_s
    check_call FFI.XGBoosterSetAttr(handle, name.to_s, encoded)
  end
end

#set_param(params, value = nil) ⇒ Object



87
88
89
90
91
92
93
94
95
# File 'lib/xgboost/booster.rb', line 87

# Sets booster parameters through XGBoosterSetParam.
#
# @param params [Enumerable, #to_s, nil] an enumerable of key/value pairs
#   (each pair is set individually), or a single parameter name; nil is ignored
# @param value [#to_s] the value, used only when +params+ is a single name
def set_param(params, value = nil)
  # The constructor forwards its default (params: nil) here. Without this
  # guard nil would fall into the single-name branch and register a
  # parameter whose name and value are both "".
  return if params.nil?

  if params.is_a?(Enumerable)
    params.each do |k, v|
      check_call FFI.XGBoosterSetParam(handle, k.to_s, v.to_s)
    end
  else
    check_call FFI.XGBoosterSetParam(handle, params.to_s, value.to_s)
  end
end

#update(dtrain, iteration) ⇒ Object



97
98
99
# File 'lib/xgboost/booster.rb', line 97

# Runs one training iteration by calling XGBoosterUpdateOneIter.
#
# @param dtrain [#handle] training matrix; its handle is passed to the native call
# @param iteration [Integer] passed to the native call
def update(dtrain, iteration)
  status = FFI.XGBoosterUpdateOneIter(handle, iteration, dtrain.handle)
  check_call status
end