Class: XGBoost::Booster

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/xgboost/booster.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(params: nil, model_file: nil) ⇒ Booster

Returns a new instance of Booster.



5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/xgboost/booster.rb', line 5

# Creates a booster backed by a newly created native XGBoost handle.
#
# @param params [Enumerable, nil] parameters forwarded to #set_param; nothing
#   is sent to the native library when nil
# @param model_file [String, nil] when given, handed to XGBoosterLoadModel
def initialize(params: nil, model_file: nil)
  # Out-parameter that XGBoosterCreate writes the native handle into.
  @handle = ::FFI::MemoryPointer.new(:pointer)
  check_result FFI.XGBoosterCreate(nil, 0, @handle)
  # The finalizer factory only receives the handle's integer address.
  ObjectSpace.define_finalizer(@handle, self.class.finalize(handle_pointer.to_i))

  check_result FFI.XGBoosterLoadModel(handle_pointer, model_file) if model_file

  self.best_iteration = 0
  # set_param(nil) would take its scalar branch and register a parameter whose
  # name and value are both "", so only forward params that were supplied.
  set_param(params) unless params.nil?
end

Instance Attribute Details

#best_iteration ⇒ Object

Returns the value of attribute best_iteration.



3
4
5
# File 'lib/xgboost/booster.rb', line 3

# Reader for the +@best_iteration+ instance variable.
#
# @return [Object] the value currently stored in +@best_iteration+
def best_iteration
  @best_iteration
end

#feature_names ⇒ Object

Returns the value of attribute feature_names.



3
4
5
# File 'lib/xgboost/booster.rb', line 3

# Reader for the +@feature_names+ instance variable.
#
# @return [Object] the value currently stored in +@feature_names+
def feature_names
  @feature_names
end

#feature_types ⇒ Object

Returns the value of attribute feature_types.



3
4
5
# File 'lib/xgboost/booster.rb', line 3

# Reader for the +@feature_types+ instance variable.
#
# @return [Object] the value currently stored in +@feature_types+
def feature_types
  @feature_types
end

Class Method Details

.finalize(addr) ⇒ Object



18
19
20
21
# File 'lib/xgboost/booster.rb', line 18

# Builds the callback that frees the native booster located at +addr+.
#
# @param addr [Integer] address of the native booster handle
# @return [Proc] a non-lambda proc that calls XGBoosterFree on that address
def self.finalize(addr)
  # Must remain a plain proc rather than a stabby lambda
  # (presumably because of a lambda's strict arity check; confirm before changing).
  Proc.new do
    native_handle = ::FFI::Pointer.new(:pointer, addr)
    FFI.XGBoosterFree(native_handle)
  end
end

Instance Method Details

#[](key_name) ⇒ Object



160
161
162
163
164
165
166
167
168
# File 'lib/xgboost/booster.rb', line 160

# Looks up a booster attribute through XGBoosterGetAttr.
#
# @param key_name [String] attribute name (converted with string_pointer)
# @return [String, nil] the attribute value, or nil when the native call does
#   not report success (flag different from 1)
def [](key_name)
  key = string_pointer(key_name)
  found_flag = ::FFI::MemoryPointer.new(:int)
  value_ptr = ::FFI::MemoryPointer.new(:pointer)

  status = FFI.XGBoosterGetAttr(handle_pointer, key, value_ptr, found_flag)
  check_result status

  # Only dereference the result when the library flagged the key as present.
  return nil unless found_flag.read_int == 1

  value_ptr.read_pointer.read_string
end

#[]=(key_name, raw_value) ⇒ Object



170
171
172
173
174
175
# File 'lib/xgboost/booster.rb', line 170

# Stores a booster attribute through XGBoosterSetAttr.
#
# @param key_name [String] attribute name (converted with string_pointer)
# @param raw_value [String, nil] value to store; nil is passed to the native
#   call unconverted
def []=(key_name, raw_value)
  key = string_pointer(key_name)
  # Stays nil when raw_value is nil, so the native call receives a null value.
  value = string_pointer(raw_value) unless raw_value.nil?

  check_result(FFI.XGBoosterSetAttr(handle_pointer, key, value))
end

#attributes ⇒ Object



177
178
179
180
181
182
183
184
185
186
# File 'lib/xgboost/booster.rb', line 177

# Collects every booster attribute into a hash.
#
# Attribute names come from XGBoosterGetAttrNames; each value is then read
# through #[].
#
# @return [Hash{String => String, nil}] attribute names mapped to their values
def attributes
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  names_ptr = ::FFI::MemoryPointer.new(:pointer)
  check_result FFI.XGBoosterGetAttrNames(handle_pointer, count_ptr, names_ptr)

  count = read_uint64(count_ptr)
  # The result pointer is never dereferenced when no names are reported.
  return {} if count.zero?

  names = names_ptr.read_pointer.get_array_of_string(0, count)
  names.each_with_object({}) { |name, attrs| attrs[name] = self[name] }
end

#dump(fmap: "", with_stats: false, dump_format: "text") ⇒ Object

Returns an array of strings.



64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/xgboost/booster.rb', line 64

# Dumps the model through XGBoosterDumpModelExWithFeatures.
#
# @param fmap [String] accepted for interface compatibility; this method does
#   not currently read it
# @param with_stats [Boolean] passed to the native call as 1 or 0
# @param dump_format [String] passed to the native call unchanged
# @return [Array<String>] the dumped strings; empty when the native call
#   reports zero entries
def dump(fmap: "", with_stats: false, dump_format: "text")
  out_len = ::FFI::MemoryPointer.new(:uint64)
  out_result = ::FFI::MemoryPointer.new(:pointer)

  names = feature_names || []
  fnames = array_of_pointers(names.map { |fname| string_pointer(fname) })
  # Without explicit feature types every feature is described as "float".
  # NOTE(review): feature_types is forwarded without string_pointer conversion,
  # unlike feature_names — confirm its elements are already pointers.
  ftypes = array_of_pointers(feature_types || Array.new(names.size, string_pointer("float")))

  check_result FFI.XGBoosterDumpModelExWithFeatures(handle_pointer, names.size, fnames, ftypes, with_stats ? 1 : 0, dump_format, out_len, out_result)

  # Same guard as #attributes: never dereference the result pointer when the
  # library reports zero entries.
  len = read_uint64(out_len)
  len.zero? ? [] : out_result.read_pointer.get_array_of_string(0, len)
end

#dump_model(fout, fmap: "", with_stats: false, dump_format: "text") ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/xgboost/booster.rb', line 77

# Writes the output of #dump to a file opened in binary write mode.
#
# With dump_format "json" the entries are wrapped in "[\n" ... "\n]" and
# separated by ",\n"; for any other format each entry is preceded by a
# "booster[<index>]:" header line.
#
# @param fout [String] path of the file to write
# @param fmap [String] forwarded to #dump
# @param with_stats [Boolean] forwarded to #dump
# @param dump_format [String] forwarded to #dump; also selects the file layout
def dump_model(fout, fmap: "", with_stats: false, dump_format: "text")
  entries = dump(fmap: fmap, with_stats: with_stats, dump_format: dump_format)
  last_index = entries.size - 1

  File.open(fout, "wb") do |io|
    if dump_format != "json"
      entries.each_with_index do |entry, index|
        io.print("booster[#{index}]:\n")
        io.print(entry)
      end
    else
      io.print("[\n")
      entries.each_with_index do |entry, index|
        io.print(entry)
        # No separator after the final element.
        io.print(",\n") unless index >= last_index
      end
      io.print("\n]")
    end
  end
end

#eval_set(evals, iteration) ⇒ Object



27
28
29
30
31
32
33
34
35
36
# File 'lib/xgboost/booster.rb', line 27

# Evaluates the given data sets for one iteration via XGBoosterEvalOneIter.
#
# @param evals [Array] entries whose element 0 responds to +handle_pointer+
#   and whose element 1 is the name given to that data set
# @param iteration [Integer] iteration number passed to the native call
# @return [String] the string the native call writes to its out-parameter
def eval_set(evals, iteration)
  matrix_handles = evals.map { |entry| entry[0].handle_pointer }
  name_pointers = evals.map { |entry| string_pointer(entry[1]) }
  dmats = array_of_pointers(matrix_handles)
  evnames = array_of_pointers(name_pointers)

  message_ptr = ::FFI::MemoryPointer.new(:pointer)
  status = FFI.XGBoosterEvalOneIter(handle_pointer, iteration, dmats, evnames, evals.size, message_ptr)
  check_result status

  message_ptr.read_pointer.read_string
end

#fscore(fmap: "") ⇒ Object



96
97
98
99
# File 'lib/xgboost/booster.rb', line 96

# Shorthand for #score that always uses the "weight" importance type.
#
# @param fmap [String] forwarded to #score
# @return [Object] whatever #score returns for importance type "weight"
def fscore(fmap: "")
  score(importance_type: "weight", fmap: fmap)
end

#predict(data, ntree_limit: nil) ⇒ Object



48
49
50
51
52
53
54
55
56
57
# File 'lib/xgboost/booster.rb', line 48

# Runs XGBoosterPredict on +data+ and returns the predictions.
#
# @param data [#handle_pointer, #num_row] data set to predict on
# @param ntree_limit [Integer, nil] passed to the native call; nil becomes 0
# @return [Array<Float>, Array<Array<Float>>] a flat array when there is one
#   value per row, otherwise one sub-array per row; [] when the native call
#   returns no values
def predict(data, ntree_limit: nil)
  ntree_limit ||= 0
  out_len = ::FFI::MemoryPointer.new(:uint64)
  out_result = ::FFI::MemoryPointer.new(:pointer)
  check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, 0, out_len, out_result)

  # Nothing predicted (e.g. data without rows): skip dereferencing the result
  # pointer and the division by num_row below, which would otherwise fail.
  len = read_uint64(out_len)
  return [] if len.zero?

  out = out_result.read_pointer.read_array_of_float(len)
  # More than one value per row means per-class output; group it by row.
  num_class = out.size / data.num_row
  num_class > 1 ? out.each_slice(num_class).to_a : out
end

#save_model(fname) ⇒ Object



59
60
61
# File 'lib/xgboost/booster.rb', line 59

# Saves the model by calling XGBoosterSaveModel.
#
# @param fname [String] destination passed to the native call
def save_model(fname)
  status = FFI.XGBoosterSaveModel(handle_pointer, fname)
  check_result(status)
end

#score(fmap: "", importance_type: "weight") ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/xgboost/booster.rb', line 101

# Computes per-feature importance by parsing the text dump of the model.
#
# "weight" counts how many splits use each feature. "gain" and "cover" average
# the matching per-split statistic over a feature's splits; "total_gain" and
# "total_cover" sum it instead.
#
# @param fmap [String] forwarded to #dump
# @param importance_type [String, Symbol] one of "weight", "gain", "cover",
#   "total_gain", "total_cover"
# @return [Hash{String => Integer, Float}] feature identifier mapped to its
#   score, in order of first appearance in the dump
# @raise [ArgumentError] when importance_type is not one of the supported names
def score(fmap: "", importance_type: "weight")
  importance_type = importance_type.to_s
  supported = %w[weight gain cover total_gain total_cover]
  unless supported.include?(importance_type)
    raise ArgumentError,
          "unknown importance_type #{importance_type.inspect} (expected one of: #{supported.join(', ')})"
  end

  by_weight = importance_type == "weight"
  # The "total_*" variants read the same statistic as their base type but are
  # summed rather than averaged.
  averaged = !importance_type.start_with?("total_")
  stat_marker = "#{importance_type.sub(/\Atotal_/, '')}="

  split_counts = {}
  stat_sums = {}

  dump(fmap: fmap, with_stats: !by_weight).each do |tree|
    tree.split("\n").each do |line|
      # The parsing expects split lines shaped like "<id>:[<feature><<threshold>] <stats>";
      # lines without "[" (leaves, blank lines) are skipped.
      split_desc = line.split("[")[1]
      next if split_desc.nil?

      condition, stats = split_desc.split("]")
      feature = condition.split("<")[0]

      split_counts[feature] = split_counts.fetch(feature, 0) + 1
      next if by_weight

      # Take the number following e.g. "gain=" up to the next comma.
      value = stats.split(stat_marker)[1].split(",")[0].to_f
      stat_sums[feature] = stat_sums.fetch(feature, 0) + value
    end
  end

  return split_counts if by_weight

  if averaged
    stat_sums.each_key do |feature|
      stat_sums[feature] = stat_sums[feature] / split_counts[feature]
    end
  end

  stat_sums
end

#set_param(params, value = nil) ⇒ Object



38
39
40
41
42
43
44
45
46
# File 'lib/xgboost/booster.rb', line 38

# Sets one or more booster parameters through XGBoosterSetParam.
#
# @param params [Enumerable, #to_s, nil] either a collection of name/value
#   pairs (e.g. a Hash) or a single parameter name; nil is a no-op
# @param value [#to_s, nil] value for the single-name form; ignored when
#   +params+ is a collection
def set_param(params, value = nil)
  # Without this guard nil would reach the single-name branch and register a
  # parameter whose name and value are both "".
  return if params.nil?

  if params.is_a?(Enumerable)
    params.each do |k, v|
      # The native API takes strings, so names and values are stringified.
      check_result FFI.XGBoosterSetParam(handle_pointer, k.to_s, v.to_s)
    end
  else
    check_result FFI.XGBoosterSetParam(handle_pointer, params.to_s, value.to_s)
  end
end

#update(dtrain, iteration) ⇒ Object



23
24
25
# File 'lib/xgboost/booster.rb', line 23

# Runs one update step by calling XGBoosterUpdateOneIter.
#
# @param dtrain [#handle_pointer] training data whose handle is passed on
# @param iteration [Integer] iteration number passed to the native call
def update(dtrain, iteration)
  status = FFI.XGBoosterUpdateOneIter(handle_pointer, iteration, dtrain.handle_pointer)
  check_result(status)
end