Class: XGBoost::DMatrix

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/xgboost/dmatrix.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, label: nil, weight: nil, missing: Float::NAN) ⇒ DMatrix

Returns a new instance of DMatrix.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/xgboost/dmatrix.rb', line 5

# Creates a DMatrix and, when +data+ is given, copies it into a native
# XGBoost matrix.
#
# The input kind is probed in this order: matrix?, daru?, numo?, rover?,
# and finally a plain enumerable of rows. Every branch determines the row
# and column counts; all but the Numo/Rover branches also build a flat
# list of values. Daru and Rover inputs additionally supply feature
# names, and Daru input supplies feature types.
#
# With nil +data+ only the handle slot is allocated: no native matrix is
# created, no finalizer is registered and no feature names are set.
#
# @param data [Object, nil] source values, or nil to skip native creation
# @param label [Object, nil] assigned through #label= when truthy
# @param weight [Object, nil] assigned through #weight= when truthy
# @param missing [Float] passed to handle_missing and XGDMatrixCreateFromMat
# @raise [IndexError] if plain row data contains rows of different sizes
# @raise [Error] if a Daru vector's db_type is neither "INTEGER" nor "DOUBLE"
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
  @data = data
  @handle = ::FFI::MemoryPointer.new(:pointer)

  if data
    if matrix?(data)
      nrow = data.row_count
      ncol = data.column_count
      values = data.to_a.flatten
    elsif daru?(data)
      nrow, ncol = data.shape
      values = data.map_rows(&:to_a).flatten
      @feature_names = data.each_vector.map(&:name)
      db_types = data.each_vector.map(&:db_type)
      @feature_types = db_types.map do |db_type|
        case db_type
        when "INTEGER" then "int"
        when "DOUBLE" then "float"
        else raise Error, "Unknown feature type: #{db_type}"
        end
      end
    elsif numo?(data)
      nrow, ncol = data.shape
    elsif rover?(data)
      nrow, ncol = data.shape
      @feature_names = data.keys
      # From here on the local is a Numo array, so the numo? check below
      # takes the byte-copy path; @data still holds the original object.
      data = data.to_numo
    else
      nrow = data.count
      ncol = data.first.count
      unless data.all? { |row| row.size == ncol }
        # TODO raise ArgumentError in 0.8.0
        raise IndexError, "Rows have different sizes"
      end
      values = data.flatten
    end

    buffer = ::FFI::MemoryPointer.new(:float, nrow * ncol)
    if numo?(data)
      buffer.write_bytes(data.cast_to(Numo::SFloat).to_string)
    else
      handle_missing(values, missing)
      buffer.write_array_of_float(values)
    end
    check_result(FFI.XGDMatrixCreateFromMat(buffer, nrow, ncol, missing, @handle))

    # The finalizer is attached to the handle object and is given only the
    # raw address, not a reference to this instance.
    ObjectSpace.define_finalizer(@handle, self.class.finalize(handle_pointer.to_i))

    # Fall back to generated names when the input did not provide any.
    @feature_names ||= (0...ncol).map { |i| "f#{i}" }
  end

  self.label = label if label
  self.weight = weight if weight
end

Instance Attribute Details

#data ⇒ Object (readonly)

Returns the value of attribute data.



3
4
5
# File 'lib/xgboost/dmatrix.rb', line 3

# Returns the object stored in @data. #initialize assigns its +data+
# argument there unchanged, so this is nil when the matrix was built
# without data (as #slice does for the matrix it returns).
def data
  @data
end

#feature_names ⇒ Object (readonly)

Returns the value of attribute feature_names.



3
4
5
# File 'lib/xgboost/dmatrix.rb', line 3

# Returns the column names held in @feature_names.
# #initialize takes them from the Daru vector names or the Rover keys when
# the input provides them and otherwise generates "f0", "f1", ...; the
# value stays nil when the matrix was constructed with nil data.
def feature_names
  @feature_names
end

#feature_types ⇒ Object (readonly)

Returns the value of attribute feature_types.



3
4
5
# File 'lib/xgboost/dmatrix.rb', line 3

# Returns @feature_types. #initialize only assigns it for Daru input,
# mapping each vector's db_type "INTEGER" to "int" and "DOUBLE" to "float";
# for every other kind of input it is never set and reads as nil.
def feature_types
  @feature_types
end

Class Method Details

.finalize(addr) ⇒ Object



64
65
66
67
# File 'lib/xgboost/dmatrix.rb', line 64

# Builds the callback that #initialize registers with
# ObjectSpace.define_finalizer. The callback wraps +addr+ in an
# ::FFI::Pointer and passes it to XGDMatrixFree.
#
# Only the integer address is captured by the closure, not the DMatrix
# instance itself.
#
# @param addr [Integer] address of the native DMatrix handle
# @return [Proc] callback that frees the native matrix when called
def self.finalize(addr)
  # must use proc instead of stabby lambda
  # (presumably because finalizers are invoked with the object id as an
  # argument, which a zero-arity lambda would reject)
  proc do
    native = ::FFI::Pointer.new(:pointer, addr)
    FFI.XGDMatrixFree(native)
  end
end

Instance Method Details

#group=(group) ⇒ Object



85
86
87
88
89
# File 'lib/xgboost/dmatrix.rb', line 85

# Sets the "group" field of the native matrix.
#
# The values are copied into a freshly allocated C int buffer and handed
# to XGDMatrixSetUIntInfo together with their count.
#
# @param group [Array<Integer>] values to store under "group"
def group=(group)
  count = group.size
  buffer = ::FFI::MemoryPointer.new(:int, count)
  buffer.write_array_of_int(group)
  check_result(FFI.XGDMatrixSetUIntInfo(handle_pointer, "group", buffer, count))
end

#handle ⇒ Object



115
116
117
# File 'lib/xgboost/dmatrix.rb', line 115

# Returns @handle, the pointer-sized ::FFI::MemoryPointer allocated in
# #initialize. It is the output argument passed to XGDMatrixCreateFromMat
# and, for slice results, to XGDMatrixSliceDMatrix; use #handle_pointer to
# read the pointer value stored inside it.
def handle
  @handle
end

#handle_pointer ⇒ Object



119
120
121
# File 'lib/xgboost/dmatrix.rb', line 119

# Reads the pointer value currently stored in @handle. The other methods
# pass this value as the matrix argument of the FFI.XGDMatrix* calls.
# NOTE(review): for a DMatrix built with nil data, nothing shown here has
# written into @handle unless it is a #slice result — confirm what
# read_pointer yields in that state.
def handle_pointer
  @handle.read_pointer
end

#label ⇒ Object



69
70
71
# File 'lib/xgboost/dmatrix.rb', line 69

# Returns the "label" field of this matrix as produced by float_info.
# NOTE(review): float_info is defined outside this view; presumably it
# reads the values back from the native matrix — confirm its return type.
def label
  float_info("label")
end

#label=(label) ⇒ Object



77
78
79
# File 'lib/xgboost/dmatrix.rb', line 77

# Stores +label+ under the "label" field by delegating to set_float_info.
# #initialize calls this when a truthy label: keyword is supplied.
# NOTE(review): set_float_info is defined outside this view; the accepted
# input types depend on it — confirm.
def label=(label)
  set_float_info("label", label)
end

#num_col ⇒ Object



97
98
99
100
101
# File 'lib/xgboost/dmatrix.rb', line 97

# Returns the number of columns reported by the native matrix.
#
# XGDMatrixNumCol writes the count into a uint64 output slot, which is
# then decoded with read_uint64.
#
# @return [Object] whatever read_uint64 yields for the output slot
#   (presumably an Integer — read_uint64 is defined outside this view)
def num_col
  count_slot = ::FFI::MemoryPointer.new(:uint64)
  check_result(FFI.XGDMatrixNumCol(handle_pointer, count_slot))
  read_uint64(count_slot)
end

#num_row ⇒ Object



91
92
93
94
95
# File 'lib/xgboost/dmatrix.rb', line 91

# Returns the number of rows reported by the native matrix.
#
# XGDMatrixNumRow writes the count into a uint64 output slot, which is
# then decoded with read_uint64.
#
# @return [Object] whatever read_uint64 yields for the output slot
#   (presumably an Integer — read_uint64 is defined outside this view)
def num_row
  count_slot = ::FFI::MemoryPointer.new(:uint64)
  check_result(FFI.XGDMatrixNumRow(handle_pointer, count_slot))
  read_uint64(count_slot)
end

#save_binary(fname, silent: true) ⇒ Object



111
112
113
# File 'lib/xgboost/dmatrix.rb', line 111

# Writes the native matrix to +fname+ via XGDMatrixSaveBinary.
#
# @param fname [String] destination path handed to the native call
# @param silent [Boolean] sent to the native call as 1 when truthy, 0 otherwise
# @return [Object] the result of check_result
def save_binary(fname, silent: true)
  silent_flag = silent ? 1 : 0
  check_result(FFI.XGDMatrixSaveBinary(handle_pointer, fname, silent_flag))
end

#slice(rindex) ⇒ Object



103
104
105
106
107
108
109
# File 'lib/xgboost/dmatrix.rb', line 103

# Returns a new DMatrix containing the rows of this matrix selected by
# +rindex+.
#
# The result is created with nil data, so its #data and feature metadata
# are not populated; only its native handle is filled in by
# XGDMatrixSliceDMatrix.
#
# @param rindex [Array<Integer>] row indices, copied into a C int buffer
# @return [DMatrix] wrapper around the newly created native matrix
def slice(rindex)
  res = DMatrix.new(nil)
  idxset = ::FFI::MemoryPointer.new(:int, rindex.size)
  idxset.write_array_of_int(rindex)
  check_result FFI.XGDMatrixSliceDMatrix(handle_pointer, idxset, rindex.size, res.handle)
  # DMatrix.new(nil) skips the finalizer registration that #initialize only
  # performs when data is given, so without this the native matrix created
  # above would never be released.
  ObjectSpace.define_finalizer(res.handle, DMatrix.finalize(res.handle_pointer.to_i))
  res
end

#weight ⇒ Object



73
74
75
# File 'lib/xgboost/dmatrix.rb', line 73

# Returns the "weight" field of this matrix as produced by float_info.
# NOTE(review): float_info is defined outside this view; presumably it
# reads the values back from the native matrix — confirm its return type.
def weight
  float_info("weight")
end

#weight=(weight) ⇒ Object



81
82
83
# File 'lib/xgboost/dmatrix.rb', line 81

# Stores +weight+ under the "weight" field by delegating to set_float_info.
# #initialize calls this when a truthy weight: keyword is supplied.
# NOTE(review): set_float_info is defined outside this view; the accepted
# input types depend on it — confirm.
def weight=(weight)
  set_float_info("weight", weight)
end