63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
# File 'lib/xgb.rb', line 63
# Runs k-fold cross-validation and returns the per-round evaluation history.
#
# Row indices of +dtrain+ are optionally shuffled with a seeded RNG and cut
# into consecutive test folds of ceil(rows / nfold) indices each. Every fold
# gets its own Booster, which is updated on the remaining rows and evaluated
# on both its train and test slices each round. Per-fold metric values are
# aggregated with the +mean+ and +stdev+ helpers and appended to the history.
#
# @param params forwarded unchanged to Booster.new(params: ...)
# @param dtrain [#num_row, #num_col, #slice] data set to split into folds
# @param num_boost_round [Integer] maximum number of boosting rounds
# @param nfold [Integer] number of folds; must be positive and reachable with
#   the ceil-sized slicing described above
# @param seed [Integer] seed for the RNG used to shuffle row indices
# @param shuffle [Boolean] whether to shuffle row indices before splitting
# @param verbose_eval [Boolean, nil] when truthy, prints one tab-separated
#   line per round
# @param show_stdv [Boolean] include "+<stdev>" in the printed lines
# @param early_stopping_rounds [Integer, nil] when set, stops once the last
#   reported metric has not improved for this many rounds and truncates the
#   history to the best round
# @return [Hash] maps "<metric>-mean" and "<metric>-std" to one value per round
# @raise [ArgumentError] if nfold is not a positive Integer, or the rows
#   cannot be split into nfold non-empty folds
def cv(params, dtrain, num_boost_round: 10, nfold: 3, seed: 0, shuffle: true, verbose_eval: nil, show_stdv: true, early_stopping_rounds: nil)
  unless nfold.is_a?(Integer) && nfold > 0
    raise ArgumentError, "nfold must be a positive integer, got #{nfold.inspect}"
  end

  row_ids = (0...dtrain.num_row).to_a
  row_ids.shuffle!(random: Random.new(seed)) if shuffle

  # Ceil-sized slices can produce fewer than nfold folds (e.g. 9 rows, 6 folds
  # -> 5 slices). Fail loudly instead of pairing a fold with a nil test set.
  fold_size = (row_ids.size / nfold.to_f).ceil
  test_folds = fold_size.zero? ? [] : row_ids.each_slice(fold_size).to_a
  if test_folds.size < nfold
    raise ArgumentError,
          "cannot split #{row_ids.size} rows into #{nfold} non-empty folds " \
          "(slices of #{fold_size} rows yield only #{test_folds.size})"
  end

  cvfolds = test_folds.each_with_index.map do |test_idx, i|
    # Training rows are every fold except the i-th, in fold order.
    train_idx = (test_folds.take(i) + test_folds.drop(i + 1)).flatten
    fold_dtrain = dtrain.slice(train_idx)
    fold_dvalid = dtrain.slice(test_idx)
    booster = Booster.new(params: params)
    booster.set_param("num_feature", dtrain.num_col)
    [booster, fold_dtrain, fold_dvalid]
  end

  eval_hist = {}
  best_score = nil
  best_iter = nil

  num_boost_round.times do |iteration|
    scores = {}
    cvfolds.each do |booster, fold_dtrain, fold_dvalid|
      booster.update(fold_dtrain, iteration)
      message = booster.eval_set([[fold_dtrain, "train"], [fold_dvalid, "test"]], iteration)
      # The first whitespace-separated token is skipped; the rest are parsed
      # as "name:value" pairs.
      message.split.drop(1).each do |pair|
        name, value = pair.split(":")
        (scores[name] ||= []) << value.to_f
      end
    end

    message_parts = ["[#{iteration}]"]
    last_mean = nil
    scores.each do |eval_name, vals|
      avg = mean(vals)
      spread = stdev(vals)
      (eval_hist["#{eval_name}-mean"] ||= []) << avg
      (eval_hist["#{eval_name}-std"] ||= []) << spread
      last_mean = avg
      message_parts <<
        if show_stdv
          "%s:%g+%g" % [eval_name, avg, spread]
        else
          "%s:%g" % [eval_name, avg]
        end
    end

    # Print before the early-stopping check so the round that triggers the
    # stop is reported as well.
    puts message_parts.join("\t") if verbose_eval

    next unless early_stopping_rounds

    # NOTE(review): lower is always treated as better here; metrics where a
    # larger value is better are not handled by this comparison.
    if best_score.nil? || last_mean < best_score
      best_score = last_mean
      best_iter = iteration
    elsif iteration - best_iter >= early_stopping_rounds
      eval_hist.each_key do |key|
        eval_hist[key] = eval_hist[key][0..best_iter]
      end
      break
    end
  end

  eval_hist
end
|