Class: Chainer::Functions::Loss::SoftmaxCrossEntropy
- Inherits: Chainer::Function
  - Object
  - Chainer::Function
  - Chainer::Functions::Loss::SoftmaxCrossEntropy
- Defined in: lib/chainer/functions/loss/softmax_cross_entropy.rb
Instance Attribute Summary
Attributes inherited from Chainer::Function
#inputs, #output_data, #outputs, #owned_node, #rank, #retain_after_backward
Class Method Summary
- .check_class_weight_option(class_weight) ⇒ Object
- .check_reduce_option(reduce) ⇒ Object
- .double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce) ⇒ Object
- .softmax_cross_entropy(x, t, normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean', enable_double_backprop: false) ⇒ Object
Instance Method Summary
- #backward(inputs, grad_outputs) ⇒ Object
- #forward(inputs) ⇒ Object
- #initialize(normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean') ⇒ SoftmaxCrossEntropy (constructor)
  A new instance of SoftmaxCrossEntropy.
Methods inherited from Chainer::Function
#backward_cpu, #backward_gpu, #call, #forward_cpu, #forward_gpu, #label, #node, #retain_inputs, #retain_outputs
Constructor Details
#initialize(normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean') ⇒ SoftmaxCrossEntropy
Returns a new instance of SoftmaxCrossEntropy.
# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 57

def initialize(normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean')
  @normalize = normalize
  @cache_score = cache_score
  self.class.check_class_weight_option(class_weight)
  @class_weight = class_weight
  @ignore_label = ignore_label
  self.class.check_reduce_option(reduce)
  @reduce = reduce
end
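For illustration, a minimal sketch of constructing the function object directly and applying it with .(), as the softmax_cross_entropy class method does internally. The variable names and numbers below are made up, and it is assumed that plain Numo arrays are accepted as inputs:

require 'chainer'

# Illustrative scores for 2 samples over 3 classes, plus integer class labels.
x = Numo::SFloat[[0.1, 0.4, 0.5], [0.9, 0.05, 0.05]]
t = Numo::Int32[2, 0]

fn = Chainer::Functions::Loss::SoftmaxCrossEntropy.new(reduce: 'no')
loss = fn.(x, t)   # per-sample losses, since reduce: 'no' skips the mean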
Class Method Details
.check_class_weight_option(class_weight) ⇒ Object
# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 167

def self.check_class_weight_option(class_weight)
  return if class_weight.nil?

  xm = Chainer.get_array_module(class_weight)
  if class_weight.ndim != 1
    raise ArgumentError, 'class_weight.ndim should be 1'
  elsif (class_weight.class != xm::DFloat) and (class_weight.class != xm::SFloat)
    raise ArgumentError, "The dtype of class_weight should be 'DFloat' or 'SFloat'"
  elsif class_weight.kind_of?(Chainer::Variable)
    raise ArgumentError, 'class_weight should be a NArray, not a chainer.Variable'
  end
end
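For illustration, a weight vector that passes this check is a one-dimensional SFloat or DFloat NArray with one entry per class; the values below are made up:

class_weight = Numo::SFloat[1.0, 1.0, 5.0]   # up-weight the third class
Chainer::Functions::Loss::SoftmaxCrossEntropy.check_class_weight_option(class_weight)
# returns nil, no error raised

Chainer::Functions::Loss::SoftmaxCrossEntropy.check_class_weight_option(Numo::Int32[1, 2])
# raises ArgumentError: The dtype of class_weight should be 'DFloat' or 'SFloat'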
.check_reduce_option(reduce) ⇒ Object
# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 180

def self.check_reduce_option(reduce)
  unless ['mean', 'no'].include?(reduce)
    raise ArgumentError, "only 'mean' and 'no' are valid for 'reduce', but #{reduce} is given"
  end
end
.double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce) ⇒ Object
# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 13

def self.double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce)
  if t.is_a?(Chainer::Variable)
    t = t.data
  end

  self.check_class_weight_option(class_weight)
  self.check_reduce_option(reduce)

  loss = -Activation::LogSoftmax.log_softmax(x)

  if class_weight
    shape = x.ndim.times.map { |d| d != 1 ? 1 : class_weight.shape[-1] }
    class_weight = Chainer::Functions::Array::BroadcastTo.broadcast_to(class_weight.reshape(*shape), x.shape)
    loss = loss * class_weight
  end

  dtype = x.is_a?(Chainer::Variable) ? x.dtype : x.class
  in_use = t.ne(ignore_label).cast_to(dtype)

  loss = Chainer::Functions::Array::Rollaxis.rollaxis(loss, 1, start: loss.ndim)

  # TODO: loss = chainer.functions.reshape(loss, (-1, loss.shape[-1]))
  shape = loss.shape
  last_shape = shape.pop
  loss = Chainer::Functions::Array::Reshape.reshape(loss, [shape.inject(:*), last_shape])

  # Replace ignore_label value with one valid for F.select_item below.
  t = t.clip(0, loss.shape[1] - 1)

  loss = Chainer::Functions::Array::SelectItem.select_item(loss, t.flatten.dup)
  loss = Chainer::Functions::Array::Reshape.reshape(loss, t.shape)
  loss = loss * in_use

  if reduce == "mean"
    count = normalize ? in_use.sum : x.shape.first
    count = [count, 1.0].max
    loss = loss * (1.0 / count)
    return Chainer::Functions::Math::Sum.sum(loss)
  else
    return loss
  end
end
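This path is taken when softmax_cross_entropy is called with enable_double_backprop: true; the loss is then built from differentiable operations (log_softmax, select_item, reshape, sum), so the gradient graph itself can be backpropagated through. A hedged usage sketch with made-up values:

x = Chainer::Variable.new(Numo::SFloat[[0.3, 0.7], [0.6, 0.4]])
t = Numo::Int32[1, 0]

loss = Chainer::Functions::Loss::SoftmaxCrossEntropy.softmax_cross_entropy(
  x, t, enable_double_backprop: true)
loss.backward   # backprop runs through the differentiable sub-graph built above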
.softmax_cross_entropy(x, t, normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean', enable_double_backprop: false) ⇒ Object
# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 5

def self.softmax_cross_entropy(x, t, normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean', enable_double_backprop: false)
  if enable_double_backprop
    self.double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce)
  else
    self.new(normalize: normalize, cache_score: cache_score, class_weight: class_weight, ignore_label: ignore_label, reduce: reduce).(x, t)
  end
end
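A minimal usage sketch of this public entry point, assuming Numo arrays are accepted directly; the numbers are illustrative:

x = Numo::SFloat[[2.0, 0.5, 0.3], [0.1, 0.2, 3.0]]   # unnormalized scores, 2 samples x 3 classes
t = Numo::Int32[0, 2]                                 # ground-truth class indices

loss = Chainer::Functions::Loss::SoftmaxCrossEntropy.softmax_cross_entropy(x, t)
loss.data       # mean loss over the batch (reduce: 'mean' is the default)

per_sample = Chainer::Functions::Loss::SoftmaxCrossEntropy.softmax_cross_entropy(x, t, reduce: 'no')
per_sample.data  # one loss value per sample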
Instance Method Details
#backward(inputs, grad_outputs) ⇒ Object
# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 107

def backward(inputs, grad_outputs)
  xm = Chainer.get_array_module(*(inputs + grad_outputs))
  x, t = inputs
  gloss = grad_outputs[0]
  if self.instance_variable_defined?(:'@y')
    y = @y.dup
  else
    y = Activation._log_softmax(x)
    y = xm::NMath.exp(y)
  end

  if y.ndim == 2
    gx = y
    # TODO(sonots): Avoid to_a especially in Cumo to improve performance
    t.class.new(t.shape[0]).seq(0).to_a.zip(t.class.maximum(t, 0).to_a).each { |v| gx[*v] -= 1 }

    if @class_weight
      shape = x.ndim.times.map { |d| d == 1 ? true : 1 }
      c = Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
      c = c[t.class.new(t.shape[0]).seq, t.class.maximum(t, 0)].diagonal.dup
      gx *= Chainer::Utils::Array.broadcast_to(c.expand_dims(1), gx.shape)
    end
    if @ignore_label
      gx *= (t.ne @ignore_label).reshape(t.shape[0], 1)
    end
  else
    # in the case where y.ndim is higher than 2,
    # we think that a current implementation is inefficient
    # because it yields two provisional arrays for indexing.
    n_unit = t.size / t.shape[0]
    gx = y.reshape(y.shape[0], y.shape[1], true)
    fst_index = xm::Int32.new(t.size).seq(0) / n_unit
    trd_index = xm::Int32.new(t.size).seq(0) % n_unit
    # TODO(sonots): Avoid to_a especially in Cumo to improve performance
    fst_index.to_a.zip(t.class.maximum(t.flatten.dup, 0).to_a, trd_index.to_a).each { |v| gx[*v] -= 1 }
    if @class_weight
      shape = x.ndim.times.map { |d| d == 1 ? true : 1 }
      c = Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
      c = c.reshape(*gx.shape)
      c = c[fst_index, t.class.maximum(t.flatten.dup, 0), trd_index].diagonal.diagonal.dup
      c = c.reshape(y.shape[0], 1, true)
      gx *= Chainer::Utils::Array.broadcast_to(c, gx.shape)
    end
    if @ignore_label
      gx *= (t.ne @ignore_label).reshape(t.shape[0], 1, true)
    end
    gx = gx.reshape(*y.shape)
  end

  if @reduce == 'mean'
    gx *= gloss * @coeff
  else
    gx *= gloss[true, :new, false]
  end
  return [gx, nil]
end
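As a sanity check on what backward computes in the common 2-D case (reduce: 'mean', no class_weight, no ignored labels, upstream gradient of 1), the gradient with respect to x is softmax(x) minus the one-hot target, scaled by 1/batch_size. A hand-rolled Numo sketch with made-up values:

x = Numo::DFloat[[1.0, 2.0, 0.5], [0.2, 0.1, 3.0]]
t = Numo::Int32[1, 2]

e = Numo::NMath.exp(x - x.max(axis: 1, keepdims: true))
softmax = e / e.sum(axis: 1, keepdims: true)

gx = softmax.dup
t.to_a.each_with_index { |label, row| gx[row, label] -= 1 }   # subtract the one-hot target
gx /= x.shape[0]                                              # the 1/N factor applied via @coeff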
#forward(inputs) ⇒ Object
# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 69

def forward(inputs)
  xm = Chainer.get_array_module(*inputs)
  x, t = inputs
  log_y = Activation._log_softmax(x)

  if @cache_score
    @y = xm::NMath.exp(log_y)
  end
  if @class_weight
    shape = x.ndim.times.map { |e| e == 1 ? true : 1 }
    log_y *= Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
  end
  log_yd = Chainer::Utils::Array.rollaxis(log_y, 1)
  begin
    log_yd = log_yd.reshape(log_yd.shape[0], true)
  rescue ArgumentError
  end

  log_p = log_yd[t.class.maximum(t.flatten, 0), t.class.new(t.size).seq].diagonal

  if @ignore_label
    t_valid = t.ne(@ignore_label)
    log_p *= t_valid.flatten
  end

  if @reduce == 'mean'
    if @normalize and t_valid
      @coeff = 1.0 / log_p.class.maximum(Chainer::Utils::Array.force_array(t_valid.count), 1)
    else
      count = x.shape[0]
      @coeff = 1.0 / [count, 1].max
    end
    y = log_p.sum(keepdims: true) * (-@coeff)
    [y.class.cast(y[0])]
  else
    [-log_p.reshape(*t.shape)]
  end
end
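To illustrate the normalization above: with reduce: 'mean' and normalize: true, the summed log-probabilities are divided by the number of labels that differ from ignore_label; otherwise they are divided by the batch size. A hand computation with made-up values, where the third label is ignored:

x = Numo::DFloat[[1.0, 0.0], [0.0, 1.0], [0.5, 0.5]]
t = Numo::Int32[0, 1, -1]   # -1 is the default ignore_label

exp_x = Numo::NMath.exp(x)
log_y = Numo::NMath.log(exp_x / exp_x.sum(axis: 1, keepdims: true))
log_p = Numo::DFloat[log_y[0, 0], log_y[1, 1], 0.0]   # the ignored sample contributes 0

valid = t.ne(-1).count       # => 2, the denominator when normalize: true
mean_loss = -log_p.sum / valid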