Class: Chainer::Functions::Loss::SoftmaxCrossEntropy

Inherits:
Chainer::Function
Defined in:
lib/chainer/functions/loss/softmax_cross_entropy.rb

Instance Attribute Summary

Attributes inherited from Chainer::Function

#inputs, #output_data, #outputs, #owned_node, #rank, #retain_after_backward

Class Method Summary

Instance Method Summary

Methods inherited from Chainer::Function

#backward_cpu, #backward_gpu, #call, #forward_cpu, #forward_gpu, #label, #node, #retain_inputs, #retain_outputs

Constructor Details

#initialize(normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean') ⇒ SoftmaxCrossEntropy

Returns a new instance of SoftmaxCrossEntropy.



# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 57

def initialize(normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean')
  @normalize = normalize
  @cache_score = cache_score
  self.class.check_class_weight_option(class_weight)
  @class_weight = class_weight

  @ignore_label = ignore_label

  self.class.check_reduce_option(reduce)
  @reduce = reduce
end
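
A minimal usage sketch, assuming the library has been loaded with require 'chainer' and using made-up inputs: x holds a 3x5 batch of raw class scores and t the integer ground-truth labels.

require 'chainer'

x = Chainer::Variable.new(Numo::SFloat.new(3, 5).rand)
t = Numo::Int32[2, 0, 4]

fn   = Chainer::Functions::Loss::SoftmaxCrossEntropy.new(reduce: 'mean')
loss = fn.(x, t)   # Function#call runs #forward and returns a Variable holding the scalar loss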

Class Method Details

.check_class_weight_option(class_weight) ⇒ Object



# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 167

def self.check_class_weight_option(class_weight)
  return if class_weight.nil?

  xm = Chainer.get_array_module(class_weight)
  if class_weight.ndim != 1
    raise ArgumentError, 'class_weight.ndim should be 1'
  elsif (class_weight.class != xm::DFloat) and (class_weight.class != xm::SFloat)
    raise ArgumentError, "The dtype of class_weight should be 'DFloat' or 'SFloat'"
  elsif class_weight.kind_of?(Chainer::Variable)
    raise ArgumentError, 'class_weight should be a NArray, not a chainer.Variable'
  end
end
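
class_weight, when given, must be a one-dimensional SFloat or DFloat NArray rather than a Chainer::Variable; for example (hypothetical weights for a 5-class problem):

w = Numo::SFloat[0.5, 1.0, 1.0, 2.0, 1.0]   # one weight per class
Chainer::Functions::Loss::SoftmaxCrossEntropy.check_class_weight_option(w)   # passes silently

Chainer::Functions::Loss::SoftmaxCrossEntropy.check_class_weight_option(w.reshape(1, 5))
# => ArgumentError: class_weight.ndim should be 1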

.check_reduce_option(reduce) ⇒ Object



# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 180

def self.check_reduce_option(reduce)
  unless ['mean', 'no'].include?(reduce)
    raise ArgumentError, "only 'mean' and 'no' are valid for 'reduce', but #{reduce} is given"
  end
end
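
Only the two documented values pass this check:

Chainer::Functions::Loss::SoftmaxCrossEntropy.check_reduce_option('no')    # accepted
Chainer::Functions::Loss::SoftmaxCrossEntropy.check_reduce_option('sum')
# => ArgumentError: only 'mean' and 'no' are valid for 'reduce', but sum is given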

.double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce) ⇒ Object



# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 13

def self.double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce)
  if t.is_a?(Chainer::Variable)
    t = t.data
  end

  self.check_class_weight_option(class_weight)
  self.check_reduce_option(reduce)

  loss = -Activation::LogSoftmax.log_softmax(x)

  if class_weight
    shape = x.ndim.times.map { |d| d != 1 ? 1 : class_weight.shape[-1] }
    class_weight = Chainer::Functions::Array::BroadcastTo.broadcast_to(class_weight.reshape(*shape), x.shape)
    loss = loss * class_weight
  end

  dtype = x.is_a?(Chainer::Variable) ? x.dtype : x.class
  in_use = t.ne(ignore_label).cast_to(dtype)

  loss = Chainer::Functions::Array::Rollaxis.rollaxis(loss, 1, start: loss.ndim)

  # TODO: loss = chainer.functions.reshape(loss, (-1, loss.shape[-1]))
  shape = loss.shape
  last_shape = shape.pop
  loss = Chainer::Functions::Array::Reshape.reshape(loss, [shape.inject(:*), last_shape])

  # Replace ignore_label value with one valid for F.select_item below.
  t = t.clip(0, loss.shape[1] - 1)

  loss = Chainer::Functions::Array::SelectItem.select_item(loss, t.flatten.dup)
  loss = Chainer::Functions::Array::Reshape.reshape(loss, t.shape)

  loss = loss * in_use

  if reduce == "mean"
    count = normalize ? in_use.sum : x.shape.first
    count = [count, 1.0].max
    loss = loss * (1.0 / count)
    return Chainer::Functions::Math::Sum.sum(loss)
  else
    return loss
  end
end
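
Setting class_weight aside, the composed loss picks -log_softmax(x) at each sample's target index, masks out ignored labels, and (for reduce: 'mean') divides by the number of samples in use. A rough Numo-only sketch of that arithmetic, using made-up data that is not part of the API:

x = Numo::SFloat.new(3, 5).rand
t = Numo::Int32[2, 0, -1]                     # the last sample is ignored

# (a production log-softmax, like Activation._log_softmax, subtracts the row max first for stability)
log_z       = Numo::NMath.log(Numo::NMath.exp(x).sum(axis: 1, keepdims: true))
log_softmax = x - log_z
picked      = log_softmax[Numo::Int32.new(t.size).seq, t.class.maximum(t, 0)].diagonal
in_use      = t.ne(-1).cast_to(Numo::SFloat)
mean_loss   = (-picked * in_use).sum / [in_use.sum, 1.0].max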

.softmax_cross_entropy(x, t, normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean', enable_double_backprop: false) ⇒ Object



# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 5

def self.softmax_cross_entropy(x, t, normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean', enable_double_backprop: false)
  if enable_double_backprop
    self.double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce)
  else
    self.new(normalize: normalize, cache_score: cache_score, class_weight: class_weight, ignore_label: ignore_label, reduce: reduce).(x, t)
  end
end
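
A short example of this convenience entry point with made-up data, showing the default mean reduction and the per-sample, double-backprop-friendly variant:

x = Chainer::Variable.new(Numo::SFloat.new(4, 3).rand)
t = Numo::Int32[0, 2, 1, -1]   # -1 entries are skipped via the default ignore_label: -1

# Scalar mean loss over the non-ignored samples.
loss = Chainer::Functions::Loss::SoftmaxCrossEntropy.softmax_cross_entropy(x, t)

# One loss per sample, built from differentiable primitives so that
# second-order gradients can be taken through it.
losses = Chainer::Functions::Loss::SoftmaxCrossEntropy.softmax_cross_entropy(
  x, t, reduce: 'no', enable_double_backprop: true)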

Instance Method Details

#backward(inputs, grad_outputs) ⇒ Object



# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 107

def backward(inputs, grad_outputs)
  xm = Chainer.get_array_module(*(inputs + grad_outputs))
  x, t = inputs
  gloss = grad_outputs[0]

  if self.instance_variable_defined?(:'@y')
    y = @y.dup
  else
    y = Activation._log_softmax(x)
    y = xm::NMath.exp(y)
  end

  if y.ndim == 2
    gx = y
    # TODO(sonots): Avoid to_a especially in Cumo to improve performance
    t.class.new(t.shape[0]).seq(0).to_a.zip(t.class.maximum(t, 0).to_a).each{|v| gx[*v] -= 1}

    if @class_weight
      shape = x.ndim.times.map { |d| d == 1 ? true : 1 }
      c = Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
      c = c[t.class.new(t.shape[0]).seq, t.class.maximum(t, 0)].diagonal.dup
      gx *= Chainer::Utils::Array.broadcast_to(c.expand_dims(1), gx.shape)
    end

    if @ignore_label
      gx *= (t.ne @ignore_label).reshape(t.shape[0], 1)
    end
  else
    # in the case where y.ndim is higher than 2,
    # we think that a current implementation is inefficient
    # because it yields two provisional arrays for indexing.

    n_unit = t.size / t.shape[0]
    gx = y.reshape(y.shape[0], y.shape[1], true)
    fst_index = xm::Int32.new(t.size).seq(0) / n_unit
    trd_index = xm::Int32.new(t.size).seq(0) % n_unit
    # TODO(sonots): Avoid to_a especially in Cumo to improve performance
    fst_index.to_a.zip(t.class.maximum(t.flatten.dup, 0).to_a, trd_index.to_a).each{|v| gx[*v] -= 1}
    if @class_weight
      shape = x.ndim.times.map{|d| d == 1 ? true : 1}
      c = Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
      c = c.reshape(*gx.shape)
      c = c[fst_index, t.class.maximum(t.flatten.dup, 0), trd_index].diagonal.diagonal.dup
      c = c.reshape(y.shape[0], 1, true)
      gx *= Chainer::Utils::Array.broadcast_to(c, gx.shape)
    end
    if @ignore_label
      gx *= (t.ne @ignore_label).reshape(t.shape[0], 1, true)
    end
    gx = gx.reshape(*y.shape)
  end

  if @reduce == 'mean'
    gx *= gloss * @coeff
  else
    gx *= gloss[true, :new, false]
  end
  return [gx, nil]
end
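
For the common 2-D case this implements the textbook gradient, softmax(x) minus the one-hot target, scaled by @coeff under the mean reduction. A small self-contained Numo sketch of that formula (illustrative only; class_weight and ignore_label are left out):

x = Numo::SFloat.new(2, 3).rand
t = Numo::Int32[1, 0]

log_z = Numo::NMath.log(Numo::NMath.exp(x).sum(axis: 1, keepdims: true))
y     = Numo::NMath.exp(x - log_z)            # softmax rows
gx    = y.dup
t.size.times { |i| gx[i, t[i]] -= 1 }         # subtract the one-hot target
gx   *= 1.0 / t.size                          # @coeff when nothing is ignored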

#forward(inputs) ⇒ Object



# File 'lib/chainer/functions/loss/softmax_cross_entropy.rb', line 69

def forward(inputs)
  xm = Chainer.get_array_module(*inputs)
  x, t = inputs
  log_y = Activation._log_softmax(x)

  if @cache_score
    @y = xm::NMath.exp(log_y)
  end
  if @class_weight
    shape = x.ndim.times.map { |e| e == 1 ? true : 1 }
    log_y *= Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
  end
  log_yd = Chainer::Utils::Array.rollaxis(log_y, 1)
  begin
    log_yd = log_yd.reshape(log_yd.shape[0], true)
  rescue ArgumentError
  end

  log_p = log_yd[t.class.maximum(t.flatten, 0), t.class.new(t.size).seq].diagonal
  if @ignore_label
    t_valid = t.ne(@ignore_label)
    log_p *= t_valid.flatten
  end

  if @reduce == 'mean'
    if @normalize and t_valid
      @coeff = 1.0 / log_p.class.maximum(Chainer::Utils::Array.force_array(t_valid.count), 1)
    else
      count = x.shape[0]
      @coeff = 1.0 / [count, 1].max
    end
    y = log_p.sum(keepdims: true) * (-@coeff)
    [y.class.cast(y[0])]
  else
    [-log_p.reshape(*t.shape)]
  end
end
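
When some labels equal ignore_label, normalize: true divides the summed loss by the number of valid samples, while normalize: false divides by the batch size. A hypothetical comparison:

x = Chainer::Variable.new(Numo::SFloat.new(4, 3).rand)
t = Numo::Int32[0, 2, -1, -1]                  # two of the four samples are ignored

per_valid = Chainer::Functions::Loss::SoftmaxCrossEntropy.new(normalize: true).(x, t)
per_batch = Chainer::Functions::Loss::SoftmaxCrossEntropy.new(normalize: false).(x, t)
# per_batch.data is half of per_valid.data here (2 valid samples out of 4)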