Module: Torchrb::NN::ImageDefault

Defined in:
lib/torchrb/nn/image_default.rb

Instance Method Summary

Instance Method Details

#define_nn(options) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/torchrb/nn/image_default.rb', line 3

# Builds a LeNet-style convolutional network inside the embedded Torch/Lua
# state and returns a description of its modules.
#
# Layer stack: conv -> 2x2 max-pool -> conv -> 2x2 max-pool -> View ->
# Linear -> Linear -> Linear -> LogSoftMax. The network is stored in the Lua
# global `net` (assigned without `local` in the script below).
#
# Size arithmetic, following the Torch7 nn documentation:
#   SpatialConvolution(nInputPlane, nOutputPlane, kW, kH, dW = 1, dH = 1, padW = 0, padH = 0)
#     out = floor((in + 2 * pad - kernel) / stride) + 1
#     e.g. (256 - 5) / 1 + 1 = 252
#   SpatialMaxPooling(kW, kH, dW, dH) with padding 0
#     out = floor((in - kernel) / stride) + 1
#     e.g. (252 - 2) / 2 + 1 = 126
#
# options - Hash; options[:image_size] must respond to #max. Only the largest
#           entry is used as the edge length, i.e. the input is treated as
#           square (presumably [width, height] -- confirm against callers).
#
# Returns the Lua table produced by the script, converted with #to_h; it holds
# one { tostring(module), input size, output size } entry per module.
# Raises ArgumentError when the image is too small for the two
# convolution + pooling stages.
def define_nn options
  image_width_height = options[:image_size].max
  kernel_width = 5
  input_layer = 120*2   # width of the first fully connected layer (240)
  interm_layer = 84*2   # width of the second fully connected layer (168)
  output_layer = 2      # number of labels

  # Edge length of a feature map after the two conv + pool stages.
  # Convolution uses stride 1 and no padding; pooling is 2x2 with stride 2,
  # which for integers is plain floor division by 2.
  view_size = image_width_height
  2.times do
    view_size = (view_size - kernel_width) + 1
    view_size = view_size / 2
  end

  # Ruby integer division floors toward negative infinity, so an undersized
  # image would give a negative edge length whose square is positive and
  # silently produce a wrong View/Linear size. Fail loudly instead.
  if view_size < 1
    raise ArgumentError,
          "image size #{image_width_height} is too small for two " \
          "#{kernel_width}x#{kernel_width} convolution + 2x2 max-pooling stages"
  end

  flat_size = 16 * view_size * view_size

  torch.eval(<<-EOF, __FILE__, __LINE__).to_h
      -- Example shapes on the right are for a 4x256x256 input.
      net = nn.Sequential()                                                   --                                              [ 4,256,256]
      net:add(nn.SpatialConvolution(4, 6, #{kernel_width}, #{kernel_width}))  -- 4 input channels -> 6 feature maps        -> [ 6,252,252]
      net:add(nn.SpatialMaxPooling(2,2,2,2))                                  -- max over 2x2 windows, stride 2            -> [ 6,126,126]
      net:add(nn.SpatialConvolution(6, 16, #{kernel_width}, #{kernel_width})) -- 6 -> 16 feature maps                      -> [16,122,122]
      net:add(nn.SpatialMaxPooling(2,2,2,2))                                  --                                           -> [16, 61, 61]
      net:add(nn.View(#{flat_size}))                                          -- flatten the 16 feature maps to 1D         -> [59536]
      net:add(nn.Linear(#{flat_size}, #{input_layer}))                        -- fully connected layer                     -> #{input_layer} (arbitrarily chosen)
      net:add(nn.Linear(#{input_layer}, #{interm_layer}))                     --                                           -> #{interm_layer} (arbitrarily chosen)
      net:add(nn.Linear(#{interm_layer}, #{output_layer}))                    -- one output per label                      -> #{output_layer}
      net:add(nn.LogSoftMax())                                                -- log-probabilities over the labels

      -- Describe every module: name plus the sizes of its last input/output.
      local d = {}
      for i,module in ipairs(net:listModules()) do
        -- Keep these local so they do not leak into the Lua global table.
        local inSize = "[]"
        local outSize = "[]"
        -- The size lookup raises when the field is missing; pcall keeps the placeholder.
        pcall(function () inSize = #module.input end)
        pcall(function () outSize = #module.output end)
        table.insert(d, {tostring(module), inSize, outSize} )
      end
      return d
  EOF
end