Module: Torchrb::NN::ImageDefault

Defined in:
lib/torchrb/nn/image_default.rb

Instance Method Summary

Instance Method Details

#define_nn(options) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/torchrb/nn/image_default.rb', line 3

# Builds the default image-classification network inside the embedded Torch
# (Lua) state and returns a per-module summary.
#
# The network is: two (SpatialConvolution -> SpatialMaxPooling) stages,
# a View that flattens the feature maps, three Linear layers and a final
# LogSoftMax. The Lua global +net+ is (re)assigned as a side effect.
#
# @param options [Hash] must contain +:image_size+, a collection responding to
#   +max+ (presumably [width, height] -- confirm against callers). The largest
#   value is used for both dimensions, i.e. the input is treated as square.
# @return [Object] the result of calling +to_h+ on what +torch.eval+ returns;
#   the Lua chunk returns one +{tostring(module), inSize, outSize}+ entry per
#   module of the network.
# @raise [ArgumentError] if the image is too small to survive both
#   convolution/pooling stages.
def define_nn options
  # Dimensions (worked example for a [4,256,256] input):
  # -> SpatialConvolution(nInputPlane=4, nOutputPlane=6, kW=5, kH=5, dW=1, dH=1) -- stride (dW|dH) defaults to 1
  #    -> outWidth  = floor((width  - kW) / dW + 1) = (256 - 5) / 1 + 1 = 252
  #    -> outHeight = floor((height - kH) / dH + 1) = (256 - 5) / 1 + 1 = 252
  # -> SpatialMaxPooling(kW=2, kH=2, dW=2, dH=2) -- pad(W|H) defaults to 0
  #    -> outWidth  = floor((width + 2*padW - kW) / dW + 1) = (252 - 2) / 2 + 1 = 126

  image_width_height = options[:image_size].max
  kernel_width = 5
  input_layer = 120*2
  interm_layer = 84*2
  output_layer = 2

  # Each stage is a stride-1 convolution (shrinks by kernel_width - 1) followed
  # by a 2x2/stride-2 max-pool (integer halving). Two stages in total.
  view_size = image_width_height
  2.times do
    view_size = (view_size - kernel_width + 1) / 2
  end

  unless view_size > 0
    raise ArgumentError,
          "image_size #{options[:image_size].inspect} is too small for two " \
          "#{kernel_width}x#{kernel_width} convolution + 2x2 pooling stages"
  end

  # Number of values fed into the first fully connected layer.
  flat_size = 16 * view_size * view_size

  torch.eval(<<-EOF, __FILE__, __LINE__).to_h
      net = nn.Sequential()                                                   --                                                                         [ 4,256,256]   3,32,32
      net:add(nn.SpatialConvolution(4, 6, #{kernel_width}, #{kernel_width}))  -- 4 input image channels, 6 output channels, 5x5 convolution kernel    -> [ 6,252,252]   6,28,28
      net:add(nn.SpatialMaxPooling(2,2,2,2))                                  -- A max-pooling operation that looks at 2x2 windows and finds the max. -> [ 6,126,126]   6,14,14
      net:add(nn.SpatialConvolution(6, 16, #{kernel_width}, #{kernel_width})) --                                                                      -> [16,122,122]  16,10,10
      net:add(nn.SpatialMaxPooling(2,2,2,2))                                  --                                                                      -> [16, 61, 61]  16, 5, 5
      net:add(nn.View(#{flat_size}))                                          -- reshapes from a 4D tensor of 16x5x5 into 1D tensor of 16*5*5         -> [59536]       400
      net:add(nn.Linear(#{flat_size}, #{input_layer})  )                      -- fully connected layer (matrix multiplication between input and weights)->   120 <-- randomly choosen
      net:add(nn.Linear(#{input_layer}, #{interm_layer}))                     --                                                                      ->      84 <-- randomly choosen
      net:add(nn.Linear(#{interm_layer}, #{output_layer}))                    -- 2 is the number of outputs of the network (in this case, 2 digits)   ->       2 <-- number of labels
      net:add(nn.LogSoftMax())                                                --                                                                      ->       1 <-- which label?

      local d = {}
      for i,module in ipairs(net:listModules()) do
        -- locals, so the summary loop does not leak globals into the Lua state
        local inSize = "[]"
        local outSize = "[]"
        -- pcall: if taking the size fails, the "[]" placeholder is kept
        pcall(function () inSize = #module.input end)
        pcall(function () outSize = #module.output end)
        table.insert(d, {tostring(module), inSize, outSize} )
      end
      return d
  EOF
end