Class: NanoGPT::GPTConfig

Inherits:
Object
  • Object
show all
Defined in:
lib/nano_gpt/config.rb

Overview

Configuration for GPT model architecture

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(block_size: 1024, vocab_size: 50304, n_layer: 12, n_head: 12, n_embd: 768, dropout: 0.0, bias: true) ⇒ GPTConfig

Returns a new instance of GPTConfig.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/nano_gpt/config.rb', line 8

# Builds a configuration object, storing every keyword argument in the
# instance variable of the same name.
#
# NOTE(review): the per-parameter meanings below are inferred from the names
# and the "GPT model architecture" overview; confirm against the model code.
#
# @param block_size [Integer] presumably the maximum sequence length (default 1024)
# @param vocab_size [Integer] presumably the number of token ids (default 50304)
# @param n_layer [Integer] presumably the number of transformer blocks (default 12)
# @param n_head [Integer] presumably the number of attention heads (default 12)
# @param n_embd [Integer] presumably the embedding width (default 768)
# @param dropout [Float] presumably the dropout probability (default 0.0)
# @param bias [Boolean] presumably toggles bias terms in layers (default true)
def initialize(block_size: 1024, vocab_size: 50_304, n_layer: 12, n_head: 12, n_embd: 768, dropout: 0.0, bias: true)
  @block_size, @vocab_size = block_size, vocab_size
  @n_layer, @n_head, @n_embd = n_layer, n_head, n_embd
  @dropout = dropout
  @bias = bias
end

Instance Attribute Details

#bias ⇒ Object

Returns the value of attribute bias.



6
7
8
# File 'lib/nano_gpt/config.rb', line 6

# Reader for the +bias+ setting.
#
# @return [Object] the current value of @bias (the constructor defaults it to true)
def bias = @bias

#block_size ⇒ Object

Returns the value of attribute block_size.



6
7
8
# File 'lib/nano_gpt/config.rb', line 6

# Reader for the +block_size+ setting.
#
# @return [Object] the current value of @block_size (the constructor defaults it to 1024)
def block_size = @block_size

#dropout ⇒ Object

Returns the value of attribute dropout.



6
7
8
# File 'lib/nano_gpt/config.rb', line 6

# Reader for the +dropout+ setting.
#
# @return [Object] the current value of @dropout (the constructor defaults it to 0.0)
def dropout = @dropout

#n_embd ⇒ Object

Returns the value of attribute n_embd.



6
7
8
# File 'lib/nano_gpt/config.rb', line 6

# Reader for the +n_embd+ setting.
#
# @return [Object] the current value of @n_embd (the constructor defaults it to 768)
def n_embd = @n_embd

#n_head ⇒ Object

Returns the value of attribute n_head.



6
7
8
# File 'lib/nano_gpt/config.rb', line 6

# Reader for the +n_head+ setting.
#
# @return [Object] the current value of @n_head (the constructor defaults it to 12)
def n_head = @n_head

#n_layer ⇒ Object

Returns the value of attribute n_layer.



6
7
8
# File 'lib/nano_gpt/config.rb', line 6

# Reader for the +n_layer+ setting.
#
# @return [Object] the current value of @n_layer (the constructor defaults it to 12)
def n_layer = @n_layer

#vocab_size ⇒ Object

Returns the value of attribute vocab_size.



6
7
8
# File 'lib/nano_gpt/config.rb', line 6

# Reader for the +vocab_size+ setting.
#
# @return [Object] the current value of @vocab_size (the constructor defaults it to 50304)
def vocab_size = @vocab_size

Instance Method Details

#head_size ⇒ Object



38
39
40
# File 'lib/nano_gpt/config.rb', line 38

# Divides @n_embd by @n_head.
#
# With Integer operands Ruby's `/` is integer division, so the constructor
# defaults give 768 / 12 = 64 and any remainder is discarded.
# NOTE(review): presumably the width of a single attention head — confirm
# against the attention implementation.
#
# @return [Numeric] the quotient @n_embd / @n_head
def head_size = @n_embd / @n_head

#to_h ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
# File 'lib/nano_gpt/config.rb', line 26

# Serializes the configuration into a plain Hash.
#
# Keys are symbols, emitted in the order block_size, vocab_size, n_layer,
# n_head, n_embd, dropout, bias; each value is read from the instance
# variable of the same name.
#
# @return [Hash{Symbol => Object}] a new Hash on every call
def to_h
  fields = %i[block_size vocab_size n_layer n_head n_embd dropout bias]
  fields.to_h { |field| [field, instance_variable_get(:"@#{field}")] }
end