Class: NanoGPT::LRScheduler

Inherits:
Object
Defined in:
lib/nano_gpt/lr_scheduler.rb

Overview

Cosine learning rate scheduler with linear warmup
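
The schedule has three phases, matching the logic in #get_lr: for the first warmup_iters iterations the rate ramps up linearly from near zero toward learning_rate; between warmup_iters and lr_decay_iters it follows a half-cosine curve, min_lr + 0.5 * (1 + cos(PI * decay_ratio)) * (learning_rate - min_lr), where decay_ratio runs from 0 to 1; past lr_decay_iters it stays flat at min_lr.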

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(learning_rate:, min_lr:, warmup_iters:, lr_decay_iters:) ⇒ LRScheduler

Returns a new instance of LRScheduler.



# File 'lib/nano_gpt/lr_scheduler.rb', line 8

def initialize(learning_rate:, min_lr:, warmup_iters:, lr_decay_iters:)
  @learning_rate = learning_rate
  @min_lr = min_lr
  @warmup_iters = warmup_iters
  @lr_decay_iters = lr_decay_iters
end
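
All four arguments are required keywords. A minimal construction sketch (the values below are illustrative, not library defaults):

scheduler = NanoGPT::LRScheduler.new(
  learning_rate: 1e-3,   # peak rate reached once warmup ends
  min_lr: 1e-4,          # floor the cosine decays down to
  warmup_iters: 100,     # number of linear warmup iterations
  lr_decay_iters: 1000   # iteration at which the rate settles at min_lr
)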

Instance Attribute Details

#learning_rate ⇒ Object (readonly)

Returns the value of attribute learning_rate.



# File 'lib/nano_gpt/lr_scheduler.rb', line 6

def learning_rate
  @learning_rate
end

#lr_decay_iters ⇒ Object (readonly)

Returns the value of attribute lr_decay_iters.



# File 'lib/nano_gpt/lr_scheduler.rb', line 6

def lr_decay_iters
  @lr_decay_iters
end

#min_lr ⇒ Object (readonly)

Returns the value of attribute min_lr.



# File 'lib/nano_gpt/lr_scheduler.rb', line 6

def min_lr
  @min_lr
end

#warmup_iters ⇒ Object (readonly)

Returns the value of attribute warmup_iters.



# File 'lib/nano_gpt/lr_scheduler.rb', line 6

def warmup_iters
  @warmup_iters
end

Instance Method Details

#get_lr(iter) ⇒ Object

Get the learning rate for the given iteration



# File 'lib/nano_gpt/lr_scheduler.rb', line 16

def get_lr(iter)
  # 1) Linear warmup for warmup_iters steps
  if iter < @warmup_iters
    return @learning_rate * (iter + 1).to_f / (@warmup_iters + 1)
  end

  # 2) If iter > lr_decay_iters, return min learning rate
  if iter > @lr_decay_iters
    return @min_lr
  end

  # 3) In between, use cosine decay down to min learning rate
  decay_ratio = (iter - @warmup_iters).to_f / (@lr_decay_iters - @warmup_iters)
  coeff = 0.5 * (1.0 + Math.cos(Math::PI * decay_ratio))
  @min_lr + coeff * (@learning_rate - @min_lr)
end
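
A rough sketch of the resulting schedule, reusing the illustrative values from the constructor example above (outputs rounded):

scheduler = NanoGPT::LRScheduler.new(
  learning_rate: 1e-3, min_lr: 1e-4, warmup_iters: 100, lr_decay_iters: 1000
)
scheduler.get_lr(0)     # => ~9.9e-06  (warmup: 1e-3 * 1/101)
scheduler.get_lr(100)   # => 1.0e-03   (decay_ratio 0.0, coeff 1.0: peak rate)
scheduler.get_lr(550)   # => 5.5e-04   (decay_ratio 0.5, coeff 0.5: halfway down the cosine)
scheduler.get_lr(2000)  # => 1.0e-04   (past lr_decay_iters, clamped to min_lr)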

#step(optimizer, iter) ⇒ Object

Apply the scheduled learning rate to every parameter group of the optimizer and return it



# File 'lib/nano_gpt/lr_scheduler.rb', line 34

def step(optimizer, iter)
  lr = get_lr(iter)
  # Write the scheduled rate into every parameter group
  optimizer.param_groups.each do |group|
    group[:lr] = lr
  end
  lr
end
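
In a training loop the scheduler would typically be stepped once per iteration before the parameter update. The sketch below assumes an optimizer whose param_groups returns an array of hashes keyed by :lr (which is what #step writes to); the body of the loop is elided:

1000.times do |iter|
  lr = scheduler.step(optimizer, iter)  # writes the scheduled rate into every group, returns it
  # ... forward pass, backward pass, optimizer update ...
  puts format("iter %d  lr %.2e", iter, lr) if (iter % 100).zero?
end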