Class: Newral::QLearning::Base
- Inherits:
-
Object
- Object
- Newral::QLearning::Base
- Defined in:
- lib/newral/q_learning/base.rb
Instance Attribute Summary collapse
-
#game ⇒ Object
Returns the value of attribute game.
Instance Method Summary collapse
- #get_input(move: true) ⇒ Object
- #inform_game_ended ⇒ Object
-
#initialize(id: nil, game: nil, learning_rate: 0.4, discount: 0.9, epsilon: 0.9, sleep_time: 0.001) ⇒ Base
constructor
This Q-learning algorithm was originally posted at www.practicalai.io/teaching-ai-play-simple-game-using-q-learning/. I extended it so it can play more games. The q_table is also implemented as a hash, so the available actions can differ at different positions; this way the algorithm needs to know less about the game.
- #set_epsilon(epsilon) ⇒ Object
Constructor Details
#initialize(id: nil, game: nil, learning_rate: 0.4, discount: 0.9, epsilon: 0.9, sleep_time: 0.001) ⇒ Base
This Q-learning algorithm was originally posted at www.practicalai.io/teaching-ai-play-simple-game-using-q-learning/. I extended it so it can play more games. The q_table is also implemented as a hash, so the available actions can differ at different positions; this way the algorithm needs to know less about the game.
10 11 12 13 14 15 16 17 18 19 20 |
# Sets up a Q-learning player.
#
# Bug fix: the +game+ parameter defaults to nil, but the original body called
# +game.set_player(self)+ unconditionally, raising NoMethodError for the
# documented default. Safe navigation keeps all existing callers working.
#
# @param id [Object] optional identifier for this player
# @param game [Object, nil] the game to play; expected to respond to
#   #set_player, #get_position, #get_actions and #get_score
# @param learning_rate [Float] weight given to newly observed rewards
# @param discount [Float] discount factor applied to estimated future rewards
# @param epsilon [Float] exploration threshold (a random draw above it explores)
# @param sleep_time [Float] pause between moves
def initialize( id: nil, game: nil, learning_rate: 0.4, discount: 0.9, epsilon: 0.9, sleep_time: 0.001 )
  game&.set_player( self ) # only register when a game was actually supplied
  @id = id
  @game = game
  @learning_rate = learning_rate
  @discount = discount
  @epsilon = epsilon
  @sleep = sleep_time
  @random = Random.new
  @q_hash = {} # state => { action => estimated value }
end
Instance Attribute Details
#game ⇒ Object
Returns the value of attribute game.
4 5 6 |
# File 'lib/newral/q_learning/base.rb', line 4 def game @game end |
Instance Method Details
#get_input(move: true) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/newral/q_learning/base.rb', line 31 def get_input( move: true ) # Our new state is equal to the player position @outcome_state = @game.get_position( player: self ) # which actions are available to the player at the moment? @actions = @game.get_actions( player: self ) # is this the first run initial_run = @q_hash.empty? @q_hash[@outcome_state] = @q_hash[@outcome_state] || {} @actions.each do |action| @q_hash[@outcome_state][action] = @q_hash[@outcome_state][action] || 0.1 # @random.rand/10.0 end if initial_run @action_taken = @actions.first elsif @old_state # If this is not the first run # Evaluate what happened on last action and update Q table # Calculate reward reward = 0 # default is 0 if @old_score < @game.get_score( player: self ) reward = [@game.get_score( player: self )-@old_score,1].max # reward is at least 1 if our score increased elsif @old_score > @game.get_score( player: self ) reward = [@old_score-@game.get_score( player: self ),-1].min # reward is smaller or equal -1 if our score decreased else reward = -0.1 # time is money, we punish moves end @q_hash[@old_state][@action_taken] = @q_hash[@old_state][@action_taken] + @learning_rate * (reward + @discount * (@q_hash[@outcome_state]).values.max.to_f - @q_hash[@old_state][@action_taken]) end # Capture current state and score @old_score = @game.get_score( player: self ) @old_state = @game.get_position( player: self ) # we remember this for next run, its current state @old_actions = @actions if move # in the goal state we just update the q_hash # Chose action based on Q value estimates for state if @random.rand > @epsilon || @q_hash[@old_state].nil? 
# Select random action @action_taken_index = @random.rand(@actions.length).round @action_taken = @actions[@action_taken_index] else # Select based on Q table, remember @old_state is equal to current state at this point @action_taken = @q_hash[@old_state].to_a.sort{ |v1,v2| v2[1]<=>v1[1]}[0][0] raise "impossible action #{ @action_taken } #{@old_state} #{@q_hash[ @old_state ] } #{ @actions } #{@old_actions } " unless @actions.member?( @action_taken) end # Take action return @action_taken else @old_state = nil # we do not have a old state any more as we have reached an end state end end |
#inform_game_ended ⇒ Object
# Hook invoked when the game finishes: runs one last Q-table update
# without selecting a further move.
#
# @return [nil]
def inform_game_ended
  get_input(move: false)
end
#set_epsilon(epsilon) ⇒ Object
# Replaces the exploration threshold used by the epsilon-greedy policy.
#
# @param epsilon [Float] new threshold; a random draw above it triggers
#   a random (exploratory) move
# @return [Float] the stored value
def set_epsilon( epsilon )
  @epsilon = epsilon
end