Class: Gitlab::Database::LoadBalancing::LoadBalancer

Inherits:
Object
  • Object
show all
Defined in:
lib/gitlab/database/load_balancing/load_balancer.rb

Overview

Load balancing for ActiveRecord connections.

Each host in the load balancer uses the same credentials as the primary database.

Constant Summary collapse

ANY_CAUGHT_UP =
:any
ALL_CAUGHT_UP =
:all
NONE_CAUGHT_UP =
:none
CACHE_KEY =
:gitlab_load_balancer_host
REPLICA_SUFFIX =
'_replica'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(configuration) ⇒ LoadBalancer

configuration - An instance of `LoadBalancing::Configuration` that

contains the configuration details (such as the hosts)
for this load balancer.


26
27
28
29
30
31
32
33
34
35
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 26

# Sets up the load balancer and the list of hosts it hands out.
#
# configuration - The configuration object for this load balancer. It must
#                 respond to `load_balancing_enabled?` and, when load
#                 balancing is enabled, to `hosts`.
def initialize(configuration)
  @configuration = configuration
  @primary_only = !configuration.load_balancing_enabled?

  # Without load balancing the list contains a single PrimaryHost; otherwise
  # one Host is created for every configured address.
  members =
    if @primary_only
      [PrimaryHost.new(self)]
    else
      configuration.hosts.map { |address| Host.new(address, self) }
    end

  @host_list = HostList.new(members)
end

Instance Attribute Details

#configurationObject (readonly)

Returns the value of attribute configuration.



21
22
23
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 21

# Returns the configuration object held in @configuration.
def configuration
  @configuration
end

#host_listObject (readonly)

Returns the value of attribute host_list.



21
22
23
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 21

# Returns the host list held in @host_list.
def host_list
  @host_list
end

#service_discoveryObject

Returns the value of attribute service_discovery.



19
20
21
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 19

# Returns the value held in @service_discovery.
#
# NOTE(review): nothing in the visible code assigns this variable, so it is
# presumably set through a writer defined elsewhere and may be nil.
def service_discovery
  @service_discovery
end

Instance Method Details

#connection_error?(error) ⇒ Boolean

Returns:

  • (Boolean)


249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 249

# Returns true when the given error should be treated as a connection error.
#
# error - The exception to inspect.
#
# The checks run in a fixed order: ActiveRecord::NoDatabaseError is tested
# before ActiveRecord::StatementInvalid so that it is always rejected.
def connection_error?(error)
  # Retrying this error isn't going to magically make the database appear.
  # It also slows down CI jobs that are meant to create the database in the
  # first place.
  return false if error.is_a?(ActiveRecord::NoDatabaseError)

  # After connecting to the DB Rails will wrap query errors using these
  # classes, so the verdict depends on the wrapped cause (if there is one).
  if error.is_a?(ActiveRecord::StatementInvalid) || error.is_a?(ActionView::Template::Error)
    wrapped = error.cause

    return wrapped ? connection_error?(wrapped) : false
  end

  # `===` mirrors how a `when` clause would match each listed entry.
  return true if CONNECTION_ERRORS.any? { |known| known === error }

  # When PG tries to set the client encoding but fails due to a connection
  # error it will raise a PG::Error instance. Catching that would catch all
  # errors (even those we don't want), so instead we check for the message
  # of the error.
  error.message.start_with?('invalid encoding name:')
end

#create_replica_connection_pool(pool_size, host = nil, port = nil) ⇒ Object

pool_size - The size of the DB pool.
host - An optional host name to use instead of the default one.
port - An optional port to connect to.



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 286

# Establishes a connection for a replica, based on the primary's settings.
#
# pool_size - The size of the DB pool.
# host - An optional host name to use instead of the default one.
# port - An optional port to connect to.
#
# Returns whatever `establish_connection` returns for the replica config.
def create_replica_connection_pool(pool_size, host = nil, port = nil)
  primary_config = @configuration.db_config

  # Work on a copy so the primary's configuration hash is left untouched.
  replica_settings = primary_config.configuration_hash.dup.tap do |settings|
    settings[:pool] = pool_size
    settings[:host] = host if host
    settings[:port] = port if port
  end

  replica_name = primary_config.name + REPLICA_SUFFIX
  replica_config = ActiveRecord::DatabaseConfigurations::HashConfig.new(
    primary_config.env_name,
    replica_name,
    replica_settings
  )

  # We cannot use ActiveRecord::Base.connection_handler.establish_connection
  # as it will rewrite ActiveRecord::Base.connection
  dedicated_handler = ActiveRecord::ConnectionAdapters::ConnectionHandler.new
  dedicated_handler.establish_connection(replica_config)
end

#disconnect!(timeout: 120) ⇒ Object



45
46
47
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 45

# Disconnects every host in the host list.
#
# timeout - Forwarded unchanged to each host's `disconnect!` (default: 120).
#
# Returns the collection of hosts that was iterated.
def disconnect!(timeout: 120)
  host_list.hosts.each do |member|
    member.disconnect!(timeout: timeout)
  end
end

#hostObject

Returns a host to use for queries.

Hosts are scoped per thread so that multiple threads don’t accidentally re-use the same host + connection.



156
157
158
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 156

# Returns a host to use for queries.
#
# The host is memoised in `request_cache` under CACHE_KEY; `@host_list.next`
# is only consulted when no truthy entry is cached yet.
def host
  cache = request_cache
  cached = cache[CACHE_KEY]
  return cached if cached

  cache[CACHE_KEY] = @host_list.next
end

#nameObject



37
38
39
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 37

# Returns the `db_config_name` reported by the configuration object.
def name
  @configuration.db_config_name
end

#poolObject

ActiveRecord::ConnectionAdapters::ConnectionHandler handles fetching and caching of connection pools for each “connection”, so we leverage that. rubocop:disable Database/MultipleDatabases



311
312
313
314
315
316
317
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 311

# Returns the connection pool registered for this load balancer's
# connection specification name, using the writing role and default shard.
#
# Raises ActiveRecord::ConnectionNotEstablished when the connection handler
# has no such pool.
def pool
  handler = ActiveRecord::Base.connection_handler
  registered_pool = handler.retrieve_connection_pool(
    @configuration.connection_specification_name,
    role: ActiveRecord.writing_role,
    shard: ActiveRecord::Base.default_shard
  )

  raise ::ActiveRecord::ConnectionNotEstablished unless registered_pool

  registered_pool
end

#primary_only?Boolean

Returns:

  • (Boolean)


41
42
43
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 41

# Returns the value held in @primary_only.
def primary_only?
  @primary_only
end

#primary_write_locationObject

Returns the transaction write location of the primary.



175
176
177
178
179
180
181
182
183
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 175

# Returns the transaction write location of the primary.
#
# The location is obtained by passing a `read_write` connection to
# `get_write_location`.
#
# Raises RuntimeError when no location could be determined.
def primary_write_location
  location = read_write { |connection| get_write_location(connection) }

  raise 'Failed to determine the write location of the primary database' unless location

  location
end

#read(&block) ⇒ Object

Yields a connection that can be used for reads.

If no secondaries were available this method will use the primary instead.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 53

# Yields a connection that can be used for reads and returns the block's
# result.
#
# The connection comes from the host returned by `host`. When the block
# raises:
#
# - with only a primary configured, the error is re-raised;
# - on a serialization failure, the host is released and another one is
#   tried, up to `@host_list.length * 3` times;
# - on a connection error, the host is marked offline and released, and the
#   loop continues with the next host;
# - any other error is re-raised.
#
# When the loop ends without returning (no host is available, or the
# conflict retries are exhausted) the block is run through `read_write`.
def read(&block)
  service_discovery&.log_refresh_thread_interruption

  # Number of serialization-failure retries performed so far.
  conflict_retried = 0

  while host
    ensure_caching!

    begin
      connection = host.connection
      return yield connection
    rescue StandardError => error
      if primary_only?
        # If we only have primary configured, retrying is pointless
        raise error
      elsif serialization_failure?(error)
        # This error can occur when a query conflicts. See
        # https://www.postgresql.org/docs/current/static/hot-standby.html#HOT-STANDBY-CONFLICT
        # for more information.
        #
        # In this event we'll cycle through the secondaries at most 3
        # times before using the primary instead.
        will_retry = conflict_retried < @host_list.length * 3

        ::Gitlab::Database::LoadBalancing::Logger.warn(
          event: :host_query_conflict,
          message: 'Query conflict on host',
          conflict_retried: conflict_retried,
          will_retry: will_retry,
          db_host: host.host,
          db_port: host.port,
          host_list_length: @host_list.length
        )

        if will_retry
          conflict_retried += 1
          # Releasing the host clears the cached entry, so the next call to
          # `host` picks a new one from the host list.
          release_host
        else
          # Retries exhausted: leave the loop and fall back to the primary.
          break
        end
      elsif connection_error?(error)
        # Take the failing host out of rotation before trying another one.
        host.offline!
        release_host
      else
        raise error
      end
    end
  end

  ::Gitlab::Database::LoadBalancing::Logger.warn(
    event: :no_secondaries_available,
    message: 'No secondaries were available, using primary instead',
    conflict_retried: conflict_retried,
    host_list_length: @host_list.length
  )

  read_write(&block)
end

#read_writeObject

Yields a connection that can be used for both reads and writes.



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 113

# Yields a connection that can be used for both reads and writes and
# returns the block's result.
#
# The connection is taken from `pool`. The block is run through
# `retry_with_backoff`, so it may be executed more than once when a
# connection error occurs. Errors raised by the block are always re-raised
# here; deciding whether to retry is left to `retry_with_backoff`.
def read_write
  service_discovery&.log_refresh_thread_interruption

  # Assigned inside the retried block so the rescue clause below can see
  # the state of the attempt that failed.
  connection = nil
  transaction_open = nil

  # Make a single attempt (no retries) when in a transaction
  # (see https://gitlab.com/gitlab-org/gitlab/-/issues/220242)
  attempts = pool.connection.transaction_open? ? 1 : 3

  # In the event of a failover the primary may be briefly unavailable.
  # Instead of immediately grinding to a halt we'll retry the operation
  # a few times.
  # It is not possible to preserve transaction state during a retry, so we
  # do not retry in that case.
  retry_with_backoff(attempts: attempts) do |attempt|
    connection = pool.connection
    transaction_open = connection.transaction_open?

    # `attempt` is nil when retry_with_backoff yields without an attempt
    # number, hence the nil checks here and in the rescue clause.
    if attempt && attempt > 1
      ::Gitlab::Database::LoadBalancing::Logger.warn(
        event: :read_write_retry,
        message: 'A read_write block was retried because of connection error'
      )
    end

    yield connection
  rescue StandardError => e
    # No leaking will happen on the final attempt. Leaks are caused by subsequent retries
    not_final_attempt = attempt && attempt < attempts
    if transaction_open && connection_error?(e) && not_final_attempt
      ::Gitlab::Database::LoadBalancing::Logger.warn(
        event: :transaction_leak,
        message: 'A write transaction has leaked during database fail-over'
      )
    end

    raise e
  end
end

#release_hostObject

Releases the host and connection for the current thread.



161
162
163
164
165
166
167
168
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 161

# Releases the cached host and its connection.
#
# When a host is cached under CACHE_KEY its query cache is disabled and its
# connection released. The cache entry is then deleted either way.
#
# Returns the result of deleting the cache entry.
def release_host
  cached_host = request_cache[CACHE_KEY]

  if cached_host
    cached_host.disable_query_cache!
    cached_host.release_connection
  end

  request_cache.delete(CACHE_KEY)
end

#release_primary_connectionObject



170
171
172
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 170

# Calls `release_connection` on the pool returned by `pool`.
def release_primary_connection
  pool.release_connection
end

#retry_with_backoff(attempts: 3, time: 2) ⇒ Object

Yields a block, retrying it upon error using an exponential backoff.



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 214

# Yields a block, retrying it upon connection errors with a growing delay.
#
# attempts - The maximum number of times the block is run (default: 3).
# time - The delay in seconds before the first retry (default: 2). The
#        delay is squared after every failed attempt.
#
# The block receives the current attempt number (starting at 1), except when
# only a primary is configured: then it is yielded to once, without
# arguments and without any retry handling.
#
# Returns the block's result. Errors that are not connection errors are
# re-raised immediately; once all attempts failed the last connection error
# is raised.
def retry_with_backoff(attempts: 3, time: 2)
  # In CI we only use the primary, but databases may not always be
  # available (or take a few seconds to become available). Retrying in
  # this case can slow down CI jobs. In addition, retrying with _only_
  # a primary being present isn't all that helpful.
  #
  # To prevent this from happening, we don't make any attempt at
  # retrying unless one or more replicas are used. This matches the
  # behaviour from before we enabled load balancing code even if no
  # replicas were configured.
  return yield if primary_only?

  attempt = 1
  last_error = nil

  while attempt <= attempts
    begin
      return yield attempt # Yield the current attempt count
    rescue StandardError => error
      raise error unless connection_error?(error)

      # We need to release the primary connection as otherwise Rails
      # will keep raising errors when using the connection.
      release_primary_connection

      last_error = error

      # Only back off when another attempt follows. Sleeping after the
      # final attempt would merely delay raising the error.
      sleep(time) if attempt < attempts

      attempt += 1
      time **= 2
    end
  end

  raise last_error
end

#select_up_to_date_host(location) ⇒ Object

Finds any up to date replica for the given LSN location and stores an up to date replica in the SafeRequestStore to be used later for read-only queries. It returns a symbol to indicate if :any, :all or :none were found to be caught up.



188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 188

# Checks every host against the given location and caches one that has
# caught up.
#
# location - The location passed to each host's `caught_up?`.
#
# Hosts are visited in shuffled order and the first caught-up host is stored
# in `request_cache` under CACHE_KEY. The outcome of the pick is reported
# through ActiveSupport::Notifications.
#
# Returns NONE_CAUGHT_UP, ANY_CAUGHT_UP or ALL_CAUGHT_UP.
def select_up_to_date_host(location)
  candidates = @host_list.hosts.shuffle

  # We must check all of them so that we know if all are caught up. Some
  # callers only care about finding one caught up host and storing it in
  # request_cache. But Sticking needs to know if ALL_CAUGHT_UP so that it
  # can clear the LSN position from Redis and not ask again in future.
  caught_up_hosts = candidates.select { |candidate| candidate.caught_up?(location) }
  chosen = caught_up_hosts.first

  ActiveSupport::Notifications.instrument(
    'caught_up_replica_pick.load_balancing',
    { result: chosen.present? }
  )

  return NONE_CAUGHT_UP unless chosen

  request_cache[CACHE_KEY] = chosen

  caught_up_hosts.length == candidates.length ? ALL_CAUGHT_UP : ANY_CAUGHT_UP
end

#serialization_failure?(error) ⇒ Boolean

Returns:

  • (Boolean)


275
276
277
278
279
280
281
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 275

# Returns true when the root cause of the given error is a
# PG::TRSerializationFailure.
#
# error - The exception to inspect.
#
# Only the innermost error of the cause chain is examined: an error that has
# a cause is never tested itself.
def serialization_failure?(error)
  root = error
  root = root.cause while root.cause

  root.is_a?(PG::TRSerializationFailure)
end

#wal_diff(location1, location2) ⇒ Object

rubocop:enable Database/MultipleDatabases



320
321
322
323
324
325
326
327
328
329
330
331
332
333
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 320

# Returns the result of `pg_wal_lsn_diff` for the two given locations.
#
# location1 - The first location passed to `pg_wal_lsn_diff`.
# location2 - The second location passed to `pg_wal_lsn_diff`.
#
# Both values are quoted by the connection before being placed in the
# query, which is run on a `read_write` connection. Returns nil when the
# query yields no row.
def wal_diff(location1, location2)
  read_write do |connection|
    quoted_first = connection.quote(location1)
    quoted_second = connection.quote(location2)

    sql = <<~SQL.squish
      SELECT pg_wal_lsn_diff(#{quoted_first}, #{quoted_second})
        AS result
    SQL

    first_row = connection.select_all(sql).first
    first_row ? first_row['result'] : nil
  end
end