Class: Gitlab::Database::LoadBalancing::LoadBalancer
- Inherits:
-
Object
- Object
- Gitlab::Database::LoadBalancing::LoadBalancer
- Defined in:
- lib/gitlab/database/load_balancing/load_balancer.rb
Overview
Load balancing for ActiveRecord connections.
Each host in the load balancer uses the same credentials as the primary database.
Constant Summary collapse
- CACHE_KEY =
:gitlab_load_balancer_host
- REPLICA_SUFFIX =
'_replica'
Instance Attribute Summary collapse
-
#configuration ⇒ Object
readonly
Returns the value of attribute configuration.
-
#host_list ⇒ Object
readonly
Returns the value of attribute host_list.
Instance Method Summary collapse
- #connection_error?(error) ⇒ Boolean
-
#create_replica_connection_pool(pool_size, host = nil, port = nil) ⇒ Object
pool_size - The size of the DB pool.
- #disconnect!(timeout: 120) ⇒ Object
-
#host ⇒ Object
Returns a host to use for queries.
-
#initialize(configuration) ⇒ LoadBalancer
constructor
configuration - An instance of `LoadBalancing::Configuration` that contains the configuration details (such as the hosts) for this load balancer.
- #name ⇒ Object
-
#pool ⇒ Object
ActiveRecord::ConnectionAdapters::ConnectionHandler handles fetching and caching of connection pools for each “connection”, so we leverage that.
- #primary_only? ⇒ Boolean
-
#primary_write_location ⇒ Object
Returns the transaction write location of the primary.
-
#read(&block) ⇒ Object
Yields a connection that can be used for reads.
-
#read_write ⇒ Object
Yields a connection that can be used for both reads and writes.
-
#release_host ⇒ Object
Releases the host and connection for the current thread.
- #release_primary_connection ⇒ Object
-
#retry_with_backoff(retries = 3, time = 2) ⇒ Object
Yields a block, retrying it upon error using an exponential backoff.
-
#select_up_to_date_host(location) ⇒ Object
Returns true if there was at least one host that has caught up with the given transaction.
- #serialization_failure?(error) ⇒ Boolean
- #wal_diff(location1, location2) ⇒ Object
Constructor Details
#initialize(configuration) ⇒ LoadBalancer
configuration - An instance of `LoadBalancing::Configuration` that
contains the configuration details (such as the hosts)
for this load balancer.
20 21 22 23 24 25 26 27 28 29 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 20
# Sets up the load balancer and the list of hosts it rotates through.
#
# configuration - An instance of `LoadBalancing::Configuration` that
#                 contains the configuration details (such as the hosts)
#                 for this load balancer.
#
# When `configuration.load_balancing_enabled?` is false the host list holds
# a single PrimaryHost; otherwise it holds one Host per configured address.
def initialize(configuration)
  @configuration = configuration
  @primary_only = !configuration.load_balancing_enabled?

  hosts =
    if @primary_only
      [PrimaryHost.new(self)]
    else
      configuration.hosts.map { |address| Host.new(address, self) }
    end

  @host_list = HostList.new(hosts)
end
Instance Attribute Details
#configuration ⇒ Object (readonly)
Returns the value of attribute configuration.
15 16 17 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 15
# Read-only accessor for the @configuration instance variable.
def configuration
  @configuration
end
#host_list ⇒ Object (readonly)
Returns the value of attribute host_list.
15 16 17 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 15
# Read-only accessor for the @host_list instance variable.
def host_list
  @host_list
end
Instance Method Details
#connection_error?(error) ⇒ Boolean
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 197
# Returns true when the given error should be treated as a connection error.
#
# error - The exception to classify.
#
# Wrapper errors (ActiveRecord::StatementInvalid, ActionView::Template::Error)
# are classified by recursing into their `cause`; a wrapper without a cause
# is not a connection error.
def connection_error?(error)
  case error
  when ActiveRecord::NoDatabaseError
    # Retrying this error isn't going to magically make the database
    # appear. It also slows down CI jobs that are meant to create the
    # database in the first place.
    false
  when ActiveRecord::StatementInvalid, ActionView::Template::Error
    # After connecting to the DB Rails will wrap query errors using this
    # class.
    if (cause = error.cause)
      connection_error?(cause)
    else
      false
    end
  when *CONNECTION_ERRORS
    true
  else
    # When PG tries to set the client encoding but fails due to a
    # connection error it will raise a PG::Error instance. Catching that
    # would catch all errors (even those we don't want), so instead we
    # check for the message of the error.
    error.message.start_with?('invalid encoding name:')
  end
end
#create_replica_connection_pool(pool_size, host = nil, port = nil) ⇒ Object
pool_size - The size of the DB pool. host - An optional host name to use instead of the default one. port - An optional port to connect to.
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 234
# Establishes a connection for a replica, based on the configuration's
# `replica_db_config`, and returns the result of `establish_connection`.
#
# pool_size - The size of the DB pool.
# host - An optional host name to use instead of the default one.
# port - An optional port to connect to.
def create_replica_connection_pool(pool_size, host = nil, port = nil)
  base_config = @configuration.replica_db_config

  # Work on a copy so the source configuration hash is left untouched.
  settings = base_config.configuration_hash.dup.tap do |hash|
    hash[:pool] = pool_size
    hash[:host] = host if host
    hash[:port] = port if port
  end

  replica_config = ActiveRecord::DatabaseConfigurations::HashConfig.new(
    base_config.env_name,
    base_config.name + REPLICA_SUFFIX,
    settings
  )

  # We cannot use ActiveRecord::Base.connection_handler.establish_connection
  # as it will rewrite ActiveRecord::Base.connection
  handler = ActiveRecord::ConnectionAdapters::ConnectionHandler.new
  handler.establish_connection(replica_config)
end
#disconnect!(timeout: 120) ⇒ Object
39 40 41 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 39
# Calls `disconnect!` on every host in the host list.
#
# timeout - Value forwarded to each host's `disconnect!` (defaults to 120).
def disconnect!(timeout: 120)
  host_list.hosts.each do |member|
    member.disconnect!(timeout: timeout)
  end
end
#host ⇒ Object
Returns a host to use for queries.
Hosts are scoped per thread so that multiple threads don't accidentally re-use the same host + connection.
120 121 122 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 120
# Returns a host to use for queries.
#
# The host is memoized in `request_cache` under CACHE_KEY; the next host is
# only pulled from the host list when nothing is cached yet.
def host
  cache = request_cache
  cache[CACHE_KEY] ||= @host_list.next
end
#name ⇒ Object
31 32 33 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 31
# Returns the `db_config_name` reported by the configuration.
def name
  config = @configuration
  config.db_config_name
end
#pool ⇒ Object
ActiveRecord::ConnectionAdapters::ConnectionHandler handles fetching and caching of connection pools for each “connection”, so we leverage that.
258 259 260 261 262 263 264 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 258
# Looks up the connection pool registered for the primary connection
# specification (writing role, default shard) on ActiveRecord's connection
# handler.
#
# Raises ActiveRecord::ConnectionNotEstablished when no pool is found.
def pool
  primary_pool = ActiveRecord::Base.connection_handler.retrieve_connection_pool(
    @configuration.primary_connection_specification_name,
    role: ActiveRecord::Base.writing_role,
    shard: ActiveRecord::Base.default_shard
  )

  return primary_pool if primary_pool

  raise(::ActiveRecord::ConnectionNotEstablished)
end
#primary_only? ⇒ Boolean
35 36 37 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 35
# Returns the value of the @primary_only flag.
def primary_only?
  @primary_only
end
#primary_write_location ⇒ Object
Returns the transaction write location of the primary.
139 140 141 142 143 144 145 146 147 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 139
# Returns the transaction write location of the primary.
#
# The location is obtained by passing a read-write connection to
# `get_write_location`. Raises a RuntimeError when that yields no value.
def primary_write_location
  write_location = read_write { |connection| get_write_location(connection) }

  write_location || raise('Failed to determine the write location of the primary database')
end
#read(&block) ⇒ Object
Yields a connection that can be used for reads.
If no secondaries were available this method will use the primary instead.
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 47 def read(&block) conflict_retried = 0 while host ensure_caching! begin connection = host.connection return yield connection rescue StandardError => error if primary_only? # If we only have primary configured, retrying is pointless raise error elsif serialization_failure?(error) # This error can occur when a query conflicts. See # https://www.postgresql.org/docs/current/static/hot-standby.html#HOT-STANDBY-CONFLICT # for more information. # # In this event we'll cycle through the secondaries at most 3 # times before using the primary instead. will_retry = conflict_retried < @host_list.length * 3 ::Gitlab::Database::LoadBalancing::Logger.warn( event: :host_query_conflict, message: 'Query conflict on host', conflict_retried: conflict_retried, will_retry: will_retry, db_host: host.host, db_port: host.port, host_list_length: @host_list.length ) if will_retry conflict_retried += 1 release_host else break end elsif connection_error?(error) host.offline! release_host else raise error end end end ::Gitlab::Database::LoadBalancing::Logger.warn( event: :no_secondaries_available, message: 'No secondaries were available, using primary instead', conflict_retried: conflict_retried, host_list_length: @host_list.length ) read_write(&block) end |
#read_write ⇒ Object
Yields a connection that can be used for both reads and writes.
105 106 107 108 109 110 111 112 113 114 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 105
# Yields a connection that can be used for both reads and writes.
#
# The connection is taken from `pool` on every attempt and the block's
# result is returned.
def read_write
  # In the event of a failover the primary may be briefly unavailable.
  # Instead of immediately grinding to a halt we'll retry the operation
  # a few times.
  retry_with_backoff { yield pool.connection }
end
#release_host ⇒ Object
Releases the host and connection for the current thread.
125 126 127 128 129 130 131 132 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 125
# Releases the host and connection for the current thread.
#
# If a host is cached under CACHE_KEY its query cache is disabled and its
# connection released; the cache entry is then removed either way.
def release_host
  cached_host = request_cache[CACHE_KEY]

  if cached_host
    cached_host.disable_query_cache!
    cached_host.release_connection
  end

  request_cache.delete(CACHE_KEY)
end
#release_primary_connection ⇒ Object
134 135 136 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 134
# Calls `release_connection` on the primary pool returned by `pool`.
def release_primary_connection
  primary_pool = pool
  primary_pool.release_connection
end
#retry_with_backoff(retries = 3, time = 2) ⇒ Object
Yields a block, retrying it upon error using an exponential backoff.
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 162
# Yields a block, retrying it upon connection errors with a growing delay.
#
# retries - Maximum number of attempts (defaults to 3).
# time - Initial delay in seconds (defaults to 2); it is squared after
#        every failed attempt.
#
# Returns the block's result. Errors that are not connection errors are
# re-raised immediately; once all attempts failed the last connection error
# is raised.
def retry_with_backoff(retries = 3, time = 2)
  # In CI we only use the primary, but databases may not always be
  # available (or take a few seconds to become available). Retrying in
  # this case can slow down CI jobs. In addition, retrying with _only_
  # a primary being present isn't all that helpful.
  #
  # To prevent this from happening, we don't make any attempt at
  # retrying unless one or more replicas are used. This matches the
  # behaviour from before we enabled load balancing code even if no
  # replicas were configured.
  return yield if primary_only?

  attempts = 0
  delay = time
  failure = nil

  until attempts >= retries
    begin
      return yield
    rescue StandardError => error
      raise error unless connection_error?(error)

      # We need to release the primary connection as otherwise Rails
      # will keep raising errors when using the connection.
      release_primary_connection

      failure = error
      sleep(delay)
      attempts += 1
      delay **= 2
    end
  end

  raise failure
end
#select_up_to_date_host(location) ⇒ Object
Returns true if there was at least one host that has caught up with the given transaction.
150 151 152 153 154 155 156 157 158 159 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 150
# Returns true if there was at least one host that has caught up with the
# given transaction.
#
# location - Value passed to each host's `caught_up?`.
#
# Hosts are checked in shuffled order; the first one that has caught up is
# stored in `request_cache` under CACHE_KEY. Returns false (leaving the
# cache untouched) when none has.
def select_up_to_date_host(location)
  candidate = @host_list.hosts.shuffle.find { |replica| replica.caught_up?(location) }

  return false unless candidate

  request_cache[CACHE_KEY] = candidate

  true
end
#serialization_failure?(error) ⇒ Boolean
223 224 225 226 227 228 229 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 223
# Returns true when the innermost cause of the given error is a
# PG::TRSerializationFailure.
#
# error - The exception whose cause chain is inspected.
def serialization_failure?(error)
  root = error
  # Follow the cause chain down to the error that has no cause of its own.
  root = root.cause while root.cause

  root.is_a?(PG::TRSerializationFailure)
end
#wal_diff(location1, location2) ⇒ Object
266 267 268 269 270 271 272 273 274 275 276 277 278 279 |
# File 'lib/gitlab/database/load_balancing/load_balancer.rb', line 266
# Runs `pg_wal_lsn_diff(location1, location2)` on a read-write connection.
#
# location1 - First argument to pg_wal_lsn_diff (quoted by the connection).
# location2 - Second argument to pg_wal_lsn_diff (quoted by the connection).
#
# Returns the 'result' column of the first row, or nil when there is no row.
def wal_diff(location1, location2)
  read_write do |connection|
    quoted = [location1, location2].map { |location| connection.quote(location) }

    query = <<-SQL.squish
      SELECT pg_wal_lsn_diff(#{quoted.first}, #{quoted.last})
        AS result
    SQL

    first_row = connection.select_all(query).first
    first_row ? first_row['result'] : nil
  end
end