Class: Kafka::Client

Inherits: Object
Defined in: lib/kafka/client.rb

Instance Method Summary

Constructor Details

#initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil, ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil, sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil) ⇒ Client

Initializes a new Kafka client.
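
A minimal construction sketch; the broker addresses and client id below are placeholders, and the SSL/SASL arguments shown in the signature are only needed when the cluster requires them:

require "kafka"
require "logger"

# At least one seed broker is needed; the client discovers the rest of
# the cluster's brokers from it.
kafka = Kafka::Client.new(
  seed_brokers: ["kafka1.example.com:9092", "kafka2.example.com:9092"],
  client_id: "my-application",
  logger: Logger.new($stderr),
)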



# File 'lib/kafka/client.rb', line 51

def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
               ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
               sasl_gssapi_principal: nil, sasl_gssapi_keytab: nil,
               sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil)
  @logger = logger || Logger.new(nil)
  @instrumenter = Instrumenter.new(client_id: client_id)
  @seed_brokers = normalize_seed_brokers(seed_brokers)

  ssl_context = build_ssl_context(ssl_ca_cert_file_path, ssl_ca_cert, ssl_client_cert, ssl_client_cert_key)

  @connection_builder = ConnectionBuilder.new(
    client_id: client_id,
    connect_timeout: connect_timeout,
    socket_timeout: socket_timeout,
    ssl_context: ssl_context,
    logger: @logger,
    instrumenter: @instrumenter,
    sasl_gssapi_principal: sasl_gssapi_principal,
    sasl_gssapi_keytab: sasl_gssapi_keytab,
    sasl_plain_authzid: sasl_plain_authzid,
    sasl_plain_username: sasl_plain_username,
    sasl_plain_password: sasl_plain_password
  )

  @cluster = initialize_cluster
end

Instance Method Details

#async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, **options) ⇒ AsyncProducer

Creates a new AsyncProducer instance.

All parameters allowed by #producer can be passed. In addition to this, a few extra parameters can be passed when creating an async producer.
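
A usage sketch, assuming kafka is a client instance built as shown under the constructor; the delivery settings and topic name are illustrative:

producer = kafka.async_producer(
  # Deliver buffered messages every ten seconds...
  delivery_interval: 10,
  # ...or as soon as a hundred messages have been buffered.
  delivery_threshold: 100,
)

# Returns immediately; the message is delivered by a background thread.
producer.produce("hello", topic: "greetings")

# Deliver any remaining buffered messages and stop the background thread
# before the process exits.
producer.shutdown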


# File 'lib/kafka/client.rb', line 208

def async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, **options)
  sync_producer = producer(**options)

  AsyncProducer.new(
    sync_producer: sync_producer,
    delivery_interval: delivery_interval,
    delivery_threshold: delivery_threshold,
    max_queue_size: max_queue_size,
    instrumenter: @instrumenter,
    logger: @logger,
  )
end

#close ⇒ nil

Closes all connections to the Kafka brokers and frees up used resources.



# File 'lib/kafka/client.rb', line 454

def close
  @cluster.disconnect
end

#consumer(group_id:, session_timeout: 30, offset_commit_interval: 10, offset_commit_threshold: 0, heartbeat_interval: 10, offset_retention_time: nil) ⇒ Consumer

Creates a new Kafka consumer.
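
A consumption sketch, assuming kafka is a client instance; the group id and topic name are placeholders, and subscribe/each_message are methods on the returned Consumer rather than on this client:

consumer = kafka.consumer(group_id: "my-group")
consumer.subscribe("greetings")

# Blocks indefinitely, processing messages as they arrive and committing
# offsets at the intervals configured above.
consumer.each_message do |message|
  puts message.topic, message.partition, message.offset, message.value
end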



# File 'lib/kafka/client.rb', line 236

def consumer(group_id:, session_timeout: 30, offset_commit_interval: 10, offset_commit_threshold: 0, heartbeat_interval: 10, offset_retention_time: nil)
  cluster = initialize_cluster

  instrumenter = DecoratingInstrumenter.new(@instrumenter, {
    group_id: group_id,
  })

  # The Kafka protocol expects the retention time to be in ms.
  retention_time = (offset_retention_time && offset_retention_time * 1_000) || -1

  group = ConsumerGroup.new(
    cluster: cluster,
    logger: @logger,
    group_id: group_id,
    session_timeout: session_timeout,
    retention_time: retention_time
  )

  offset_manager = OffsetManager.new(
    cluster: cluster,
    group: group,
    logger: @logger,
    commit_interval: offset_commit_interval,
    commit_threshold: offset_commit_threshold,
    offset_retention_time: offset_retention_time
  )

  heartbeat = Heartbeat.new(
    group: group,
    interval: heartbeat_interval,
  )

  Consumer.new(
    cluster: cluster,
    logger: @logger,
    instrumenter: instrumenter,
    group: group,
    offset_manager: offset_manager,
    session_timeout: session_timeout,
    heartbeat: heartbeat,
  )
end

#deliver_message(value, key: nil, topic:, partition: nil, partition_key: nil) ⇒ nil

Delivers a single message to the Kafka cluster.

Note: Only use this API for low-throughput scenarios. If you want to deliver many messages at a high rate, or if you want to configure the way messages are sent, use the #producer or #async_producer APIs instead.
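
A sketch of the simplest possible delivery; the topic and key are placeholders:

# Sent synchronously with required_acks: 1, as the source below shows;
# raises DeliveryFailed if the message could not be written.
kafka.deliver_message("hello", key: "greetings", topic: "greetings")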



# File 'lib/kafka/client.rb', line 93

def deliver_message(value, key: nil, topic:, partition: nil, partition_key: nil)
  create_time = Time.now

  message = PendingMessage.new(
    value,
    key,
    topic,
    partition,
    partition_key,
    create_time,
  )

  if partition.nil?
    partition_count = @cluster.partitions_for(topic).count
    partition = Partitioner.partition_for_key(partition_count, message)
  end

  buffer = MessageBuffer.new

  buffer.write(
    value: message.value,
    key: message.key,
    topic: message.topic,
    partition: partition,
    create_time: message.create_time,
  )

  @cluster.add_target_topics([topic])

  compressor = Compressor.new(
    instrumenter: @instrumenter,
  )

  operation = ProduceOperation.new(
    cluster: @cluster,
    buffer: buffer,
    required_acks: 1,
    ack_timeout: 10,
    compressor: compressor,
    logger: @logger,
    instrumenter: @instrumenter,
  )

  operation.execute

  unless buffer.empty?
    raise DeliveryFailed
  end
end

#each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block) ⇒ nil

Enumerates all messages in a topic.
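
A sketch, assuming kafka is a client instance and the topic name is a placeholder; note from the source below that the loop never terminates, so the block keeps receiving messages as they are written to the topic:

kafka.each_message(topic: "my-topic") do |message|
  puts message.offset, message.key, message.value
end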



# File 'lib/kafka/client.rb', line 376

def each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block)
  default_offset ||= start_from_beginning ? :earliest : :latest
  offsets = Hash.new { default_offset }

  loop do
    operation = FetchOperation.new(
      cluster: @cluster,
      logger: @logger,
      min_bytes: min_bytes,
      max_wait_time: max_wait_time,
    )

    @cluster.partitions_for(topic).map(&:partition_id).each do |partition|
      partition_offset = offsets[partition]
      operation.fetch_from_partition(topic, partition, offset: partition_offset, max_bytes: max_bytes)
    end

    batches = operation.execute

    batches.each do |batch|
      batch.messages.each(&block)
      offsets[batch.partition] = batch.last_offset + 1
    end
  end
end

#fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576) ⇒ Array<Kafka::FetchedMessage>

Note:

This API is still alpha level. Don't try to use it in production.

Fetches a batch of messages from a single partition. Note that it's possible to get back empty batches.

The starting point for the fetch can be configured with the :offset argument. If you pass a number, the fetch will start at that offset. However, there are two special Symbol values that can be passed instead:

  • :earliest — the first offset in the partition.
  • :latest — the next offset that will be written to, effectively making the call block until there is a new message in the partition.

The Kafka protocol specifies the numeric values of these two options: -2 and -1, respectively. You can also pass in these numbers directly.

Example

When enumerating the messages in a partition, you typically fetch batches sequentially.

offset = :earliest

loop do
  messages = kafka.fetch_messages(
    topic: "my-topic",
    partition: 42,
    offset: offset,
  )

  messages.each do |message|
    puts message.offset, message.key, message.value

    # Set the next offset that should be read to be the subsequent
    # offset.
    offset = message.offset + 1
  end
end

See a working example in examples/simple-consumer.rb.



# File 'lib/kafka/client.rb', line 340

def fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576)
  operation = FetchOperation.new(
    cluster: @cluster,
    logger: @logger,
    min_bytes: min_bytes,
    max_wait_time: max_wait_time,
  )

  operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)

  operation.execute.flat_map {|batch| batch.messages }
end

#last_offset_for(topic, partition) ⇒ Integer

Retrieves the offset of the last message in a partition. If there are no messages in the partition, -1 is returned.
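
A sketch; the topic name and the returned offset are illustrative:

kafka.last_offset_for("my-topic", 0) # => 42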



# File 'lib/kafka/client.rb', line 425

def last_offset_for(topic, partition)
  # The offset resolution API will return the offset of the "next" message to
  # be written when resolving the "latest" offset, so we subtract one.
  @cluster.resolve_offset(topic, partition, :latest) - 1
end

#last_offsets_for(*topics) ⇒ Hash<String, Hash<Integer, Integer>>

Retrieves the offset of the last message in each partition of the specified topics.

Examples:

last_offsets_for('topic-1', 'topic-2') # =>
# {
#   'topic-1' => { 0 => 100, 1 => 100 },
#   'topic-2' => { 0 => 100, 1 => 100 }
# }


# File 'lib/kafka/client.rb', line 442

def last_offsets_for(*topics)
  @cluster.add_target_topics(topics)
  topics.map {|topic|
    partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
    partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
    [topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
  }.to_h
end

#partitions_for(topic) ⇒ Integer

Counts the number of partitions in a topic.
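
A sketch; the topic name and the returned count are illustrative:

kafka.partitions_for("my-topic") # => 3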



# File 'lib/kafka/client.rb', line 414

def partitions_for(topic)
  @cluster.partitions_for(topic).count
end

#producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000) ⇒ Kafka::Producer

Initializes a new Kafka producer.
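
A synchronous production sketch, assuming kafka is a client instance; the topic and settings are illustrative, and :gzip is chosen here only because it needs no extra gem:

producer = kafka.producer(
  # Wait for acknowledgement from all in-sync replicas (the default).
  required_acks: :all,
  # Compress buffers containing ten or more messages.
  compression_codec: :gzip,
  compression_threshold: 10,
)

# Messages are buffered locally; nothing is sent to the cluster until
# #deliver_messages is called.
producer.produce("hello", topic: "greetings")
producer.deliver_messages

# Release the producer's resources when done.
producer.shutdown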



# File 'lib/kafka/client.rb', line 173

def producer(compression_codec: nil, compression_threshold: 1, ack_timeout: 5, required_acks: :all, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000, max_buffer_bytesize: 10_000_000)
  compressor = Compressor.new(
    codec_name: compression_codec,
    threshold: compression_threshold,
    instrumenter: @instrumenter,
  )

  Producer.new(
    cluster: initialize_cluster,
    logger: @logger,
    instrumenter: @instrumenter,
    compressor: compressor,
    ack_timeout: ack_timeout,
    required_acks: required_acks,
    max_retries: max_retries,
    retry_backoff: retry_backoff,
    max_buffer_size: max_buffer_size,
    max_buffer_bytesize: max_buffer_bytesize,
  )
end

#topics ⇒ Array<String>

Lists all topics in the cluster.
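
A sketch; the returned topic names are illustrative:

kafka.topics # => ["greetings", "logs"]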



# File 'lib/kafka/client.rb', line 405

def topics
  @cluster.clear_target_topics
  @cluster.topics
end