Class: Elevenlabs::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/elevenlabs/client.rb

Constant Summary collapse

BASE_URL =
"https://api.elevenlabs.io"

Instance Method Summary collapse

Constructor Details

#initialize(api_key: nil, open_timeout: 5, read_timeout: 120) ⇒ Client

Note the default param: `api_key: nil`



12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/elevenlabs/client.rb', line 12

# Builds a client whose requests go through a Faraday connection to BASE_URL.
#
# @param api_key [String, nil] API key; when nil (or false) the value of
#   Elevenlabs.configuration&.api_key is used instead
# @param open_timeout [Integer] assigned to the connection's open_timeout option
# @param read_timeout [Integer] assigned to the connection's timeout option
def initialize(api_key: nil, open_timeout: 5, read_timeout: 120)
  # An explicitly supplied key wins over the gem-wide configuration.
  @api_key = api_key || Elevenlabs.configuration&.api_key

  @connection = Faraday.new(url: BASE_URL) do |faraday|
    # Timeouts first; they are independent of the middleware stack below.
    faraday.options.open_timeout = open_timeout
    faraday.options.timeout = read_timeout

    faraday.request :url_encoded
    faraday.response :raise_error
    faraday.adapter Faraday.default_adapter
  end
end

Instance Method Details

#active?(voice_id) ⇒ Boolean

Checks if a voice_id is in list_voices

Parameters:

  • voice_id (String)

Returns:

  • (Boolean)


447
448
449
450
# File 'lib/elevenlabs/client.rb', line 447

# Checks whether a voice ID appears in the result of list_voices.
#
# @param voice_id [String] ID to look for
# @return [Boolean] true when an entry of list_voices["voices"] has a
#   "voice_id" equal to voice_id
def active?(voice_id)
  # Core-Ruby membership test. The previous `voice_id.in?(...)` relied on
  # Object#in?, which only exists when ActiveSupport has been loaded.
  list_voices["voices"].any? { |voice| voice["voice_id"] == voice_id }
end

#banned?(voice_id) ⇒ Boolean

Checks safety control on a single voice for “BAN”

Parameters:

  • voice_id (String)

Returns:

  • (Boolean)


434
435
436
437
# File 'lib/elevenlabs/client.rb', line 434

# Tells whether the voice's "safety_control" field equals "BAN".
#
# @param voice_id [String] ID passed to get_voice
# @return [Boolean]
def banned?(voice_id)
  get_voice(voice_id)["safety_control"] == "BAN"
end

#compose_music(options = {}) ⇒ Object

  1. Compose music (basic)

POST /v1/music



458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
# File 'lib/elevenlabs/client.rb', line 458

# Composes music and returns the response body (POST /v1/music).
#
# @param options [Hash] request options
#   :prompt, :composition_plan, :music_length_ms => copied into the JSON body when not nil
#   :model_id => model identifier, "music_v1" when absent
#   :output_format => sent as a query parameter when truthy
# @return [String] the response body (audio/mpeg is requested via the Accept
#   header); on Faraday::ClientError, whatever handle_error returns
def compose_music(options = {})
  payload = %i[prompt composition_plan music_length_ms].each_with_object({}) do |key, body|
    value = options[key]
    body[key] = value unless value.nil?
  end
  payload[:model_id] = options[:model_id] || "music_v1"

  request_headers = default_headers.merge("Accept" => "audio/mpeg")
  output_format = options[:output_format]
  query_string = URI.encode_www_form(output_format ? { output_format: output_format } : {})

  audio = @connection.post("/v1/music?#{query_string}") do |request|
    request.headers = request_headers
    request.body = payload.to_json
  end

  audio.body
rescue Faraday::ClientError => error
  handle_error(error)
end

#compose_music_detailed(options = {}) ⇒ Object

  1. Compose detailed music (metadata + audio)

POST /v1/music/detailed



511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
# File 'lib/elevenlabs/client.rb', line 511

# Composes music via POST /v1/music/detailed and returns the response body.
#
# @param options [Hash] request options
#   :prompt, :composition_plan, :music_length_ms => copied into the JSON body when not nil
#   :model_id => model identifier, "music_v1" when absent
#   :output_format => sent as a query parameter when truthy
# @return [String] the unparsed response body; on Faraday::ClientError,
#   whatever handle_error returns
def compose_music_detailed(options = {})
  payload = {
    prompt: options[:prompt],
    composition_plan: options[:composition_plan],
    music_length_ms: options[:music_length_ms],
    model_id: options[:model_id] || "music_v1"
  }.reject { |_key, value| value.nil? }

  request_headers = default_headers
  query_pairs = options[:output_format] ? [["output_format", options[:output_format]]] : []
  target = "/v1/music/detailed?#{URI.encode_www_form(query_pairs)}"

  detailed = @connection.post(target) do |request|
    request.headers = request_headers
    request.body = payload.to_json
  end

  # Returned as-is; this method does not parse the body.
  detailed.body
rescue Faraday::ClientError => error
  handle_error(error)
end

#compose_music_stream(options = {}, &block) ⇒ Object

  1. Stream music

POST /v1/music/stream



483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
# File 'lib/elevenlabs/client.rb', line 483

# Streams composed music (POST /v1/music/stream), handing each received
# chunk to the given block.
#
# @param options [Hash] request options
#   :prompt, :composition_plan, :music_length_ms => copied into the JSON body when not nil
#   :model_id => model identifier, "music_v1" when absent
#   :output_format => sent as a query parameter when truthy
# @yield [chunk] each chunk passed to the request's on_data callback
# @return [nil] on success; on Faraday::ClientError, whatever handle_error returns
def compose_music_stream(options = {}, &block)
  payload = { model_id: options[:model_id] || "music_v1" }
  leading = %i[prompt composition_plan music_length_ms].each_with_object({}) do |key, body|
    body[key] = options[key] unless options[key].nil?
  end
  # Keep :model_id last, matching the original key order of the JSON body.
  payload = leading.merge(payload)

  request_headers = default_headers.merge("Accept" => "audio/mpeg")
  output_format = options[:output_format]
  query_string = URI.encode_www_form(output_format ? { output_format: output_format } : {})

  @connection.post("/v1/music/stream?#{query_string}") do |request|
    # A proc (not a lambda) keeps the lenient argument handling of Proc.new.
    request.options.on_data = proc { |chunk, _received| block&.call(chunk) }
    request.headers = request_headers
    request.body = payload.to_json
  end

  nil
rescue Faraday::ClientError => error
  handle_error(error)
end

#create_from_generated_voice(voice_name, voice_description, generated_voice_id, labels: nil, played_not_selected_voice_ids: nil) ⇒ Hash

Create a Voice

(POST /v1/text-to-voice)

Creates a voice from the designed voice identified by generated_voice_id. Documentation: elevenlabs.io/docs/api-reference/text-to-voice

Parameters:

  • voice_name (String)
    • Name of the voice

  • voice_description (String)
    • Description of the voice (20-1000 characters)

  • generated_voice_id (String)
    • The generated voice ID from design_voice

  • labels (Hash) (defaults to: nil)
    • Optional metadata for the voice

  • played_not_selected_voice_ids (Array<String>) (defaults to: nil)
    • Optional list of voice IDs played but not selected

Returns:

  • (Hash)

    JSON response containing voice_id and other voice details



244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/elevenlabs/client.rb', line 244

# Creates a voice from a previously generated preview (POST /v1/text-to-voice).
#
# @param voice_name [String] sent as "voice_name"
# @param voice_description [String] sent as "voice_description"
# @param generated_voice_id [String] sent as "generated_voice_id"
# @param labels [Hash, nil] sent as "labels" unless nil
# @param played_not_selected_voice_ids [Array<String>, nil] sent unless nil
# @return [Hash] the parsed JSON response; on Faraday::ClientError, whatever
#   handle_error returns
def create_from_generated_voice(voice_name, voice_description, generated_voice_id, labels: nil, played_not_selected_voice_ids: nil)
  payload = {
    voice_name: voice_name,
    voice_description: voice_description,
    generated_voice_id: generated_voice_id,
    labels: labels,
    played_not_selected_voice_ids: played_not_selected_voice_ids
  }.reject { |_key, value| value.nil? }

  created = @connection.post("/v1/text-to-voice") do |request|
    request.headers = default_headers
    request.body = payload.to_json
  end

  JSON.parse(created.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#create_music_plan(options = {}) ⇒ Object

  1. Create a composition plan

POST /v1/music/plan



537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
# File 'lib/elevenlabs/client.rb', line 537

# Requests a composition plan (POST /v1/music/plan).
#
# @param options [Hash] request options
#   :prompt, :music_length_ms, :source_composition_plan => copied into the JSON body when not nil
#   :model_id => model identifier, "music_v1" when absent
# @return [Hash] the parsed JSON response with symbolized keys; on
#   Faraday::ClientError, whatever handle_error returns
def create_music_plan(options = {})
  payload = %i[prompt music_length_ms source_composition_plan].each_with_object({}) do |key, body|
    value = options[key]
    body[key] = value unless value.nil?
  end
  payload[:model_id] = options[:model_id] || "music_v1"

  plan = @connection.post("/v1/music/plan") do |request|
    request.headers = default_headers
    request.body = payload.to_json
  end

  # Unlike the voice endpoints in this client, keys come back as symbols.
  JSON.parse(plan.body, symbolize_names: true)
rescue Faraday::ClientError => error
  handle_error(error)
end

#create_voice(name, samples = [], options = {}) ⇒ Object

Creates a new voice. NOTE: This method may require a multipart form request if you are uploading sample audio files.

Parameters:

  • name (String)
    • name of the voice

  • samples (Array<File>) (defaults to: [])
    • array of files to train the voice

  • options (Hash) (defaults to: {})
    • additional parameters

    :description => String



333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
# File 'lib/elevenlabs/client.rb', line 333

# Creates a new voice from sample audio files (POST /v1/voices/add),
# sent as a multipart form.
#
# @param name [String] sent as the "name" form field
# @param samples [Array<#path>] objects responding to #path (e.g. File);
#   every sample is uploaded as an "audio/mpeg" part named "files"
# @param options [Hash] additional parameters
#   :description => String (sent as "description", "" when absent)
# @return [Hash] the parsed JSON response; on Faraday::ClientError, whatever
#   handle_error returns
def create_voice(name, samples = [], options = {})
  endpoint = "/v1/voices/add"

  # flat_encode keeps the part name "files" for array values instead of the
  # default "files[]", so a single sample is encoded exactly as before.
  mp_connection = Faraday.new(url: BASE_URL) do |conn|
    conn.request :multipart, flat_encode: true
    conn.response :raise_error
    conn.adapter Faraday.default_adapter
  end

  form_params = {
    "name" => name,
    "description" => options[:description] || ""
  }

  # One upload per sample. The previous implementation collected
  # ["files", upload] pairs and called #to_h on them, which collapsed the
  # duplicate keys and silently dropped every sample except the last.
  uploads = samples.map do |sample_file|
    Faraday::UploadIO.new(sample_file.path, "audio/mpeg")
  end
  form_params["files"] = uploads unless uploads.empty?

  response = mp_connection.post(endpoint) do |req|
    req.headers["xi-api-key"] = @api_key
    req.body = form_params
  end

  JSON.parse(response.body)
rescue Faraday::ClientError => e
  handle_error(e)
end

#delete_voice(voice_id) ⇒ Hash

Deletes a voice from your account

Parameters:

  • voice_id (String)

Returns:

  • (Hash)

    response



415
416
417
418
419
420
421
422
423
424
# File 'lib/elevenlabs/client.rb', line 415

# Deletes a voice (DELETE /v1/voices/{voice_id}).
#
# @param voice_id [String] interpolated into the request path
# @return [Hash] the parsed JSON response; on Faraday::ClientError, whatever
#   handle_error returns
def delete_voice(voice_id)
  deletion = @connection.delete("/v1/voices/#{voice_id}") do |request|
    request.headers = default_headers
  end

  JSON.parse(deletion.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#design_voice(voice_description, options = {}) ⇒ Hash

Designs a voice based on a description. Documentation: elevenlabs.io/docs/api-reference/text-to-voice/design

Parameters:

  • voice_description (String)
    • Description of the voice (20-1000 characters)

  • options (Hash) (defaults to: {})
    • Optional parameters

    :output_format => String (e.g., “mp3_44100_192”, default: “mp3_44100_192”) :model_id => String (e.g., “eleven_multilingual_ttv_v2”, “eleven_ttv_v3”) :text => String (100-1000 characters, optional) :auto_generate_text => Boolean (default: false) :loudness => Float (-1 to 1, default: 0.5) :seed => Integer (0 to 2147483647, optional) :guidance_scale => Float (0 to 100, default: 5) :stream_previews => Boolean (default: false) :remixing_session_id => String (optional) :remixing_session_iteration_id => String (optional) :quality => Float (-1 to 1, optional) :reference_audio_base64 => String (base64 encoded audio, optional, requires eleven_ttv_v3) :prompt_strength => Float (0 to 1, optional, requires eleven_ttv_v3)

Returns:

  • (Hash)

    JSON response containing previews and text



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'lib/elevenlabs/client.rb', line 201

# Designs a voice from a description (POST /v1/text-to-voice/design).
#
# @param voice_description [String] sent as "voice_description"
# @param options [Hash] optional parameters copied into the JSON body:
#   :output_format, :model_id, :text, :loudness, :seed, :guidance_scale,
#   :remixing_session_id, :remixing_session_iteration_id, :quality,
#   :reference_audio_base64, :prompt_strength => included when truthy
#   :auto_generate_text, :stream_previews => included when not nil
#   (so an explicit false is sent)
# @return [Hash] the parsed JSON response; on Faraday::ClientError, whatever
#   handle_error returns
def design_voice(voice_description, options = {})
  # Listed in the order the keys appear in the request body.
  optional_keys = %i[
    output_format model_id text auto_generate_text loudness seed guidance_scale
    stream_previews remixing_session_id remixing_session_iteration_id quality
    reference_audio_base64 prompt_strength
  ]
  # Flags for which `false` is a meaningful value and must be transmitted.
  explicit_false_keys = %i[auto_generate_text stream_previews]

  payload = optional_keys.each_with_object({ voice_description: voice_description }) do |key, body|
    value = options[key]
    wanted = explicit_false_keys.include?(key) ? !value.nil? : value
    body[key] = value if wanted
  end

  designed = @connection.post("/v1/text-to-voice/design") do |request|
    request.headers = default_headers
    request.body = payload.to_json
  end

  JSON.parse(designed.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#edit_voice(voice_id, samples = [], options = {}) ⇒ Object

Edit a Voice

(POST /v1/voices/{voice_id}/edit)

Updates an existing voice. Recognized options: :name (String) and :description (String).

Parameters:

  • voice_id (String)
  • samples (Array<File>) (defaults to: [])
  • options (Hash) (defaults to: {})


378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/elevenlabs/client.rb', line 378

# Updates an existing voice (POST /v1/voices/{voice_id}/edit) using a
# multipart form.
#
# @param voice_id [String] interpolated into the request path
# @param samples [Array<#path>] objects responding to #path; each becomes an
#   "audio/mpeg" upload named after the file's basename
# @param options [Hash]
#   :name => sent as "name" after #to_s (an absent name becomes "")
#   :description => sent as "description" ("" when absent)
# @return [Hash] the parsed JSON response; on Faraday::ClientError, whatever
#   handle_error returns
def edit_voice(voice_id, samples = [], options = {})
  # Text fields are coerced to strings before being placed in the form.
  fields = {
    "name"        => options[:name].to_s,
    "description" => (options[:description] || "").to_s
  }

  uploads = samples.map do |sample|
    location = sample.path
    Faraday::UploadIO.new(location, "audio/mpeg", File.basename(location))
  end
  fields["files[]"] = uploads

  multipart = Faraday.new(url: BASE_URL) do |faraday|
    faraday.request :multipart
    faraday.response :raise_error
    faraday.adapter Faraday.default_adapter
  end

  edited = multipart.post("/v1/voices/#{voice_id}/edit") do |request|
    request.headers["xi-api-key"] = @api_key
    request.body = fields
  end

  JSON.parse(edited.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#get_voice(voice_id) ⇒ Hash

Retrieves details about a single voice

Parameters:

  • voice_id (String)

Returns:

  • (Hash)

    Details of the voice



310
311
312
313
314
315
316
317
318
# File 'lib/elevenlabs/client.rb', line 310

# Fetches a single voice (GET /v1/voices/{voice_id}).
#
# @param voice_id [String] interpolated into the request path
# @return [Hash] the parsed JSON response; on Faraday::ClientError, whatever
#   handle_error returns
def get_voice(voice_id)
  fetched = @connection.get("/v1/voices/#{voice_id}") do |request|
    request.headers = default_headers
  end

  JSON.parse(fetched.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#list_models ⇒ Hash

Gets a list of available models Documentation: elevenlabs.io/docs/api-reference/models/list

Returns:

  • (Hash)

    The JSON response containing an array of models



291
292
293
294
295
296
297
298
299
# File 'lib/elevenlabs/client.rb', line 291

# Lists available models (GET /v1/models).
#
# @return [Hash] the parsed JSON response; on Faraday::ClientError, whatever
#   handle_error returns
def list_models
  listing = @connection.get("/v1/models") { |request| request.headers = default_headers }

  JSON.parse(listing.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#list_voices ⇒ Hash

Retrieves all voices associated with your Elevenlabs account Documentation: elevenlabs.io/docs/api-reference/voices

Returns:

  • (Hash)

    The JSON response containing an array of voices



272
273
274
275
276
277
278
279
280
# File 'lib/elevenlabs/client.rb', line 272

# Lists voices (GET /v1/voices).
#
# @return [Hash] the parsed JSON response; on Faraday::ClientError, whatever
#   handle_error returns
def list_voices
  listing = @connection.get("/v1/voices") { |request| request.headers = default_headers }

  JSON.parse(listing.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#sound_generation(text, options = {}) ⇒ String

Convert text to sound effects and retrieve audio (binary data) Documentation: elevenlabs.io/docs/api-reference/sound-generation

Parameters:

  • text (String)
    • text prompt describing the sound effect

  • options (Hash) (defaults to: {})
    • optional parameters

    :loop => Boolean (whether to create a looping sound effect, default: false) :duration_seconds => Float (0.5 to 30 seconds, default: nil for auto-detection) :prompt_influence => Float (0.0 to 1.0, default: 0.3) :output_format => String (e.g., “mp3_22050_32”, default: “mp3_44100_128”)

Returns:

  • (String)

    The binary audio data (usually an MP3).



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/elevenlabs/client.rb', line 150

# Converts a text prompt into a sound effect (POST /v1/sound-generation)
# and returns the response body.
#
# @param text [String] sent as "text"
# @param options [Hash] optional parameters
#   :loop => sent when not nil (so an explicit false is transmitted)
#   :duration_seconds => sent when truthy
#   :prompt_influence => sent when truthy
#   :output_format => sent as a query parameter when truthy
# @return [String] the response body (audio/mpeg is requested via the Accept
#   header); on Faraday::ClientError, whatever handle_error returns
def sound_generation(text, options = {})
  endpoint = "/v1/sound-generation"
  request_body = { text: text }

  request_body[:loop] = options[:loop] unless options[:loop].nil?
  request_body[:duration_seconds] = options[:duration_seconds] if options[:duration_seconds]
  request_body[:prompt_influence] = options[:prompt_influence] if options[:prompt_influence]

  # merge builds a new hash, so the object returned by default_headers is
  # never modified (same approach as compose_music).
  headers = default_headers.merge("Accept" => "audio/mpeg")

  query = {}
  query[:output_format] = options[:output_format] if options[:output_format]

  response = @connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
    req.headers = headers
    req.body = request_body.to_json
  end

  response.body
rescue Faraday::ClientError => e
  handle_error(e)
end

#text_to_dialogue(inputs, model_id = nil, settings = {}, seed = nil) ⇒ String

Converts a list of text and voice ID pairs into speech (dialogue) and returns audio. Documentation: elevenlabs.io/docs/api-reference/text-to-dialogue/convert

Parameters:

  • inputs (Array<Hash>)
    • A list of dialogue inputs, each containing text and a voice ID which will be converted into speech

    :text => String :voice_id => String

  • model_id (String) (defaults to: nil)
    • optional Identifier of the model to be used

  • settings (Hash) (defaults to: {})
    • optional Settings controlling the dialogue generation

    :stability => double - 0.0 = Creative, 0.5 = Natural, 1.0 = Robust :use_speaker_boost => boolean

  • seed (Integer) (defaults to: nil)
    • optional Best effort to sample deterministically.

Returns:

  • (String)

    The binary audio data (usually an MP3).



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/elevenlabs/client.rb', line 111

# Converts dialogue inputs into speech (POST /v1/text-to-dialogue) and
# returns the response body.
#
# @param inputs [Array<Hash>] sent as "inputs"
# @param model_id [String, nil] sent as "model_id" when truthy
# @param settings [Hash, nil] sent as "settings" unless nil or empty
# @param seed [Integer, nil] sent as "seed" when truthy
# @return [String] the response body (audio/mpeg is requested via the Accept
#   header); on Faraday::ClientError, whatever handle_error returns
def text_to_dialogue(inputs, model_id = nil, settings = {}, seed = nil)
  endpoint = "/v1/text-to-dialogue"

  request_body = { inputs: inputs }
  request_body[:model_id] = model_id if model_id
  # Callers must pass something positionally to reach `seed`; accept nil
  # here instead of failing on nil.empty?.
  request_body[:settings] = settings unless settings.nil? || settings.empty?
  request_body[:seed] = seed if seed

  # merge builds a new hash, so the object returned by default_headers is
  # never modified (same approach as compose_music).
  headers = default_headers.merge("Accept" => "audio/mpeg")

  response = @connection.post(endpoint) do |req|
    req.headers = headers
    req.body = request_body.to_json
  end

  response.body
rescue Faraday::ClientError => e
  handle_error(e)
end

#text_to_speech(voice_id, text, options = {}) ⇒ String

Convert text to speech and retrieve audio (binary data) Documentation: elevenlabs.io/docs/api-reference/text-to-speech/convert

Parameters:

  • voice_id (String)
    • the ID of the voice to use

  • text (String)
    • text to synthesize

  • options (Hash) (defaults to: {})
    • optional TTS parameters

    :model_id => String (e.g. “eleven_monolingual_v1” or “eleven_multilingual_v1”) :voice_settings => Hash (stability, similarity_boost, style, use_speaker_boost, etc.) :optimize_streaming => Boolean (whether to receive chunked streaming audio)

Returns:

  • (String)

    The binary audio data (usually an MP3).



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/elevenlabs/client.rb', line 41

# Converts text to speech (POST /v1/text-to-speech/{voice_id}) and returns
# the response body.
#
# @param voice_id [String] interpolated into the request path
# @param text [String] sent as "text"
# @param options [Hash] optional parameters
#   :voice_settings => sent as "voice_settings" when truthy
#   :model_id => sent as "model_id" when truthy
#   :optimize_streaming => when truthy, adds the "Accept: audio/mpeg" and
#     "Transfer-Encoding: chunked" request headers
# @return [String] the response body; on Faraday::ClientError, whatever
#   handle_error returns
def text_to_speech(voice_id, text, options = {})
  endpoint = "/v1/text-to-speech/#{voice_id}"

  request_body = { text: text }
  request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
  request_body[:model_id] = options[:model_id] if options[:model_id]

  streaming_headers =
    if options[:optimize_streaming]
      { "Accept" => "audio/mpeg", "Transfer-Encoding" => "chunked" }
    else
      {}
    end
  # merge builds a new hash, so the object returned by default_headers is
  # never modified (same approach as compose_music).
  headers = default_headers.merge(streaming_headers)

  response = @connection.post(endpoint) do |req|
    req.headers = headers
    req.body = request_body.to_json
  end

  response.body
rescue Faraday::ClientError => e
  handle_error(e)
end

#text_to_speech_stream(voice_id, text, options = {}, &block) ⇒ Object

Text-to-Speech Stream (POST /v1/text-to-speech/{voice_id}/stream)



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/elevenlabs/client.rb', line 75

# Streams synthesized speech (POST /v1/text-to-speech/{voice_id}/stream),
# handing each received chunk to the given block.
#
# @param voice_id [String] interpolated into the request path
# @param text [String] sent as "text"
# @param options [Hash] optional parameters
#   :model_id => sent as "model_id" ("eleven_multilingual_v2" when absent)
#   :output_format => sent as the output_format query parameter
#     ("mp3_44100_128" when absent)
# @yield [chunk] each chunk passed to the request's on_data callback
# @return [Object] whatever @connection.post returns; on
#   Faraday::ClientError, whatever handle_error returns
def text_to_speech_stream(voice_id, text, options = {}, &block)
  # Previously hard-coded; the default reproduces the old URL exactly.
  output_format = options[:output_format] || "mp3_44100_128"
  query = URI.encode_www_form(output_format: output_format)
  endpoint = "/v1/text-to-speech/#{voice_id}/stream?#{query}"

  request_body = { text: text, model_id: options[:model_id] || "eleven_multilingual_v2" }

  # merge builds a new hash, so the object returned by default_headers is
  # never modified (same approach as compose_music).
  headers = default_headers.merge("Accept" => "audio/mpeg")

  @connection.post(endpoint, request_body.to_json, headers) do |req|
    # A proc (not a lambda) keeps the lenient argument handling of Proc.new.
    req.options.on_data = proc { |chunk, _received| block&.call(chunk) }
  end
rescue Faraday::ClientError => e
  handle_error(e)
end