Class: Elevenlabs::Client

Inherits:

Object

Object
Elevenlabs::Client

show all

Defined in:: lib/elevenlabs/client.rb

Constant Summary collapse

BASE_URL =

"https://api.elevenlabs.io"

Instance Method Summary collapse

#active?(voice_id) ⇒ Boolean

Checks if a voice_id is in list_voices.
#banned?(voice_id) ⇒ Boolean

Checks safety control on a single voice for “BAN”.
#compose_music(options = {}) ⇒ Object

1. Compose music (basic): POST /v1/music.
#compose_music_detailed(options = {}) ⇒ Object

3. Compose detailed music (metadata + audio): POST /v1/music/detailed.
#compose_music_stream(options = {}, &block) ⇒ Object

2. Stream music: POST /v1/music/stream.
#create_from_generated_voice(voice_name, voice_description, generated_voice_id, labels: nil, played_not_selected_voice_ids: nil) ⇒ Hash

Creates a voice from a designed voice preview (POST /v1/text-to-voice).
#create_music_plan(options = {}) ⇒ Object

4. Create a composition plan: POST /v1/music/plan.
#create_voice(name, samples = [], options = {}) ⇒ Object

Creates a new voice NOTE: This method may require a multipart form request if you are uploading sample audio files.
#delete_voice(voice_id) ⇒ Hash

Deletes a voice from your account.
#design_voice(voice_description, options = {}) ⇒ Hash

Designs a voice based on a description Documentation: elevenlabs.io/docs/api-reference/text-to-voice/design.
#edit_voice(voice_id, samples = [], options = {}) ⇒ Object

Edits an existing voice (POST /v1/voices/{voice_id}/edit).
#get_voice(voice_id) ⇒ Hash

Retrieves details about a single voice.
#initialize(api_key: nil, open_timeout: 5, read_timeout: 120) ⇒ Client constructor

Note the default param: `api_key: nil`.
#list_models ⇒ Hash

Gets a list of available models Documentation: elevenlabs.io/docs/api-reference/models/list.
#list_voices ⇒ Hash

Retrieves all voices associated with your Elevenlabs account Documentation: elevenlabs.io/docs/api-reference/voices.
#sound_generation(text, options = {}) ⇒ String

Convert text to sound effects and retrieve audio (binary data) Documentation: elevenlabs.io/docs/api-reference/sound-generation.
#text_to_dialogue(inputs, model_id = nil, settings = {}, seed = nil) ⇒ String

Converts a list of text and voice ID pairs into speech (dialogue) and returns audio.
#text_to_speech(voice_id, text, options = {}) ⇒ String

Convert text to speech and retrieve audio (binary data) Documentation: elevenlabs.io/docs/api-reference/text-to-speech/convert.
#text_to_speech_stream(voice_id, text, options = {}, &block) ⇒ Object

Streams text-to-speech audio (POST /v1/text-to-speech/{voice_id}/stream).

Constructor Details

#initialize(api_key: nil, open_timeout: 5, read_timeout: 120) ⇒ `Client`

Note the default param: `api_key: nil`. When no key is passed, the gem-wide `Elevenlabs.configuration` API key is used instead.

# File 'lib/elevenlabs/client.rb', line 12

# Builds a client together with the Faraday connection it reuses for JSON requests.
#
# @param api_key [String, nil] explicit API key; when nil, the value of
#   Elevenlabs.configuration&.api_key is used instead
# @param open_timeout [Integer] value assigned to the connection's open_timeout option
# @param read_timeout [Integer] value assigned to the connection's timeout option
def initialize(api_key: nil, open_timeout: 5, read_timeout: 120)
  @api_key = api_key
  @api_key ||= Elevenlabs.configuration&.api_key

  @connection = Faraday.new(url: BASE_URL) do |faraday|
    # Plain option assignments; independent of the middleware stack below.
    faraday.options.open_timeout = open_timeout
    faraday.options.timeout = read_timeout

    faraday.request :url_encoded
    faraday.response :raise_error
    faraday.adapter Faraday.default_adapter
  end
end

Instance Method Details

#active?(voice_id) ⇒ `Boolean`

Checks if a voice_id is in list_voices

Parameters:

voice_id (String)

Returns:

(Boolean)

# File 'lib/elevenlabs/client.rb', line 447

# Checks whether a voice ID appears in the list returned by list_voices.
#
# Uses a core Enumerable check instead of Object#in?, which only exists when
# ActiveSupport is loaded.
#
# @param voice_id [String] the voice ID to look for
# @return [Boolean] true when an entry under "voices" has this "voice_id"
def active?(voice_id)
  list_voices["voices"].any? { |voice| voice["voice_id"] == voice_id }
end

#banned?(voice_id) ⇒ `Boolean`

Checks safety control on a single voice for “BAN”

Parameters:

voice_id (String)

Returns:

(Boolean)

# File 'lib/elevenlabs/client.rb', line 434

# Reports whether the voice's "safety_control" field equals "BAN".
#
# @param voice_id [String] the voice ID passed to get_voice
# @return [Boolean] true only when get_voice(voice_id)["safety_control"] is "BAN"
def banned?(voice_id)
  get_voice(voice_id)["safety_control"] == "BAN"
end

#compose_music(options = {}) ⇒ `Object`

Compose music (basic)

POST /v1/music

# File 'lib/elevenlabs/client.rb', line 458

# Composes music via POST /v1/music and returns the raw response body.
#
# @param options [Hash] request options
#   :prompt, :composition_plan, :music_length_ms => sent in the JSON body when not nil
#   :model_id => String (defaults to "music_v1")
#   :output_format => sent as a query parameter when given
# @return [String] the response body (raw binary audio)
def compose_music(options = {})
  payload = %i[prompt composition_plan music_length_ms].each_with_object({}) do |key, acc|
    acc[key] = options[key] unless options[key].nil?
  end
  payload[:model_id] = options[:model_id] || "music_v1"

  audio_headers = default_headers.merge("Accept" => "audio/mpeg")
  query = options[:output_format] ? { output_format: options[:output_format] } : {}

  reply = @connection.post("/v1/music?#{URI.encode_www_form(query)}") do |req|
    req.body = payload.to_json
    req.headers = audio_headers
  end

  reply.body
rescue Faraday::ClientError => error
  handle_error(error)
end

#compose_music_detailed(options = {}) ⇒ `Object`

Compose detailed music (metadata + audio)

POST /v1/music/detailed

# File 'lib/elevenlabs/client.rb', line 511

# Composes music via POST /v1/music/detailed and returns the raw response body.
#
# @param options [Hash] request options
#   :prompt, :composition_plan, :music_length_ms => sent in the JSON body when not nil
#   :model_id => String (defaults to "music_v1")
#   :output_format => sent as a query parameter when given
# @return [String] the unparsed response body (multipart/mixed with JSON + binary audio)
def compose_music_detailed(options = {})
  payload = %i[prompt composition_plan music_length_ms].each_with_object({}) do |key, acc|
    acc[key] = options[key] unless options[key].nil?
  end
  payload[:model_id] = options[:model_id] || "music_v1"

  request_headers = default_headers
  query = options[:output_format] ? { output_format: options[:output_format] } : {}

  reply = @connection.post("/v1/music/detailed?#{URI.encode_www_form(query)}") do |req|
    req.body = payload.to_json
    req.headers = request_headers
  end

  reply.body
rescue Faraday::ClientError => error
  handle_error(error)
end

#compose_music_stream(options = {}, &block) ⇒ `Object`

Stream music

POST /v1/music/stream

# File 'lib/elevenlabs/client.rb', line 483

# Streams composed music via POST /v1/music/stream, passing each chunk to the block.
#
# @param options [Hash] request options
#   :prompt, :composition_plan, :music_length_ms => sent in the JSON body when not nil
#   :model_id => String (defaults to "music_v1")
#   :output_format => sent as a query parameter when given
# @yieldparam chunk [String] each chunk handed to the request's on_data callback
# @return [nil] always nil; audio is only delivered through the block
def compose_music_stream(options = {}, &block)
  payload = %i[prompt composition_plan music_length_ms].each_with_object({}) do |key, acc|
    acc[key] = options[key] unless options[key].nil?
  end
  payload[:model_id] = options[:model_id] || "music_v1"

  audio_headers = default_headers.merge("Accept" => "audio/mpeg")
  query = options[:output_format] ? { output_format: options[:output_format] } : {}

  @connection.post("/v1/music/stream?#{URI.encode_www_form(query)}") do |req|
    # Kept as a non-lambda proc so extra callback arguments are ignored.
    req.options.on_data = proc { |chunk, _| block&.call(chunk) }
    req.headers = audio_headers
    req.body = payload.to_json
  end

  nil
rescue Faraday::ClientError => error
  handle_error(error)
end

#create_from_generated_voice(voice_name, voice_description, generated_voice_id, labels: nil, played_not_selected_voice_ids: nil) ⇒ `Hash`

Create a Voice

(POST /v1/text-to-voice)

Creates a voice from the designed voice generated_voice_id Documentation: elevenlabs.io/docs/api-reference/text-to-voice

Parameters:

voice_name (String) —
- Name of the voice
voice_description (String) —
- Description of the voice (20-1000 characters)
generated_voice_id (String) —
- The generated voice ID from design_voice
labels (Hash) (defaults to: nil) —
- Optional metadata for the voice
played_not_selected_voice_ids (Array<String>) (defaults to: nil) —
- Optional list of voice IDs played but not selected

Returns:

(Hash) —

JSON response containing voice_id and other voice details

# File 'lib/elevenlabs/client.rb', line 244

# Creates a voice from a previously generated voice via POST /v1/text-to-voice.
#
# Any argument that is nil is left out of the JSON body.
#
# @param voice_name [String] name of the voice
# @param voice_description [String] description of the voice
# @param generated_voice_id [String] the generated voice ID
# @param labels [Hash, nil] optional metadata for the voice
# @param played_not_selected_voice_ids [Array<String>, nil] optional list of voice IDs
# @return [Hash] the parsed JSON response
def create_from_generated_voice(voice_name, voice_description, generated_voice_id, labels: nil, played_not_selected_voice_ids: nil)
  candidates = {
    voice_name: voice_name,
    voice_description: voice_description,
    generated_voice_id: generated_voice_id,
    labels: labels,
    played_not_selected_voice_ids: played_not_selected_voice_ids
  }
  payload = candidates.reject { |_key, value| value.nil? }

  reply = @connection.post("/v1/text-to-voice") do |req|
    req.body = payload.to_json
    req.headers = default_headers
  end

  JSON.parse(reply.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#create_music_plan(options = {}) ⇒ `Object`

Create a composition plan

POST /v1/music/plan

# File 'lib/elevenlabs/client.rb', line 537

# Creates a composition plan via POST /v1/music/plan.
#
# @param options [Hash] request options
#   :prompt, :music_length_ms, :source_composition_plan => sent in the JSON body when not nil
#   :model_id => String (defaults to "music_v1")
# @return [Hash] the parsed JSON response with symbolized keys
def create_music_plan(options = {})
  payload = %i[prompt music_length_ms source_composition_plan].each_with_object({}) do |key, acc|
    acc[key] = options[key] unless options[key].nil?
  end
  payload[:model_id] = options[:model_id] || "music_v1"

  reply = @connection.post("/v1/music/plan") do |req|
    req.body = payload.to_json
    req.headers = default_headers
  end

  JSON.parse(reply.body, symbolize_names: true)
rescue Faraday::ClientError => error
  handle_error(error)
end

#create_voice(name, samples = [], options = {}) ⇒ `Object`

Creates a new voice NOTE: This method may require a multipart form request

if you are uploading sample audio files.

Parameters:

name (String) —
- name of the voice
samples (Array<File>) (defaults to: []) —
- array of files to train the voice
options (Hash) (defaults to: {}) —
- additional parameters
:description => String

# File 'lib/elevenlabs/client.rb', line 333

# Creates a new voice by POSTing a multipart form to /v1/voices/add.
#
# @param name [String] name of the voice
# @param samples [Array<#path>] objects whose #path is handed to Faraday::UploadIO
# @param options [Hash] additional parameters
#   :description => String (an empty string is sent when omitted)
# @return [Hash] the parsed JSON response
def create_voice(name, samples = [], options = {})
  endpoint = "/v1/voices/add"

  # Ensure Faraday handles multipart form data
  # (a dedicated connection is built per call; it does not reuse @connection)
  mp_connection = Faraday.new(url: BASE_URL) do |conn|
    conn.request :multipart
    conn.response :raise_error
    conn.adapter Faraday.default_adapter
  end

  # Build multipart form parameters
  form_params = {
    "name" => name,
    "description" => options[:description] || ""
  }

  # Convert File objects to multipart upload format
  # Every pair collected here uses the same "files" key; the index `i` is unused.
  sample_files = []
  samples.each_with_index do |sample_file, i|
    sample_files << ["files", Faraday::UploadIO.new(sample_file.path, "audio/mpeg")]
  end

  # Perform the POST request
  response = mp_connection.post(endpoint) do |req|
    req.headers["xi-api-key"] = @api_key
    # NOTE(review): sample_files.to_h keeps one value per key, so when several
    # samples are given only the last upload remains under "files" — confirm
    # whether all samples are meant to be sent.
    req.body = form_params.merge(sample_files.to_h)
  end

  JSON.parse(response.body)
rescue Faraday::ClientError => e
  # Client errors raised by the :raise_error middleware are delegated to handle_error.
  handle_error(e)
end

#delete_voice(voice_id) ⇒ `Hash`

Deletes a voice from your account

Parameters:

voice_id (String)

Returns:

(Hash) —

response

# File 'lib/elevenlabs/client.rb', line 415

# Deletes a voice via DELETE /v1/voices/{voice_id}.
#
# @param voice_id [String] voice ID interpolated into the request path
# @return [Hash] the parsed JSON response
def delete_voice(voice_id)
  reply = @connection.delete("/v1/voices/#{voice_id}") { |req| req.headers = default_headers }
  JSON.parse(reply.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#design_voice(voice_description, options = {}) ⇒ `Hash`

Designs a voice based on a description Documentation: elevenlabs.io/docs/api-reference/text-to-voice/design

Parameters:

voice_description (String) —
- Description of the voice (20-1000 characters)
options (Hash) (defaults to: {}) —
- Optional parameters
:output_format => String (e.g., “mp3_44100_192”, default: “mp3_44100_192”) :model_id => String (e.g., “eleven_multilingual_ttv_v2”, “eleven_ttv_v3”) :text => String (100-1000 characters, optional) :auto_generate_text => Boolean (default: false) :loudness => Float (-1 to 1, default: 0.5) :seed => Integer (0 to 2147483647, optional) :guidance_scale => Float (0 to 100, default: 5) :stream_previews => Boolean (default: false) :remixing_session_id => String (optional) :remixing_session_iteration_id => String (optional) :quality => Float (-1 to 1, optional) :reference_audio_base64 => String (base64 encoded audio, optional, requires eleven_ttv_v3) :prompt_strength => Float (0 to 1, optional, requires eleven_ttv_v3)

Returns:

(Hash) —

JSON response containing previews and text

# File 'lib/elevenlabs/client.rb', line 201

# Designs a voice from a description via POST /v1/text-to-voice/design.
#
# @param voice_description [String] description of the voice
# @param options [Hash] optional parameters copied into the JSON body:
#   :output_format, :model_id, :text, :loudness, :seed, :guidance_scale,
#   :remixing_session_id, :remixing_session_iteration_id, :quality,
#   :reference_audio_base64, :prompt_strength => included when truthy
#   :auto_generate_text, :stream_previews => included whenever not nil (false is sent)
# @return [Hash] the parsed JSON response
def design_voice(voice_description, options = {})
  # Flags that must be forwarded even when explicitly false.
  nil_checked = %i[auto_generate_text stream_previews]
  # Listed in the order the keys are added to the request body.
  optional_keys = %i[
    output_format model_id text auto_generate_text loudness seed guidance_scale
    stream_previews remixing_session_id remixing_session_iteration_id quality
    reference_audio_base64 prompt_strength
  ]

  payload = optional_keys.each_with_object({ voice_description: voice_description }) do |key, body|
    value = options[key]
    wanted = nil_checked.include?(key) ? !value.nil? : value
    body[key] = value if wanted
  end

  reply = @connection.post("/v1/text-to-voice/design") do |req|
    req.body = payload.to_json
    req.headers = default_headers
  end

  JSON.parse(reply.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#edit_voice(voice_id, samples = [], options = {}) ⇒ `Object`

Edit a Voice

(POST /v1/voices/{voice_id}/edit)

Updates an existing voice. Recognized options: `:name` (String) and `:description` (String).

Parameters:

voice_id (String)
samples (Array<File>) (defaults to: [])
options (Hash) (defaults to: {})

# File 'lib/elevenlabs/client.rb', line 378

# Edits an existing voice by POSTing a multipart form to /v1/voices/{voice_id}/edit.
#
# @param voice_id [String] voice ID interpolated into the request path
# @param samples [Array<#path>] objects whose #path is uploaded as "audio/mpeg"
# @param options [Hash] :name and :description, both converted to strings
#   (a missing description becomes an empty string)
# @return [Hash] the parsed JSON response
def edit_voice(voice_id, samples = [], options = {})
  form = {
    "name" => options[:name].to_s,
    "description" => (options[:description] || "").to_s,
    "files[]" => samples.map do |sample|
      Faraday::UploadIO.new(sample.path, "audio/mpeg", File.basename(sample.path))
    end
  }

  # A dedicated multipart connection is built for this call.
  uploader = Faraday.new(url: BASE_URL) do |faraday|
    faraday.request :multipart
    faraday.response :raise_error
    faraday.adapter Faraday.default_adapter
  end

  reply = uploader.post("/v1/voices/#{voice_id}/edit") do |req|
    req.headers["xi-api-key"] = @api_key
    req.body = form
  end

  JSON.parse(reply.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#get_voice(voice_id) ⇒ `Hash`

Retrieves details about a single voice

Parameters:

voice_id (String)

Returns:

(Hash) —

Details of the voice

# File 'lib/elevenlabs/client.rb', line 310

# Fetches a single voice via GET /v1/voices/{voice_id}.
#
# @param voice_id [String] voice ID interpolated into the request path
# @return [Hash] the parsed JSON response
def get_voice(voice_id)
  reply = @connection.get("/v1/voices/#{voice_id}") { |req| req.headers = default_headers }
  JSON.parse(reply.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#list_models ⇒ `Hash`

Gets a list of available models Documentation: elevenlabs.io/docs/api-reference/models/list

Returns:

(Hash) —

The JSON response containing an array of models

# File 'lib/elevenlabs/client.rb', line 291

# Fetches the available models via GET /v1/models.
#
# @return [Object] the parsed JSON response
def list_models
  reply = @connection.get("/v1/models") { |req| req.headers = default_headers }
  JSON.parse(reply.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#list_voices ⇒ `Hash`

Retrieves all voices associated with your Elevenlabs account Documentation: elevenlabs.io/docs/api-reference/voices

Returns:

(Hash) —

The JSON response containing an array of voices

# File 'lib/elevenlabs/client.rb', line 272

# Fetches the account's voices via GET /v1/voices.
#
# @return [Hash] the parsed JSON response
def list_voices
  reply = @connection.get("/v1/voices") { |req| req.headers = default_headers }
  JSON.parse(reply.body)
rescue Faraday::ClientError => error
  handle_error(error)
end

#sound_generation(text, options = {}) ⇒ `String`

Convert text to sound effects and retrieve audio (binary data) Documentation: elevenlabs.io/docs/api-reference/sound-generation

Parameters:

text (String) —
- text prompt describing the sound effect
options (Hash) (defaults to: {}) —
- optional parameters
:loop => Boolean (whether to create a looping sound effect, default: false) :duration_seconds => Float (0.5 to 30 seconds, default: nil for auto-detection) :prompt_influence => Float (0.0 to 1.0, default: 0.3) :output_format => String (e.g., “mp3_22050_32”, default: “mp3_44100_128”)

Returns:

(String) —

The binary audio data (usually an MP3).

# File 'lib/elevenlabs/client.rb', line 150

# Converts a text prompt into a sound effect via POST /v1/sound-generation.
#
# @param text [String] text prompt describing the sound effect
# @param options [Hash] optional parameters
#   :loop => Boolean, sent whenever it is not nil (so false is forwarded)
#   :duration_seconds => sent in the body when truthy
#   :prompt_influence => sent in the body when truthy
#   :output_format => String, sent as a query parameter when given
# @return [String] the raw response body (binary audio)
def sound_generation(text, options = {})
  endpoint = "/v1/sound-generation"
  request_body = { text: text }

  # Add optional parameters if provided
  request_body[:loop] = options[:loop] unless options[:loop].nil?
  request_body[:duration_seconds] = options[:duration_seconds] if options[:duration_seconds]
  request_body[:prompt_influence] = options[:prompt_influence] if options[:prompt_influence]

  # merge returns a new hash, so the hash handed out by default_headers is
  # never modified (same approach as compose_music).
  headers = default_headers.merge("Accept" => "audio/mpeg")

  query = {}
  query[:output_format] = options[:output_format] if options[:output_format]

  response = @connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
    req.headers = headers
    req.body = request_body.to_json
  end

  # Returns raw binary data (often MP3)
  response.body
rescue Faraday::ClientError => e
  handle_error(e)
end

#text_to_dialogue(inputs, model_id = nil, settings = {}, seed = nil) ⇒ `String`

Converts a list of text and voice ID pairs into speech (dialogue) and returns audio. Documentation: elevenlabs.io/docs/api-reference/text-to-dialogue/convert

Parameters:

inputs (Array[Objects]) —
- A list of dialogue inputs, each containing text and a voice ID which will be converted into speech
:text => String :voice_id => String
model_id (String) (defaults to: nil) —
- optional Identifier of the model to be used
settings (Hash) (defaults to: {}) —
- optional Settings controlling the dialogue generation
:stability => double - 0.0 = Creative, 0.5 = Natural, 1.0 = Robust :use_speaker_boost => boolean
seed (Integer) (defaults to: nil) —
- optional Best effort to sample deterministically.

Returns:

(String) —

The binary audio data (usually an MP3).

# File 'lib/elevenlabs/client.rb', line 111

# Converts dialogue inputs into audio via POST /v1/text-to-dialogue.
#
# @param inputs [Array<Hash>] dialogue inputs, always sent under "inputs"
# @param model_id [String, nil] sent in the body when truthy
# @param settings [Hash, nil] sent in the body unless nil or empty
# @param seed [Integer, nil] sent in the body when truthy
# @return [String] the raw response body (binary audio)
def text_to_dialogue(inputs, model_id = nil, settings = {}, seed = nil)
  endpoint = "/v1/text-to-dialogue"

  request_body = { inputs: inputs }
  request_body[:model_id] = model_id if model_id
  # An explicit nil is treated like "no settings" instead of raising on nil.empty?.
  request_body[:settings] = settings unless settings.nil? || settings.empty?
  request_body[:seed] = seed if seed

  # merge returns a new hash, so the hash handed out by default_headers is
  # never modified (same approach as compose_music).
  headers = default_headers.merge("Accept" => "audio/mpeg")

  response = @connection.post(endpoint) do |req|
    req.headers = headers
    req.body = request_body.to_json
  end

  # Returns raw binary data (often MP3)
  response.body
rescue Faraday::ClientError => e
  handle_error(e)
end

#text_to_speech(voice_id, text, options = {}) ⇒ `String`

Convert text to speech and retrieve audio (binary data) Documentation: elevenlabs.io/docs/api-reference/text-to-speech/convert

Parameters:

voice_id (String) —
- the ID of the voice to use
text (String) —
- text to synthesize
options (Hash) (defaults to: {}) —
- optional TTS parameters
:model_id => String (e.g. “eleven_monolingual_v1” or “eleven_multilingual_v1”) :voice_settings => Hash (stability, similarity_boost, style, use_speaker_boost, etc.) :optimize_streaming => Boolean (whether to receive chunked streaming audio)

Returns:

(String) —

The binary audio data (usually an MP3).

# File 'lib/elevenlabs/client.rb', line 41

# Converts text to speech via POST /v1/text-to-speech/{voice_id}.
#
# @param voice_id [String] voice ID interpolated into the request path
# @param text [String] text to synthesize
# @param options [Hash] optional parameters
#   :voice_settings => Hash, sent in the body when given
#   :model_id => String, sent in the body when given
#   :optimize_streaming => when truthy, adds "Accept: audio/mpeg" and
#     "Transfer-Encoding: chunked" request headers
# @return [String] the raw response body (binary audio)
def text_to_speech(voice_id, text, options = {})
  endpoint = "/v1/text-to-speech/#{voice_id}"

  request_body = { text: text }
  request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
  request_body[:model_id] = options[:model_id] if options[:model_id]

  headers = default_headers
  if options[:optimize_streaming]
    # merge returns a new hash, so the hash handed out by default_headers is
    # never modified.
    headers = headers.merge("Accept" => "audio/mpeg", "Transfer-Encoding" => "chunked")
  end

  response = @connection.post(endpoint) do |req|
    req.headers = headers
    req.body = request_body.to_json
  end

  # Returns raw binary data (often MP3)
  response.body
rescue Faraday::ClientError => e
  handle_error(e)
end

#text_to_speech_stream(voice_id, text, options = {}, &block) ⇒ `Object`

Streams text-to-speech audio (POST /v1/text-to-speech/{voice_id}/stream).

# File 'lib/elevenlabs/client.rb', line 75

# Streams text-to-speech audio via POST /v1/text-to-speech/{voice_id}/stream,
# passing each received chunk to the block.
#
# @param voice_id [String] voice ID interpolated into the request path
# @param text [String] text to synthesize
# @param options [Hash] optional parameters
#   :model_id => String (defaults to "eleven_multilingual_v2")
#   :output_format => String, sent as the output_format query parameter
#     (defaults to "mp3_44100_128")
#   :voice_settings => Hash, added to the request body when given
# @yieldparam chunk [String] each chunk handed to the request's on_data callback
# @return [Object] the value returned by @connection.post
def text_to_speech_stream(voice_id, text, options = {}, &block)
  query = URI.encode_www_form(output_format: options[:output_format] || "mp3_44100_128")
  endpoint = "/v1/text-to-speech/#{voice_id}/stream?#{query}"

  request_body = { text: text, model_id: options[:model_id] || "eleven_multilingual_v2" }
  request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]

  # merge returns a new hash, so the hash handed out by default_headers is
  # never modified (same approach as compose_music).
  headers = default_headers.merge("Accept" => "audio/mpeg")

  response = @connection.post(endpoint, request_body.to_json, headers) do |req|
    # Kept as a non-lambda proc so extra callback arguments are ignored.
    req.options.on_data = proc { |chunk, _| block&.call(chunk) }
  end

  response
rescue Faraday::ClientError => e
  handle_error(e)
end

Class: Elevenlabs::Client

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key: nil, open_timeout: 5, read_timeout: 120) ⇒ Client

Instance Method Details

#active?(voice_id) ⇒ Boolean

#banned?(voice_id) ⇒ Boolean

#compose_music(options = {}) ⇒ Object

#compose_music_detailed(options = {}) ⇒ Object

#compose_music_stream(options = {}, &block) ⇒ Object

#create_from_generated_voice(voice_name, voice_description, generated_voice_id, labels: nil, played_not_selected_voice_ids: nil) ⇒ Hash

#create_music_plan(options = {}) ⇒ Object

#create_voice(name, samples = [], options = {}) ⇒ Object

#delete_voice(voice_id) ⇒ Hash

#design_voice(voice_description, options = {}) ⇒ Hash

#edit_voice(voice_id, samples = [], options = {}) ⇒ Object

#get_voice(voice_id) ⇒ Hash

#list_models ⇒ Hash

#list_voices ⇒ Hash

#sound_generation(text, options = {}) ⇒ String

#text_to_dialogue(inputs, model_id = nil, settings = {}, seed = nil) ⇒ String

#text_to_speech(voice_id, text, options = {}) ⇒ String

#text_to_speech_stream(voice_id, text, options = {}, &block) ⇒ Object

#initialize(api_key: nil, open_timeout: 5, read_timeout: 120) ⇒ `Client`

#active?(voice_id) ⇒ `Boolean`

#banned?(voice_id) ⇒ `Boolean`

#compose_music(options = {}) ⇒ `Object`

#compose_music_detailed(options = {}) ⇒ `Object`

#compose_music_stream(options = {}, &block) ⇒ `Object`

#create_from_generated_voice(voice_name, voice_description, generated_voice_id, labels: nil, played_not_selected_voice_ids: nil) ⇒ `Hash`

#create_music_plan(options = {}) ⇒ `Object`

#create_voice(name, samples = [], options = {}) ⇒ `Object`

#delete_voice(voice_id) ⇒ `Hash`

#design_voice(voice_description, options = {}) ⇒ `Hash`

#edit_voice(voice_id, samples = [], options = {}) ⇒ `Object`

#get_voice(voice_id) ⇒ `Hash`

#list_models ⇒ `Hash`

#list_voices ⇒ `Hash`

#sound_generation(text, options = {}) ⇒ `String`

#text_to_dialogue(inputs, model_id = nil, settings = {}, seed = nil) ⇒ `String`

#text_to_speech(voice_id, text, options = {}) ⇒ `String`

#text_to_speech_stream(voice_id, text, options = {}, &block) ⇒ `Object`