Class: Gemini::Audio

Inherits:
Object
  • Object
show all
Defined in:
lib/gemini/audio.rb

Instance Method Summary collapse

Constructor Details

#initialize(client:) ⇒ Audio

Returns a new instance of Audio.



3
4
5
# File 'lib/gemini/audio.rb', line 3

# Builds an Audio helper bound to the given API client.
#
# @param client [Object] object used to send requests; #transcribe calls
#   `json_post(path:, parameters:)` on it
def initialize(client:)
  @client = client
end

Instance Method Details

#transcribe(parameters: {}) ⇒ Object

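Transcribes an audio clip supplied either as an IO-like object (`:file`) or as a file URI (`:file_uri`). Raises ArgumentError when neither is given.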



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/gemini/audio.rb', line 8

# Transcribes an audio clip with a Gemini model through `generateContent`.
#
# The audio is supplied either as an IO-like object (`:file`), which is
# Base64-encoded and sent inline, or as a `:file_uri`, in which case the work
# is delegated to `transcribe_with_file_uri`. The hash passed by the caller is
# never modified, so the same options hash can be reused across calls.
#
# @param parameters [Hash] request options, keyed by symbol
# @option parameters [#rewind, #read] :file audio to send inline
# @option parameters [Object] :file_uri audio reference handed to
#   `transcribe_with_file_uri`; takes precedence over `:file`
# @option parameters [String] :model ("gemini-2.5-flash") model name used in
#   the request path
# @option parameters [String] :language when present, " in <language>" is
#   appended to the prompt text (inline path only; the URI path receives the
#   language as a separate argument)
# @option parameters [String] :content_text ("Transcribe this audio clip")
#   prompt text sent alongside the audio
# @note Every other key except `:contents` is copied to the top level of the
#   request body.
# @return [Gemini::Response] wrapper around the client's response on the inline
#   path; on the URI path, whatever `transcribe_with_file_uri` returns
# @raise [ArgumentError] if neither `:file` nor `:file_uri` is given
def transcribe(parameters: {})
  # Shallow copy: the deletes below must not strip keys from the caller's hash.
  options = parameters.dup
  file = options.delete(:file)
  file_uri = options.delete(:file_uri)
  model = options.delete(:model) || "gemini-2.5-flash"
  language = options.delete(:language)
  content_text = options.delete(:content_text) || "Transcribe this audio clip"

  raise ArgumentError, "No audio file specified" unless file || file_uri

  return transcribe_with_file_uri(file_uri, model, language, content_text, options) if file_uri

  # Get MIME type (simple detection)
  mime_type = determine_mime_type(file)

  # Strict (newline-free) Base64 of the whole file. `pack("m0")` is what
  # Base64.strict_encode64 does internally, without requiring the base64 gem.
  file.rewind
  file_data = [file.read].pack("m0")

  # Language setting for transcription request
  content_text = "#{content_text} in #{language}" if language

  # Build request parameters
  request_params = {
    contents: [{
      parts: [
        { text: content_text },
        { inline_data: { mime_type: mime_type, data: file_data } }
      ]
    }]
  }

  # Merge additional parameters at the top level; :contents is reserved for the
  # prompt/audio parts built above and cannot be overridden.
  request_params.merge!(options.reject { |key, _value| key == :contents })

  # Send generateContent request
  response = @client.json_post(
    path: "models/#{model}:generateContent",
    parameters: request_params
  )

  Gemini::Response.new(response)
end