Module: SpeechToText::Util

Included in:: AmazonS2T, GoogleS2T, IbmWatsonS2T, MozillaDeepspeechS2T, SpeechmaticsS2T

Defined in:: lib/speech_to_text/util.rb

Overview

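Utility class methods shared by the speech-to-text services: timestamp formatting, WebVTT and JSON file writing, and video-to-audio conversion via ffmpeg.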

Class Method Summary collapse

.captions_json(file_path:, file_name:, localeName:, locale:) ⇒ Object

Writes a one-entry JSON array containing `localeName` and `locale` to `file_path/file_name`.
.recording_json(file_path:, record_id:, timestamp:, language:) ⇒ Object

Writes the `<record_id>-<timestamp>-track.json` caption-track metadata file into `file_path`.
.seconds_to_timestamp(number) ⇒ Object

Converts a time given in seconds to an `HH:MM:SS.mmm` timestamp string.
.video_to_audio(video_file_path:, video_name:, video_content_type:, audio_file_path:, audio_name:, audio_content_type:, **duration) ⇒ Object

Converts a video file to a mono, 16 kHz audio file with ffmpeg; optional `start_time:` and `end_time:` keywords limit the extracted range.
.write_to_webvtt(vtt_file_path:, vtt_file_name:, text_array:, start_time:) ⇒ Object

Creates and writes the WebVTT file.

Class Method Details

.captions_json(file_path:, file_name:, localeName:, locale:) ⇒ `Object`

Writes a one-entry JSON array containing `localeName` and `locale` to `file_path/file_name`.

# File 'lib/speech_to_text/util.rb', line 96

# Writes the captions index file: a single line holding a one-element JSON
# array with the "localeName" and "locale" keys.
#
# The file is opened in block form so the handle is closed even when the
# write raises.
#
# NOTE(review): both values are interpolated verbatim; a double quote or
# backslash inside them would yield invalid JSON — confirm callers only pass
# plain locale strings.
#
# @param file_path [#to_s] directory the file is written into
# @param file_name [#to_s] name of the file inside file_path
# @param localeName [#to_s] value stored under the "localeName" key
# @param locale [#to_s] value stored under the "locale" key
# @return [nil]
def self.captions_json(file_path:,
                       file_name:,
                       # rubocop:disable Naming/VariableName
                       localeName:,
                       # rubocop:enable Naming/VariableName
                       locale:)
  captions_file_name = "#{file_path}/#{file_name}"
  line = "[{\"localeName\": \"#{localeName}\", \"locale\": \"#{locale}\"}]"
  File.open(captions_file_name, 'w') { |captions_file| captions_file.puts line }
end

.recording_json(file_path:, record_id:, timestamp:, language:) ⇒ `Object`

Writes the `<record_id>-<timestamp>-track.json` caption-track metadata file into `file_path`.

# File 'lib/speech_to_text/util.rb', line 111

# Writes "<file_path>/<record_id>-<timestamp>-track.json", a small JSON
# object describing a subtitle track (record id, kind, language, label,
# original and temporary file names).
#
# The file is opened in block form so the handle is closed even when the
# write raises.
#
# NOTE(review): "label" is always written as "English" regardless of
# +language+ — confirm this is intended.
#
# @param file_path [#to_s] directory the file is written into
# @param record_id [#to_s] recording identifier, used in the file name and body
# @param timestamp [#to_s] value used in the file name and in "temp_filename"
# @param language [#to_s] value written as "lang" and used in "original_filename"
# @return [nil]
def self.recording_json(file_path:,
                        record_id:,
                        timestamp:,
                        language:)
  filename = "#{file_path}/#{record_id}-#{timestamp}-track.json"
  File.open(filename, 'w') do |file|
    # The heredoc ends with a newline, so puts adds no extra one.
    file.puts <<~JSON
      {
      "record_id": "#{record_id}",
      "kind": "subtitles",
      "lang": "#{language}",
      "label": "English",
      "original_filename": "caption_#{language}.vtt",
      "temp_filename": "#{record_id}-#{timestamp}-track.txt"
      }
    JSON
  end
end

.seconds_to_timestamp(number) ⇒ `Object`

Converts a time given in seconds to an `HH:MM:SS.mmm` timestamp string.

# File 'lib/speech_to_text/util.rb', line 17

# Converts a duration in seconds to a timestamp of the form "HH:MM:SS.mmm".
#
# The value is rounded to whole milliseconds *before* it is split into
# hours, minutes and seconds, so a rounding carry propagates correctly
# (59.9996 becomes "00:01:00.000", not "00:00:60.000").
#
# Hours are padded to at least two digits and grow as needed.
# Negative input is not meaningful here and is not handled specially.
#
# @param number [Numeric] time in seconds
# @return [String] zero-padded timestamp with exactly three fractional digits
def self.seconds_to_timestamp(number)
  total_ms = (number.round(3) * 1000).round
  hh, rest = total_ms.divmod(3_600_000)
  mm, rest = rest.divmod(60_000)
  ss, ms = rest.divmod(1000)
  format('%<hh>02d:%<mm>02d:%<ss>02d.%<ms>03d', hh: hh, mm: mm, ss: ss, ms: ms)
end

.video_to_audio(video_file_path:, video_name:, video_content_type:, audio_file_path:, audio_name:, audio_content_type:, **duration) ⇒ `Object`

Converts a video file to a mono, 16 kHz audio file with ffmpeg; optional `start_time:` and `end_time:` keywords limit the extracted range.

# File 'lib/speech_to_text/util.rb', line 131

# Runs ffmpeg to convert
# "<video_file_path>/<video_name>.<video_content_type>" into the mono
# (-ac 1), 16 kHz (-ar 16000) audio file
# "<audio_file_path>/<audio_name>.<audio_content_type>", overwriting any
# existing output (-y).
#
# Optional keywords collected in +duration+:
# * neither :start_time nor :end_time -> whole input is converted
# * only :end_time   -> "-ss 0 -i <input> -t <end_time>"
# * only :start_time -> "-ss <start_time> -i <input>"
# * both             -> "-t <end_time> -i <input> -ss <start_time>"
#
# The command is handed to Open3.popen2e as an argument list, so no shell is
# involved: paths containing spaces or shell metacharacters are passed to
# ffmpeg unchanged instead of being split or interpreted.
#
# ffmpeg's combined stdout/stderr is echoed line by line; a non-zero exit
# status is reported on stdout but does not raise.
#
# @param video_file_path [#to_s] directory of the input video
# @param video_name [#to_s] input file name without extension
# @param video_content_type [#to_s] input file extension
# @param audio_file_path [#to_s] directory of the output audio
# @param audio_name [#to_s] output file name without extension
# @param audio_content_type [#to_s] output file extension
# @param duration [Hash] optional :start_time and :end_time values
# @return [nil]
def self.video_to_audio(video_file_path:,
                        video_name:,
                        video_content_type:,
                        audio_file_path:,
                        audio_name:,
                        audio_content_type:,
                        **duration)
  # rubocop:enable Metrics/ParameterLists
  input = "#{video_file_path}/#{video_name}.#{video_content_type}"
  output = "#{audio_file_path}/#{audio_name}.#{audio_content_type}"
  start_time = duration[:start_time]
  end_time = duration[:end_time]

  command = %w[ffmpeg -y]
  if start_time.nil? && end_time.nil?
    command.push('-i', input)
  elsif start_time.nil?
    command.push('-ss', '0', '-i', input, '-t', end_time.to_s)
  elsif end_time.nil?
    command.push('-ss', start_time.to_s, '-i', input)
  else
    command.push('-t', end_time.to_s, '-i', input, '-ss', start_time.to_s)
  end
  command.push('-ac', '1', '-ar', '16000', output)

  Open3.popen2e(*command) do |_stdin, stdout_err, wait_thr|
    stdout_err.each_line { |line| puts line }

    unless wait_thr.value.success?
      puts '---------------------'
      puts "FAILED to execute --> #{command.join(' ')}"
      puts '---------------------'
    end
  end
end

.write_to_webvtt(vtt_file_path:, vtt_file_name:, text_array:, start_time:) ⇒ `Object`

Creates and writes the WebVTT file.

# File 'lib/speech_to_text/util.rb', line 37

# Creates "<vtt_file_path>/<vtt_file_name>" and writes +text_array+ to it as
# WebVTT cues.
#
# +text_array+ is a flat list of triples:
#   [start_timestamp, end_timestamp, word, start_timestamp, end_timestamp, word, ...]
#
# Every 30 entries (10 words) form one numbered cue. The cue starts at the
# first word's start time and ends at the tenth word's end time (index 28 of
# the cue); a shorter final cue ends at the second-to-last entry of the whole
# array. Within a cue a line break is inserted before the sixth word
# (index 17), giving two lines of up to five words each.
#
# +start_time+ is converted with to_i and added to every timestamp, so
# captions stay aligned when the beginning of the audio was cut off.
#
# The file is opened in block form so the handle is closed even when a
# timestamp computation raises part-way through.
#
# @param vtt_file_path [#to_s] directory the file is written into
# @param vtt_file_name [#to_s] name of the file inside vtt_file_path
# @param text_array [Array] flat start/end/word triples as described above
# @param start_time [#to_i] offset in seconds added to every timestamp
# @return [nil]
def self.write_to_webvtt(vtt_file_path:, # rubocop:disable Metrics/AbcSize
                         vtt_file_name:,
                         text_array:,
                         start_time:)
  offset = start_time.to_i

  # each word occupies 3 entries; the word itself is the third (index 2)
  word_index = 2
  # 10 words per cue => 30 entries
  block_size = 30
  # position of the 10th word's end timestamp inside a cue
  end_timestamp = 28
  # position of the 6th word inside a cue (6 * 3 - 1); a new line starts here
  line_space_index = 17

  File.open("#{vtt_file_path}/#{vtt_file_name}", 'w') do |file|
    file.print 'WEBVTT'

    text_array.each_slice(block_size).with_index(1) do |block, block_number|
      # A full cue carries its own end timestamp; a short final cue falls
      # back to the second-to-last entry of the whole array.
      cue_end = if block.length > end_timestamp
                  block[end_timestamp]
                else
                  text_array[text_array.length - 2]
                end

      file.puts "\n\n"
      file.puts block_number
      file.print "#{seconds_to_timestamp(block.first + offset)} "
      file.puts "--> #{seconds_to_timestamp(cue_end + offset)}"

      block.each_with_index do |item, position|
        next unless position % 3 == word_index

        file.puts if position == line_space_index
        file.print "#{item} "
      end
    end
  end
  nil
end

Module: SpeechToText::Util

Overview

Class Method Summary collapse

Class Method Details

.captions_json(file_path:, file_name:, localeName:, locale:) ⇒ Object

.recording_json(file_path:, record_id:, timestamp:, language:) ⇒ Object

.seconds_to_timestamp(number) ⇒ Object

.video_to_audio(video_file_path:, video_name:, video_content_type:, audio_file_path:, audio_name:, audio_content_type:, **duration) ⇒ Object

.write_to_webvtt(vtt_file_path:, vtt_file_name:, text_array:, start_time:) ⇒ Object

.captions_json(file_path:, file_name:, localeName:, locale:) ⇒ `Object`

.recording_json(file_path:, record_id:, timestamp:, language:) ⇒ `Object`

.seconds_to_timestamp(number) ⇒ `Object`

.video_to_audio(video_file_path:, video_name:, video_content_type:, audio_file_path:, audio_name:, audio_content_type:, **duration) ⇒ `Object`

.write_to_webvtt(vtt_file_path:, vtt_file_name:, text_array:, start_time:) ⇒ `Object`