Module: SpeechToText::Util
- Included in:
- AmazonS2T, GoogleS2T, IbmWatsonS2T, MozillaDeepspeechS2T, SpeechmaticsS2T
- Defined in:
- lib/speech_to_text/util.rb
Overview
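Utility helpers shared by the speech-to-text service classes: timestamp formatting, WebVTT caption writing, JSON metadata files, and video-to-audio conversion with ffmpeg. (The source carries the directive `rubocop:disable Style/Documentation`.)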
Class Method Summary collapse
-
.captions_json(file_path:, file_name:, localeName:, locale:) ⇒ Object
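Writes a one-entry JSON captions index (locale name and locale code) to `file_path/file_name`. (Source directive: `rubocop:disable Naming/UncommunicativeMethodParamName`.)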
-
.recording_json(file_path:, record_id:, timestamp:, language:) ⇒ Object
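Writes the JSON track metadata file `<record_id>-<timestamp>-track.json` into `file_path`. (Source directive: `rubocop:disable Metrics/MethodLength`.)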
-
.seconds_to_timestamp(number) ⇒ Object
Converts a number of seconds to an `hh:mm:ss.mmm` timestamp string. (Source directive: `rubocop:disable Metrics/MethodLength`.)
-
.video_to_audio(video_file_path:, video_name:, video_content_type:, audio_file_path:, audio_name:, audio_content_type:, **duration) ⇒ Object
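Converts a video file to single-channel 16 kHz audio with ffmpeg, optionally trimmed with `start_time:` / `end_time:`. (Source directives: `rubocop:enable Metrics/MethodLength`, `rubocop:disable Metrics/ParameterLists`.)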
-
.write_to_webvtt(vtt_file_path:, vtt_file_name:, text_array:, start_time:) ⇒ Object
Creates and writes the WebVTT caption file: ten words per cue, split over two lines of five words. (Source directives: `rubocop:enable Metrics/MethodLength`, `rubocop:disable Metrics/MethodLength`.)
Class Method Details
.captions_json(file_path:, file_name:, localeName:, locale:) ⇒ Object
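Writes a one-entry JSON captions index, `[{"localeName": …, "locale": …}]`, to `file_path/file_name`. (Source directive: `rubocop:disable Naming/UncommunicativeMethodParamName`.)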
96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/speech_to_text/util.rb', line 96
#
# Writes a one-entry captions index file containing a single JSON array
# with one object: [{"localeName": "...", "locale": "..."}].
#
# NOTE(review): the values are interpolated verbatim, so a value containing
# a double quote or backslash would produce invalid JSON.
#
# @param file_path [String] directory the file is written into (must exist)
# @param file_name [String] name of the file to create or overwrite
# @param localeName [String] value written under the "localeName" key
# @param locale [String] value written under the "locale" key
# @return [nil]
# @raise [SystemCallError] if the file cannot be opened for writing
def self.captions_json(file_path:, file_name:,
                       # rubocop:disable Naming/VariableName
                       localeName:,
                       # rubocop:enable Naming/VariableName
                       locale:)
  captions_file_name = "#{file_path}/#{file_name}"
  line = "[{\"localeName\": \"#{localeName}\", \"locale\": \"#{locale}\"}]"
  # Block form closes the handle even when the write raises.
  File.open(captions_file_name, 'w') { |captions_file| captions_file.puts line }
end
.recording_json(file_path:, record_id:, timestamp:, language:) ⇒ Object
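Writes the JSON track metadata (record id, kind, language, label, original and temporary file names) to `file_path/<record_id>-<timestamp>-track.json`. (Source directive: `rubocop:disable Metrics/MethodLength`.)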
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/speech_to_text/util.rb', line 111
#
# Writes the track metadata file "<record_id>-<timestamp>-track.json" into
# +file_path+ as a hand-formatted JSON object, one key per line.
#
# NOTE(review): "label" is always written as "English", whatever
# +language+ is; confirm whether that is intended.
#
# @param file_path [String] directory the file is written into (must exist)
# @param record_id [String] recording id; used in the file name and body
# @param timestamp [#to_s] value embedded in the file names
# @param language [String] language code written to "lang" and used in
#   "original_filename"
# @return [nil]
# @raise [SystemCallError] if the file cannot be opened for writing
def self.recording_json(file_path:, record_id:, timestamp:, language:)
  track_name = "#{record_id}-#{timestamp}-track"
  lines = [
    '{',
    "\"record_id\": \"#{record_id}\",",
    '"kind": "subtitles",',
    "\"lang\": \"#{language}\",",
    '"label": "English",',
    "\"original_filename\": \"caption_#{language}.vtt\",",
    "\"temp_filename\": \"#{track_name}.txt\"",
    '}'
  ]
  # Block form closes the handle even when a write raises;
  # IO#puts with an array writes one element per line.
  File.open("#{file_path}/#{track_name}.json", 'w') { |file| file.puts lines }
end
.seconds_to_timestamp(number) ⇒ Object
Converts a number of seconds into an `hh:mm:ss.mmm` timestamp string, zero-padding each field and the milliseconds. (Source directive: `rubocop:disable Metrics/MethodLength`.)
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/speech_to_text/util.rb', line 17
#
# Converts a number of seconds into an "hh:mm:ss.mmm" timestamp string.
#
# The value is rounded to whole milliseconds *before* it is split into
# fields, so a rounding carry propagates into minutes and hours
# (59.9996 becomes "00:01:00.000" rather than "00:00:60.000").
#
# @param number [Numeric] non-negative duration in seconds
# @return [String] timestamp with at least two digits per field and exactly
#   three millisecond digits
def self.seconds_to_timestamp(number)
  total_ms = (number * 1000).round
  hh, remainder = total_ms.divmod(3_600_000)
  mm, remainder = remainder.divmod(60_000)
  ss, ms = remainder.divmod(1000)
  format('%02d:%02d:%02d.%03d', hh, mm, ss, ms)
end
.video_to_audio(video_file_path:, video_name:, video_content_type:, audio_file_path:, audio_name:, audio_content_type:, **duration) ⇒ Object
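Runs ffmpeg to convert `video_file_path/video_name.video_content_type` into single-channel 16 kHz audio at `audio_file_path/audio_name.audio_content_type`, echoing ffmpeg's output; optional `start_time:` / `end_time:` keywords trim the result. (Source directives: `rubocop:enable Metrics/MethodLength`, `rubocop:disable Metrics/ParameterLists`.)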
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
# File 'lib/speech_to_text/util.rb', line 131
#
# Converts a video file to single-channel, 16 kHz audio by running ffmpeg,
# echoing ffmpeg's combined stdout/stderr to this process's stdout. If
# ffmpeg exits unsuccessfully, a "FAILED to execute" banner is printed; no
# exception is raised for a failing exit status.
#
# The command is passed to Open3 as an argument vector, so no shell is
# involved and paths containing spaces or shell metacharacters are safe.
#
# @param video_file_path [String] directory of the input video
# @param video_name [String] input file name without extension
# @param video_content_type [String] input file extension
# @param audio_file_path [String] directory for the output audio
# @param audio_name [String] output file name without extension
# @param audio_content_type [String] output file extension
# @param duration [Hash] optional :start_time and/or :end_time, passed to
#   ffmpeg's -ss / -t options; when neither is present the whole input is
#   converted
# @return [nil]
# rubocop:disable Metrics/ParameterLists
def self.video_to_audio(video_file_path:, video_name:, video_content_type:,
                        audio_file_path:, audio_name:, audio_content_type:,
                        **duration)
  # rubocop:enable Metrics/ParameterLists
  start_time = duration[:start_time]
  end_time = duration[:end_time]
  input = "#{video_file_path}/#{video_name}.#{video_content_type}"
  output = "#{audio_file_path}/#{audio_name}.#{audio_content_type}"

  # Option order inside each branch mirrors the command lines this method
  # has always produced.
  input_options =
    if start_time.nil? && end_time.nil?
      ['-i', input]
    elsif start_time.nil?
      ['-ss', '0', '-i', input, '-t', end_time.to_s]
    elsif end_time.nil?
      ['-ss', start_time.to_s, '-i', input]
    else
      ['-t', end_time.to_s, '-i', input, '-ss', start_time.to_s]
    end
  command = ['ffmpeg', '-y', *input_options, '-ac', '1', '-ar', '16000', output]

  Open3.popen2e(*command) do |_stdin, stdout_err, wait_thr|
    stdout_err.each_line { |line| puts line }
    unless wait_thr.value.success?
      puts '---------------------'
      puts "FAILED to execute --> #{command.join(' ')}"
      puts '---------------------'
    end
  end
  nil
end
.write_to_webvtt(vtt_file_path:, vtt_file_name:, text_array:, start_time:) ⇒ Object
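Creates and writes the WebVTT caption file from a flat `[start, end, word, …]` array: each cue holds up to ten words on two lines of five, and `start_time` is added to every cue time. (Source directives: `rubocop:enable Metrics/MethodLength`, `rubocop:disable Metrics/MethodLength`.)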
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/speech_to_text/util.rb', line 37
#
# Creates the WebVTT caption file "<vtt_file_path>/<vtt_file_name>".
#
# +text_array+ is a flat list with three entries per word:
#   [start_seconds, end_seconds, word, start_seconds, end_seconds, word, ...]
#
# Every 10 words (30 entries) form one numbered cue. The cue starts at its
# first word's start time and ends at its 10th word's end time (entry 28 of
# the cue); a shorter final cue ends at the second-to-last entry of the
# whole array. Within a cue a line break is inserted before the 6th word.
#
# @param vtt_file_path [String] directory the file is written into
# @param vtt_file_name [String] name of the file to create or overwrite
# @param text_array [Array] flat timing/word list described above
# @param start_time [#to_i] offset in seconds added to every cue time, for
#   audio whose beginning was cut; converted with to_i
# @return [nil]
# @raise [SystemCallError] if the file cannot be opened for writing
def self.write_to_webvtt(vtt_file_path:, # rubocop:disable Metrics/AbcSize
                         vtt_file_name:,
                         text_array:,
                         start_time:)
  offset = start_time.to_i

  entries_per_word = 3  # [start, end, word]
  entries_per_cue = 30  # 10 words per cue
  cue_end_index = 28    # end time of the 10th word within a cue
  words_per_line = 5    # line break before the 6th word of a cue

  # End time used for a final cue that has fewer than 10 words.
  final_end = text_array[text_array.length - 2]

  File.open("#{vtt_file_path}/#{vtt_file_name}", 'w') do |file|
    file.print 'WEBVTT'

    text_array.each_slice(entries_per_cue).with_index(1) do |cue, cue_number|
      cue_end = cue.length > cue_end_index ? cue[cue_end_index] : final_end

      file.puts "\n\n"
      file.puts cue_number
      file.print "#{seconds_to_timestamp(cue[0] + offset)} "
      file.puts "--> #{seconds_to_timestamp(cue_end + offset)}"

      cue.each_slice(entries_per_word).with_index do |entry, word_number|
        # A trailing incomplete triple has no word entry to print.
        next if entry.length < entries_per_word

        file.puts if word_number == words_per_line
        file.print "#{entry[2]} "
      end
    end
  end
  nil
end