Class: SmTranscript::Transcript
- Inherits:
-
Object
- Object
- SmTranscript::Transcript
- Defined in:
- lib/sm_transcript/transcript.rb
Instance Attribute Summary collapse
-
#words ⇒ Object
readonly
Returns the value of attribute words.
Instance Method Summary collapse
-
#initialize(word_arr) ⇒ Transcript
constructor
A new instance of Transcript.
- #write_html(dest_file) ⇒ Object
-
#write_json(dest_file) ⇒ Object
The JSON format is defined at url/of/document.
-
#write_sqlite(db_id) ⇒ Object
Store transcript in a SQLite database (though the essence of this method should work for all relational databases).
- #write_ttml(dest_file) ⇒ Object
Constructor Details
#initialize(word_arr) ⇒ Transcript
Returns a new instance of Transcript.
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/sm_transcript/transcript.rb', line 21
#
# Builds a transcript around a collection of word objects.
#
# The argument is validated with explicit guard clauses before it is stored.
# A `rescue` clause only runs after an exception has been raised, and a plain
# assignment never raises, so rescue-based checks would never execute.
#
# @param word_arr [#each] non-empty collection of words
# @raise [ArgumentError] if word_arr is nil, does not respond to each,
#   or reports itself as empty
def initialize(word_arr)
  raise ArgumentError, "words is nil" if word_arr.nil?

  unless word_arr.respond_to?(:each)
    raise ArgumentError, "object doesn't have an each() method"
  end

  # Not every enumerable implements empty?, so only ask when it is available.
  if word_arr.respond_to?(:empty?) && word_arr.empty?
    raise ArgumentError, "words array is empty"
  end

  @words = word_arr
end
Instance Attribute Details
#words ⇒ Object (readonly)
Returns the value of attribute words.
19 20 21 |
# File 'lib/sm_transcript/transcript.rb', line 19
#
# Read-only accessor for the word collection held in @words.
#
# @return [Object] the current value of the @words instance variable
def words
  @words
end
Instance Method Details
#write_html(dest_file) ⇒ Object
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/sm_transcript/transcript.rb', line 39
#
# Writes the transcript as a minimal HTML document. Consecutive words whose
# start times fall within the same second are grouped into one
# <span id='T<second>'> element, one span per output line.
#
# Phrases are collected before the destination is opened, so every phrase,
# including one that starts in second 0, gets its own opening and closing
# tag, and an empty word list produces no span at all. Each phrase is passed
# through cleanup_phrase before the closing tag is appended.
#
# @param dest_file [String] path of the HTML file to (over)write
# @return [Object] the value returned by the File.open block
def write_html(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?
  if @words.nil?
    STDERR.puts "object doesn't have an each() method"
    STDERR.puts dest_file
  end
  STDERR.puts dest_file

  # Each entry is [start_second, "<span id='T..'>word word ..."].
  phrases = []
  @words.each do |w|
    # Reduce the start time's granularity (ms -> s) so that multiple words
    # fall within a single <span> element.
    start_time = w.start_time.to_i / 1000
    if phrases.empty? || phrases.last[0] != start_time
      phrases << [start_time, "<span id='T#{start_time}'>#{w.word}"]
    else
      phrases.last[1] << " #{w.word}"
    end
  end

  File.open(dest_file, "w") do |f|
    # write Title into <head> for the benefit of Google Search Appliance
    f.puts '<head>'
    f.puts '</head>'
    f.puts '<body>'
    phrases.each do |_start_time, span_element|
      f.puts cleanup_phrase(span_element) << "</span> "
    end
    f.puts '</body>'
  end
end
#write_json(dest_file) ⇒ Object
The JSON format is defined at url/of/document. It is the format of the static timed-text document that is passed to the player.
155 156 157 |
# File 'lib/sm_transcript/transcript.rb', line 155
#
# Placeholder for the JSON writer. The method has no implementation yet:
# it ignores its argument, writes nothing, and returns nil.
#
# @param dest_file [Object] currently unused
# @return [nil]
def write_json(dest_file)
  nil
end
#write_sqlite(db_id) ⇒ Object
Store transcript in a SQLite database (though the essence of this method should work for all relational databases). Unlike some of the other write_xxx() methods, this one requires a @metadata array. Parameters: db_id - for SQLite, this is a filename; video_id - a unique identifier for the video.
166 167 168 169 170 171 172 173 |
# File 'lib/sm_transcript/transcript.rb', line 166
#
# Opens the SQLite database named by db_id and builds a "<name> TEXT, ..."
# column-definition string from the child element names of the first inner
# node of a document. Nothing is written to the database yet: the method
# only derives and returns the column definitions.
#
# NOTE(review): `doc` and `inner_node_name` are not defined in this method;
# presumably they are helpers defined elsewhere in the class - confirm.
# NOTE(review): the surrounding documentation mentions a @metadata array and
# a video_id, neither of which is used here - confirm the intended design.
#
# @param db_id [String, nil] database name; '.sqlite3' is appended to form
#   the filename. Falls back to "sm-transcript" when nil or empty.
# @return [String] comma-separated "<field> TEXT" column definitions
def write_sqlite(db_id)
  # Honor the caller's identifier instead of always overwriting it;
  # "sm-transcript" remains the fallback name.
  db_name = (db_id.nil? || db_id.to_s.empty?) ? "sm-transcript" : db_id.to_s
  db = SQLite3::Database.open(db_name + '.sqlite3')
  begin
    fields = XPath.match(doc.root, inner_node_name + '[1]/*').map { |node| node.name }
    fields.map { |x| "%s TEXT" % x }.join(', ')
  ensure
    # Release the handle even when field discovery raises.
    db.close
  end
end
#write_ttml(dest_file) ⇒ Object
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/sm_transcript/transcript.rb', line 91
#
# Writes the transcript as a TTML (timed text) XML document. Consecutive
# words whose start times fall within the same second form one phrase, and
# each phrase becomes a <p> element carrying:
#   xml:id - "T<second in which the phrase starts>"
#   begin  - start time of the phrase's first word, in ms
#   end    - end time of the phrase's last word, in ms
#   dur    - end minus begin, in ms
#
# Phrases are collected first, so every <p> is labelled with its own second
# (no shifted or duplicated xml:id values) and no empty <p> is emitted for
# an empty transcript or before the first word.
#
# @param dest_file [String] path of the TTML file to (over)write
# @return [Object] the value returned by the File.open block
def write_ttml(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?

  phrases = []
  @words.each do |w|
    # Reduce the start time's granularity (ms -> s) so that multiple words
    # form a phrase.
    start_secs = w.start_time.to_i / 1000
    if phrases.empty? || phrases.last[:secs] != start_secs
      phrases << {
        :secs     => start_secs,
        :text     => " #{w.word}",
        :start_ms => w.start_time.to_i,
        :end_ms   => w.end_time.to_i
      }
    else
      current = phrases.last
      current[:text] << " #{w.word}"
      current[:end_ms] = w.end_time.to_i
    end
  end

  buf = ""
  bldr = Builder::XmlMarkup.new(:target => buf, :indent => 2)
  bldr.instruct!
  bldr.tt("xmlns" => "http://www.w3.org/2006/04/ttaf1",
          "xmlns:tts" => "http://www.w3.org/ns/ttml#styling",
          "xmlns:ttm" => "http://www.w3.org/ns/ttml#metadata",
          "xml:lang" => "en") {
    bldr.head { |b|
      b.ttm :title, 'The title of this transcript'
      b.ttm :desc, 'The description of this transcript'
    }
    bldr.body {
      bldr.div {
        phrases.each do |phrase|
          bldr.p(phrase[:text],
                 "xml:id" => "T#{phrase[:secs]}",
                 "begin" => "#{phrase[:start_ms]}ms",
                 "dur" => "#{phrase[:end_ms] - phrase[:start_ms]}ms",
                 "end" => "#{phrase[:end_ms]}ms")
        end
      }
    }
  }

  # The block form closes (and therefore flushes) the file on exit.
  File.open(dest_file, "w") do |f|
    f.puts buf
  end
end