Class: SmTranscript::Transcript
- Inherits:
-
Object
- Object
- SmTranscript::Transcript
- Defined in:
- lib/sm_transcript/transcript.rb
Instance Attribute Summary collapse
-
#words ⇒ Object
readonly
Returns the value of attribute words.
Instance Method Summary collapse
-
#initialize(word_arr) ⇒ Transcript
constructor
A new instance of Transcript.
- #write_html(dest_file) ⇒ Object
-
#write_json(dest_file) ⇒ Object
The JSON format is defined at url/of/document.
-
#write_sqlite(db_id) ⇒ Object
Store transcript in a Sqlite database (though the essence of this method should work for all relational dbs).
- #write_ttml(dest_file) ⇒ Object
Constructor Details
#initialize(word_arr) ⇒ Transcript
Returns a new instance of Transcript.
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/sm_transcript/transcript.rb', line 17
#
# Builds a transcript around a collection of word objects.
#
# The collection is stored in @words and @metadata starts out as an empty
# hash. The input is validated up front: on a nil, non-enumerable or empty
# collection a diagnostic is printed to STDERR and the process exits with
# status -1.
#
# (Previously these checks were written as `rescue <expression>` clauses
# around a plain assignment, which can never raise, so none of them ran.)
#
# @param word_arr [#each] the words that make up the transcript
def initialize(word_arr)
  @metadata = {}
  @words = word_arr

  if @words.nil?
    STDERR.puts "words is nil"
    exit(-1)
  elsif !@words.respond_to?(:each)
    STDERR.puts "object doesn't have an each() method"
    exit(-1)
  elsif @words.respond_to?(:empty?) && @words.empty?
    # Only collections that can report emptiness are checked here; a bare
    # enumerator without #empty? is accepted as-is.
    STDERR.puts "words array is empty"
    exit(-1)
  end
end
Instance Attribute Details
#words ⇒ Object (readonly)
Returns the value of attribute words.
15 16 17 |
# File 'lib/sm_transcript/transcript.rb', line 15
#
# Read-only accessor for the word collection held in @words.
#
# @return [Object] whatever is currently stored in @words
def words
  @words
end
Instance Method Details
#write_html(dest_file) ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/sm_transcript/transcript.rb', line 35
#
# Writes the transcript as a sequence of HTML <span> elements, one per line.
#
# Consecutive words whose start_time (milliseconds) falls within the same
# whole second are joined into one span with id "T<second>". Every span is
# passed through cleanup_phrase before the closing tag is appended.
#
# Fixes relative to the earlier implementation:
# * words in the first second (T0) are no longer dropped; "no span yet" is
#   tracked explicitly instead of using second 0 as a sentinel;
# * an empty word list produces an empty file, not a stray "</span>";
# * the final span is cleaned up like all the others;
# * the destination is only opened once the spans have been built, so a
#   failure while reading @words does not truncate an existing file.
#
# NOTE(review): word text is written without HTML escaping — confirm that
# the upstream data cannot contain markup.
#
# @param dest_file [String] path of the file to create or overwrite
# @return [nil]
def write_html(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?
  if @words.nil?
    STDERR.puts "object doesn't have an each() method"
    STDERR.puts dest_file
  end
  STDERR.puts dest_file

  spans = []
  current_secs = nil
  @words.each do |w|
    # Reduce the granularity to whole seconds so that multiple words fall
    # within one <span> element.
    secs = w.start_time.to_i / 1000
    if !spans.empty? && secs == current_secs
      spans.last << " #{w.word}"
    else
      spans << "<span id='T#{secs}'>#{w.word}"
      current_secs = secs
    end
  end

  # The block form of File.open closes the file; no explicit close needed.
  File.open(dest_file, "w") do |f|
    spans.each { |span| f.puts cleanup_phrase(span) << "</span> " }
  end
  nil
end
#write_json(dest_file) ⇒ Object
The JSON format is defined at url/of/document. It is the format of the static timed-text document that is passed to the player.
142 143 144 |
# File 'lib/sm_transcript/transcript.rb', line 142
#
# Placeholder for JSON export. The body is empty: nothing is written to
# dest_file and the method returns nil.
#
# @param dest_file [String] intended output path (currently unused)
# @return [nil]
def write_json(dest_file)
end
#write_sqlite(db_id) ⇒ Object
Store transcript in a Sqlite database (though the essence of this method should work for all relational dbs). Unlike some of the other write_xxx() methods, this one requires the @metadata hash. Parameters: db_id - for SQLite, this is a filename; video_id - a unique identifier for the video.
153 154 155 156 157 158 159 160 |
# File 'lib/sm_transcript/transcript.rb', line 153
#
# Opens the SQLite database "<db_id>.sqlite3" and derives a column
# definition string ("<name> TEXT, <name> TEXT, ...") from the element
# names matched under the first inner node of the document.
#
# Fixes relative to the earlier implementation:
# * db_id is honoured; it used to be overwritten unconditionally with
#   "sm-transcript", which is now only the fallback for nil or "";
# * the database handle is closed even if the field lookup raises.
#
# NOTE(review): `doc` and `inner_node_name` are not defined in this method;
# presumably they are provided elsewhere in the class — confirm.
# NOTE(review): nothing is written to the database yet; the method only
# computes and returns the column definitions.
#
# @param db_id [String, nil] database file name without the ".sqlite3" suffix
# @return [String] comma-separated "<field> TEXT" column definitions
def write_sqlite(db_id)
  db_id = "sm-transcript" if db_id.nil? || db_id == ""
  db = SQLite3::Database.open(db_id + '.sqlite3')
  begin
    fields = XPath.match(doc.root, inner_node_name + '[1]/*').map { |node| node.name }
    fields.map { |x| "%s TEXT" % x }.join(', ')
  ensure
    db.close
  end
end
#write_ttml(dest_file) ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/sm_transcript/transcript.rb', line 78
#
# Writes the transcript as a TTML document built with Builder::XmlMarkup.
#
# Consecutive words whose start_time (milliseconds) falls within the same
# whole second form one phrase. Each phrase becomes a <p> element carrying
# xml:id "T<second>", begin (start of its first word), end (end of its last
# word) and dur (end - begin), all in milliseconds.
#
# Fixes relative to the earlier implementation:
# * each <p> now gets the id of its own second; previously it received the
#   id of the following phrase, so the last two elements shared an xml:id;
# * no empty <p> is emitted for an empty word list or when the first word
#   starts at or after the one-second mark;
# * the first phrase's begin is its real start time instead of always 0ms.
#
# The phrase text keeps the leading space the earlier output had.
#
# @param dest_file [String] path of the file to create or overwrite
def write_ttml(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?

  # Group the words into phrases first so that every attribute of a <p>
  # is taken from the phrase it describes.
  phrases = []
  @words.each do |w|
    secs = w.start_time.to_i / 1000
    current = phrases.last
    if current && current[:secs] == secs
      current[:text] << " #{w.word}"
      current[:end_ms] = w.end_time.to_i
    else
      phrases << {
        :secs     => secs,
        :start_ms => w.start_time.to_i,
        :end_ms   => w.end_time.to_i,
        :text     => " #{w.word}"
      }
    end
  end

  buf = ""
  bldr = Builder::XmlMarkup.new(:target => buf, :indent => 2)
  bldr.instruct!
  bldr.tt("xmlns" => "http://www.w3.org/2006/04/ttaf1",
          "xmlns:tts" => "http://www.w3.org/ns/ttml#styling",
          "xmlns:ttm" => "http://www.w3.org/ns/ttml#metadata",
          "xml:lang" => "en") {
    bldr.head { |b|
      b.ttm :title, 'The title of this transcript'
      b.ttm :desc, 'The description of this transcript'
    }
    bldr.body {
      bldr.div {
        phrases.each do |phrase|
          bldr.p(phrase[:text],
                 "xml:id" => "T#{phrase[:secs]}",
                 "begin" => "#{phrase[:start_ms]}ms",
                 "dur" => "#{phrase[:end_ms] - phrase[:start_ms]}ms",
                 "end" => "#{phrase[:end_ms]}ms")
        end
      }
    }
  }

  # The block form of File.open flushes and closes the file on exit.
  File.open(dest_file, "w") do |f|
    f.puts buf
  end
end