Class: SmTranscript::Transcript
- Inherits:
-
Object
- Object
- SmTranscript::Transcript
- Defined in:
- lib/sm_transcript/transcript.rb
Instance Attribute Summary collapse
-
#words ⇒ Object
readonly
Returns the value of attribute words.
Instance Method Summary collapse
-
#initialize(word_arr) ⇒ Transcript
constructor
A new instance of Transcript.
- #write_html(dest_file) ⇒ Object
-
#write_json(dest_file) ⇒ Object
The JSON format is defined at url/of/document.
-
#write_sqlite(db_id) ⇒ Object
Store transcript in a Sqlite database (though the essence of this method should work for all relational dbs).
- #write_ttml(dest_file) ⇒ Object
Constructor Details
#initialize(word_arr) ⇒ Transcript
Returns a new instance of Transcript.
17 18 19 20 |
# File 'lib/sm_transcript/transcript.rb', line 17

# Creates a transcript around an existing collection of words.
#
# The collection is stored as-is (no copy is made) and is exposed through
# the read-only #words attribute.
#
# @param word_arr [Object] the word collection to wrap
# @return [Transcript] a new instance of Transcript
def initialize(word_arr)
  # The previous listing read "def initialize(word_arr) = {}", which does not
  # parse; the documented signature takes a single required argument.
  @words = word_arr
end
Instance Attribute Details
#words ⇒ Object (readonly)
Returns the value of attribute words.
15 16 17 |
# File 'lib/sm_transcript/transcript.rb', line 15

# Read-only access to the word collection held by this transcript.
#
# @return [Object] whatever is currently stored in @words
def words
  @words
end
Instance Method Details
#write_html(dest_file) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/sm_transcript/transcript.rb', line 22

# Writes the transcript to +dest_file+ as one HTML <span> per phrase.
#
# Consecutive words whose start time falls in the same whole second are
# grouped into a single phrase. Each phrase is written on its own line as
#   <span id='T{second}'>word word ...</span>
# with a trailing space after the closing tag. An existing file at
# +dest_file+ is overwritten; an empty word list produces an empty file.
#
# @param dest_file [String] path of the file to create or overwrite
def write_html(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?
  #  if File.exists?(dest_file)
  #    p "overwriting existing destination file"
  #  end
  File.open(dest_file, "w") do |f|
    # nil (rather than 0) marks "no span opened yet", so phrases that start
    # in second 0 are handled like every other phrase instead of being
    # mistaken for the initial state and dropped.
    span_element = nil
    prev_start_time = nil

    @words.each do |w|
      # Reduce the start time from milliseconds to whole seconds so that
      # multiple words fall within one <span> element.
      start_time = w.start_time.to_i / 1000

      if span_element && start_time == prev_start_time
        span_element << " #{w.word}"
      else
        # Flush the finished phrase (if any) before opening the next one.
        f.puts "#{cleanup_phrase(span_element)}</span> " if span_element
        span_element = "<span id='T#{start_time}'>#{w.word}"
        prev_start_time = start_time
      end
    end

    # The loop only flushes a phrase when the next one begins, so the last
    # phrase is still pending here.
    # NOTE(review): the final phrase now goes through cleanup_phrase like all
    # the others; previously it was written without it - confirm intended.
    f.puts "#{cleanup_phrase(span_element)}</span> " if span_element
    # No explicit close: File.open closes the file when the block returns.
  end
end
#write_json(dest_file) ⇒ Object
The JSON format is defined at url/of/document. It is the format of the static timed-text document that is passed to the player.
118 119 120 |
# File 'lib/sm_transcript/transcript.rb', line 118

# JSON export entry point. The method body is empty: nothing is written to
# +dest_file+ and the call evaluates to nil.
#
# @param dest_file [String] destination path (currently unused)
# @return [nil]
def write_json(dest_file)
  nil
end
#write_sqlite(db_id) ⇒ Object
Store transcript in a Sqlite database (though the essence of this method should work for all relational dbs). Unlike some of the other write_xxx() methods, this one requires a @metadata array. Parameters: db_id - for SQLite, this is a filename; video_id - a unique identifier for the video.
129 130 131 132 133 134 135 136 |
# File 'lib/sm_transcript/transcript.rb', line 129

# Opens the SQLite database "<db_id>.sqlite3" and builds the column
# definition list ("name TEXT, name TEXT, ...") from the child element names
# of the first matching node.
#
# No table is created and no rows are written by this method; it only
# computes and returns the column definition string.
#
# @param db_id [String, nil] database file name without the ".sqlite3"
#   extension; falls back to "sm-transcript" when nil or empty
# @return [String] comma-separated "<field> TEXT" column definitions
def write_sqlite(db_id)
  # Honour the caller's db_id. It used to be overwritten unconditionally with
  # "sm-transcript", which made the parameter meaningless.
  db_id = "sm-transcript" if db_id.nil? || db_id.to_s.empty?
  db = SQLite3::Database.open("#{db_id}.sqlite3")
  begin
    # NOTE(review): `doc` and `inner_node_name` are not defined anywhere in
    # this listing, and XPath presumably refers to REXML::XPath - confirm
    # where these come from before relying on this method.
    fields = XPath.match(doc.root, inner_node_name + '[1]/*').map { |node| node.name }
    fields.map { |x| "%s TEXT" % x }.join(', ')
  ensure
    # Release the database handle even if the XPath lookup raises.
    db.close
  end
end
#write_ttml(dest_file) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/sm_transcript/transcript.rb', line 54

# Writes the transcript to +dest_file+ as a TTML (timed text) document.
#
# Consecutive words whose start time falls in the same whole second are
# grouped into one phrase. Each phrase becomes a <p> element carrying:
#   xml:id - "T" followed by the phrase's start second
#   begin  - start time of the phrase's first word, in milliseconds
#   end    - end time of the phrase's last word, in milliseconds
#   dur    - end minus begin, in milliseconds
# An existing file at +dest_file+ is overwritten. An empty word list yields
# a document with an empty <div>.
#
# @param dest_file [String] path of the file to create or overwrite
def write_ttml(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?
  #  if File.exists?(dest_file)
  #    p "overwriting existing destination file"
  #  end

  # Group the words into phrases first, so every <p> is emitted with the id
  # and timing of the phrase it actually contains. (Flushing from inside the
  # word loop used to label each paragraph with the *next* phrase's id and
  # emitted an empty leading paragraph.)
  phrases = []
  @words.each do |w|
    start_ms = w.start_time.to_i
    # Reduce granularity to whole seconds so multiple words form a phrase.
    start_secs = start_ms / 1000

    phrase = phrases.last
    if phrase && phrase[:secs] == start_secs
      phrase[:text] << " #{w.word}"
    else
      # Every word, including the first of a phrase, keeps its leading space.
      phrase = { :secs => start_secs, :begin => start_ms, :text => " #{w.word}" }
      phrases << phrase
    end
    # The phrase ends where its most recent word ends.
    phrase[:end] = w.end_time.to_i
  end

  buf = ""
  bldr = Builder::XmlMarkup.new( :target => buf, :indent => 2 )
  bldr.instruct!
  bldr.tt("xmlns" => "http://www.w3.org/2006/04/ttaf1",
          "xmlns:tts" => "http://www.w3.org/ns/ttml#styling",
          "xmlns:ttm" => "http://www.w3.org/ns/ttml#metadata",
          "xml:lang" => "en" ) {
    bldr.head { |b|
      b.ttm :title, 'The title of this transcript'
      b.ttm :desc, 'The description of this transcript'
    }
    bldr.body {
      bldr.div {
        phrases.each do |phrase|
          bldr.p( phrase[:text],
                  "xml:id" => "T#{phrase[:secs]}",
                  "begin" => "#{phrase[:begin]}ms",
                  "dur" => "#{phrase[:end] - phrase[:begin]}ms",
                  "end" => "#{phrase[:end]}ms" )
        end
      }
    }
  }

  # File.open flushes and closes the file when the block returns.
  File.open(dest_file, "w") { |f| f.puts buf }
end