Class: SmTranscript::Transcript

Inherits:
Object
  • Object
show all
Defined in:
lib/sm_transcript/transcript.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(word_arr) ⇒ Transcript

Returns a new instance of Transcript.



17
18
19
20
# File 'lib/sm_transcript/transcript.rb', line 17

# Builds a transcript around a list of timed words.
#
# @param word_arr the collection of word objects to store; the writer
#   methods iterate it with +each+ and read +start_time+ and +word+
#   from every element.
def initialize(word_arr)
  # NOTE(review): the target of this assignment was missing (the line read
  # " = {}", which does not parse). @metadata is presumed because the
  # write_sqlite description refers to a @metadata collection -- confirm.
  @metadata = {}
  @words = word_arr
end

Instance Attribute Details

#words ⇒ Object (readonly)

Returns the value of attribute words.



15
16
17
# File 'lib/sm_transcript/transcript.rb', line 15

# Reader for the word collection held in @words (assigned from the
# constructor argument).
#
# @return the object stored in @words, returned as-is (no copy is made)
def words
  @words
end

Instance Method Details

#write_html(dest_file) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/sm_transcript/transcript.rb', line 22

# Writes the transcript to +dest_file+ as HTML, one <span> per line.
#
# Consecutive words whose start time falls in the same whole second are
# grouped into a single <span id='T<second>'> element. Every phrase is
# passed through +cleanup_phrase+ (a helper defined outside this method)
# before its closing tag is appended.
#
# An existing file at +dest_file+ is overwritten. An empty word list
# produces an empty file.
#
# @param dest_file path of the file to write
# @return [nil]
def write_html(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?
  # if File.exists?(dest_file)
  #   p "overwriting existing destination file"
  # end
  File.open(dest_file, "w") do |f|
    # nil (rather than 0) marks "no phrase started yet", so that words in
    # the first second of the recording still open a proper <span>.
    span_element = nil
    prev_start_time = nil
    @words.each do |w|
      # get the start time and reduce its granularity so that multiple
      # words fall within a <span> element.
      start_time = w.start_time.to_i / 1000
      if start_time == prev_start_time # append word
        span_element << " #{w.word}"
      else # flush the finished phrase (if any) and start a new one
        f.puts "#{cleanup_phrase(span_element)}</span> " if span_element
        span_element = "<span id='T#{start_time}'>#{w.word}"
        prev_start_time = start_time
      end
    end
    # The loop only flushes a phrase when the next one begins, so the
    # final phrase is still pending here.
    f.puts "#{cleanup_phrase(span_element)}</span> " if span_element
  end
end

#write_json(dest_file) ⇒ Object

The JSON format is defined at url/of/document. It is the format of the static timed-text document that is passed to the player.



118
119
120
# File 'lib/sm_transcript/transcript.rb', line 118

# Placeholder for JSON output. The body is empty, so calling this method
# writes nothing and returns nil.
#
# @param dest_file currently unused
def write_json(dest_file)

end

#write_sqlite(db_id) ⇒ Object

Stores the transcript in a SQLite database (though the essence of this method should work for all relational databases). Unlike some of the other write_xxx() methods, this one requires a @metadata array. Parameters: db_id - for SQLite, this is a filename; video_id - a unique identifier for the video.



129
130
131
132
133
134
135
136
# File 'lib/sm_transcript/transcript.rb', line 129

# Opens the SQLite database file and derives a column-definition string
# ("<name> TEXT, <name> TEXT, ...") from the child element names of the
# first matching XML node. No table is created and no rows are written.
#
# @param db_id database identifier; see the review note below
# @return [String] the comma-separated column definitions
def write_sqlite(db_id)
  # NOTE(review): this assignment discards the caller's db_id, so the file
  # opened is always "sm-transcript.sqlite3". Left as-is to avoid changing
  # which file existing callers touch -- confirm whether it is a leftover.
  db_id = "sm-transcript"
  db = SQLite3::Database.open(db_id + '.sqlite3')

  begin
    # NOTE(review): doc, inner_node_name and XPath are not defined in this
    # method; presumably they are provided elsewhere in the file -- verify.
    fields = XPath.match(doc.root, inner_node_name + '[1]/*').map { |node| node.name }
    field_def = fields.map { |x| "%s TEXT" % x }.join(', ')
  ensure
    # Release the database handle even if the lookup above raises.
    db.close
  end
end

#write_ttml(dest_file) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/sm_transcript/transcript.rb', line 54

# Writes the transcript to +dest_file+ as a TTML (timed text) document.
#
# Consecutive words whose start time falls in the same whole second are
# grouped into one <p> element. Each paragraph carries:
#   xml:id - "T" followed by the second in which the phrase starts
#   begin  - start time of the first word of the phrase, in ms
#   end    - end time of the last word of the phrase, in ms
#   dur    - end minus begin, in ms
#
# An existing file at +dest_file+ is overwritten. An empty word list
# produces a document with an empty <div>.
#
# @param dest_file path of the file to write
def write_ttml(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?
  # if File.exists?(dest_file)
  #   p "overwriting existing destination file"
  # end
  buf = ""
  bldr = Builder::XmlMarkup.new( :target => buf, :indent => 2 )

  # Emits one finished phrase as a <p> element.
  emit_phrase = lambda do |text, secs, from_ms, to_ms|
    bldr.p( text,
      "xml:id" => "T#{secs}",
      "begin" => "#{from_ms}ms",
      "dur" => "#{to_ms - from_ms}ms",
      "end" => "#{to_ms}ms" )
  end

  bldr.instruct!
  bldr.tt("xmlns" => "http://www.w3.org/2006/04/ttaf1",
  "xmlns:tts" => "http://www.w3.org/ns/ttml#styling",
  "xmlns:ttm" => "http://www.w3.org/ns/ttml#metadata",
  "xml:lang" => "en" ) {
    bldr.head { |b|
      b.ttm :title, 'The title of this transcript'
      b.ttm :desc,  'The description of this transcript'
    }
    bldr.body {
      bldr.div {
        # nil marks "no phrase started yet"; this avoids emitting an empty
        # paragraph before the first word.
        phrase = nil
        phrase_secs = nil
        start_ms = end_ms = 0
        @words.each do |w|
          # get the start time and reduce its granularity so that
          # multiple words form a phrase.
          secs = w.start_time.to_i / 1000
          if phrase && secs == phrase_secs # append word
            end_ms = w.end_time.to_i
            phrase << " #{w.word}"
          else # flush the finished phrase (if any) and start a new one
            # The id must come from the phrase being flushed
            # (phrase_secs), not from the word that starts the next one.
            emit_phrase.call(phrase, phrase_secs, start_ms, end_ms) if phrase

            phrase = " #{w.word}"
            phrase_secs = secs
            start_ms = w.start_time.to_i
            end_ms   = w.end_time.to_i
          end
        end # @words.each

        # The loop only flushes a phrase when the next one begins, so the
        # final phrase is still pending here.
        emit_phrase.call(phrase, phrase_secs, start_ms, end_ms) if phrase
      }
    }
  }
  # p buf
  # The block form of File.open flushes and closes the file on exit.
  File.open(dest_file, "w") do |f|
    f.puts buf
  end
end