Class: SmTranscript::Transcript

Inherits:
Object
  • Object
show all
Defined in:
lib/sm_transcript/transcript.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(word_arr) ⇒ Transcript

Returns a new instance of Transcript.



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/sm_transcript/transcript.rb', line 21

# Builds a transcript around a collection of word objects.
#
# The collection is validated up front. When it is nil, cannot be iterated
# with each(), or is empty, a message is written to STDERR and the process
# exits with status -1.
#
# @param word_arr [#each] the word objects that make up the transcript
def initialize(word_arr)
  # NOTE(review): the receiver of this assignment is missing in the listing
  # under review; @metadata is assumed from the write_sqlite description,
  # which mentions a @metadata array -- confirm against the real source.
  @metadata = {}
  @words = word_arr

  # The nil check comes first so the later checks never call a method on nil.
  problem =
    if @words.nil?
      "words is nil"
    elsif !@words.respond_to?(:each)
      "object doesn't have an each() method"
    elsif @words.respond_to?(:empty?) && @words.empty?
      "words array is empty"
    end
  return unless problem

  STDERR.puts problem
  exit(-1)
end

Instance Attribute Details

#words ⇒ Object (readonly)

Returns the value of attribute words.



19
20
21
# File 'lib/sm_transcript/transcript.rb', line 19

# Read-only accessor for the word collection held by this transcript.
#
# @return [Object] the current value of @words
def words
  @words
end

Instance Method Details

#write_html(dest_file) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/sm_transcript/transcript.rb', line 39

# Writes the transcript as HTML, grouping words into one <span> per second.
#
# Each word's start_time (milliseconds) is reduced to whole seconds; all
# consecutive words that share a second are joined into a single
# <span id='T<seconds>'> element. Every span is passed through
# cleanup_phrase before it is written. The destination path is echoed to
# STDERR.
#
# @param dest_file [String] path of the file to create or overwrite
# @return [Object] the value of the File.open block
def write_html(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?
  # if File.exists?(dest_file)
  #   p "overwriting existing destination file"
  # end
  STDERR.puts "words is nil" if @words.nil?
  STDERR.puts dest_file

  File.open(dest_file, "w") do |f|
    # write Title into <head> for the benefit of Google Search Appliance
    f.puts '<head>'
    
    f.puts '</head>'
    f.puts '<body>'

    # nil (rather than 0) marks "no span open yet", so words that start in
    # second 0 get a proper opening tag and are not thrown away.
    span_element = nil
    span_secs = nil

    @words.each do |w|
      # get the start time and reduce its granularity so that multiple
      # words fall within a <span> element.
      start_time = w.start_time.to_i / 1000
      if span_element && start_time == span_secs
        span_element << " #{w.word}"
      else
        f.puts "#{cleanup_phrase(span_element)}</span> " if span_element
        span_element = "<span id='T#{start_time}'>#{w.word}"
        span_secs = start_time
      end
    end

    # The loop only writes a span when the next one begins, so the final
    # span is still pending here (unless there were no words at all).
    f.puts "#{cleanup_phrase(span_element)}</span> " if span_element
    f.puts '</body>'
  end
end

#write_json(dest_file) ⇒ Object

The JSON format is defined at url/of/document. It is the format of the static timed-text document that is passed to the player.



155
156
157
# File 'lib/sm_transcript/transcript.rb', line 155

# Placeholder for the JSON writer. Nothing is written yet.
#
# @param dest_file [String] currently unused
# @return [nil]
def write_json(dest_file)
  # Not implemented: no file is touched and nil is returned.
  nil
end

#write_sqlite(db_id) ⇒ Object

Store transcript in a SQLite database (though the essence of this method should work for all relational databases). Unlike some of the other write_xxx() methods, this one requires a @metadata array. Parameters: db_id - for SQLite, this is a filename; video_id - a unique identifier for the video.



166
167
168
169
170
171
172
173
# File 'lib/sm_transcript/transcript.rb', line 166

# Opens the SQLite database for this transcript and derives a column
# definition list from the first inner node of the source document.
#
# NOTE(review): this method is unfinished. It builds the "<name> TEXT, ..."
# column list but never creates a table or inserts rows. `doc` and
# `inner_node_name` are not defined in the part of the file under review;
# presumably they are helpers defined elsewhere -- confirm.
#
# @param db_id [String] database name; '.sqlite3' is appended to form the
#   filename. Falls back to "sm-transcript" when nil or empty.
# @return [String] comma-separated column definitions
def write_sqlite(db_id)
  # Honour the caller's db_id instead of unconditionally overwriting it.
  db_name = db_id.to_s.empty? ? "sm-transcript" : db_id.to_s
  db = SQLite3::Database.open(db_name + '.sqlite3')

  begin
    fields = XPath.match(doc.root, inner_node_name + '[1]/*').map(&:name)
    fields.map { |x| "%s TEXT" % x }.join(', ')
  ensure
    # Release the database handle even if the XPath lookup raises.
    db.close
  end
end

#write_ttml(dest_file) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/sm_transcript/transcript.rb', line 91

# Writes the transcript as a TTML (timed text) document.
#
# Each word's start_time (milliseconds) is reduced to whole seconds, and
# consecutive words that share a second are merged into one <p> element.
# Every <p> carries:
#   xml:id - "T" followed by the phrase's own second
#   begin  - start_time of the phrase's first word, in ms
#   end    - end_time of the phrase's last word, in ms
#   dur    - end minus begin, in ms
# No <p> is produced when there are no words.
#
# @param dest_file [String] path of the file to create or overwrite
# @return [Object] the value of the File.open block
def write_ttml(dest_file)
  # TODO: Do we want to notify user when overwriting existing file?
  # if File.exists?(dest_file)
  #   p "overwriting existing destination file"
  # end
  buf = ""
  bldr = Builder::XmlMarkup.new( :target => buf, :indent => 2 )
  bldr.instruct!
  bldr.tt("xmlns" => "http://www.w3.org/2006/04/ttaf1",
  "xmlns:tts" => "http://www.w3.org/ns/ttml#styling",
  "xmlns:ttm" => "http://www.w3.org/ns/ttml#metadata",
  "xml:lang" => "en" ) {
    bldr.head { |b|
      b.ttm :title, 'The title of this transcript'
      b.ttm :desc,  'The description of this transcript'
    }
    bldr.body {
      bldr.div {
        # State of the phrase being accumulated; nil means "none yet", so
        # no empty <p> is emitted before the first word.
        phrase = nil
        phrase_secs = nil
        start_ms = nil
        end_ms = nil

        # Emits the pending phrase, labelled with its own second.
        emit_phrase = lambda do
          next unless phrase

          bldr.p( phrase,
            "xml:id" => "T#{phrase_secs}",
            "begin" => "#{start_ms}ms",
            "dur" => "#{end_ms - start_ms}ms",
            "end" => "#{end_ms}ms" )
        end

        @words.each do |w|
          # get the start time and reduce its granularity so that
          # multiple words form a phrase.
          start_secs = w.start_time.to_i / 1000
          if phrase && start_secs == phrase_secs # append word
            phrase << " #{w.word}"
          else # flush the previous phrase and start a new one
            emit_phrase.call
            phrase = " #{w.word}"
            phrase_secs = start_secs
            start_ms = w.start_time.to_i
          end
          end_ms = w.end_time.to_i
        end # @words.each

        # The loop only flushes a phrase when the next one starts, so the
        # final phrase is still pending here.
        emit_phrase.call
      }
    }
  }
  # p buf
  File.open(dest_file, "w") do |f|
    f.puts buf
  end
end