Class: SmTranscript::SbvReader

Inherits:
Object
  • Object
show all
Defined in:
lib/sm_transcript/sbv_reader.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src_file) ⇒ SbvReader

Returns a new instance of SbvReader.



21
22
23
24
25
26
# File 'lib/sm_transcript/sbv_reader.rb', line 21

# Builds a reader and parses the given source straight away.
#
# Starts with an empty metadata hash and an empty word list, then runs
# the metadata step followed by the word-parsing step.
#
# @param src_file [#each] source whose lines are handed to #parse_words
def initialize(src_file)
  @metadata = {}
  @words = []
  parse_metadata
  parse_words(src_file)
end

Instance Attribute Details

#metadata ⇒ Object (readonly)

Returns the value of attribute metadata.



13
14
15
# File 'lib/sm_transcript/sbv_reader.rb', line 13

# Returns the value of attribute metadata.
#
# @return [Hash] the metadata hash created empty by the constructor
def metadata
  @metadata
end

#words ⇒ Object (readonly)

Returns the value of attribute words.



14
15
16
# File 'lib/sm_transcript/sbv_reader.rb', line 14

# Returns the value of attribute words.
#
# @return [Array] the list that the constructor creates empty and that
#   #parse_words appends SmTranscript::Word objects to
def words
  @words
end

Class Method Details

.from_file(file_name) ⇒ Object



16
17
18
19
# File 'lib/sm_transcript/sbv_reader.rb', line 16

# Opens +file_name+ and builds a reader from its contents.
#
# The block form of File.open is used so the file handle is closed as soon
# as the reader has been constructed. The constructor reads the source
# eagerly and does not keep a reference to it, so closing here is safe.
#
# @param file_name [String] path of the file to read
# @return [SbvReader] the reader built from the file
def self.from_file(file_name)
  File.open(file_name) { |src| new(src) }
end

Instance Method Details

#parse_metadata ⇒ Object



28
29
30
31
32
# File 'lib/sm_transcript/sbv_reader.rb', line 28

# Records the segment file name under the "orig_seg_path" metadata key.
#
# The value stored is the result of matching the "fileName" attribute of
# @root against a pattern for a trailing "<name>.seg" component (so it is
# a MatchData, or nil when the attribute does not end in ".seg").
#
# Nothing in this class assigns @root, so when it is nil the step is
# skipped and the metadata hash is left untouched instead of raising
# NoMethodError from the constructor.
#
# @return [MatchData, nil] the stored match, or nil when skipped/no match
def parse_metadata
  return if @root.nil?

  seg_name = Regexp.new('[\w\-_]*\.seg$')
  # absolute path to segfile on processor
  file_name = @root.attributes.get_attribute("fileName").value
  @metadata["orig_seg_path"] = seg_name.match(file_name)
end

#parse_words(src_file) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/sm_transcript/sbv_reader.rb', line 34

# Collects word entries from +src_file+ into @words.
#
# Every line yielded by src_file.each is scanned for entries of the form
# "<digits> <digits> <word>". The word may contain word characters and
# apostrophes and may be prefixed with a percent sign, e.g. "%noise".
# Each entry becomes a SmTranscript::Word built from the first number, the
# second number, their integer difference (second - first) and the word.
#
# NOTE(review): this method was documented as reading blank-line-separated
# blocks whose first line holds two comma-separated timecodes in the form
# n:nn:nn.nnn, followed by one or more lines of space-separated words.
# The pattern below does not match such timecode lines, so input in that
# layout yields no words -- confirm which format is intended.
#
# @param src_file [#each] source yielding one line of text at a time
def parse_words(src_file)
  src_file.each do |line|
    line.scan(/^\d* \d* %?[\w']*$/) do |entry|
      first, second, text = entry.split
      span = second.to_i - first.to_i
      @words << SmTranscript::Word.new(first, second, span, text)
    end
  end
end