Class: SmTranscript::SegReader

Inherits:
Object
  • Object
show all
Defined in:
lib/sm_transcript/seg_reader.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(xml_doc) ⇒ SegReader

Returns a new instance of SegReader.



21
22
23
24
25
26
27
# File 'lib/sm_transcript/seg_reader.rb', line 21

# Builds a reader around an already-parsed XML document and eagerly
# extracts both the metadata and the word list from it.
#
# @param xml_doc [#root] parsed document; only its +root+ element is kept
#   (from_file passes a REXML::Document)
def initialize(xml_doc)
  @metadata = {}
  @words = []
  @root = xml_doc.root
  # Both parsers read @root and fill the collections initialised above,
  # so they must run after the assignments.
  parse_metadata
  parse_words
end

Instance Attribute Details

#metadata ⇒ Object (readonly)

Returns the value of attribute metadata.



13
14
15
# File 'lib/sm_transcript/seg_reader.rb', line 13

# Reader for the metadata hash that is populated during construction.
#
# @return [Hash] the value of the +@metadata+ instance variable
def metadata
  @metadata
end

#words ⇒ Object (readonly)

Returns the value of attribute words.



14
15
16
# File 'lib/sm_transcript/seg_reader.rb', line 14

# Reader for the list of words collected while parsing the document.
#
# @return [Array] the value of the +@words+ instance variable
def words
  @words
end

Class Method Details

.from_file(file_name) ⇒ Object



16
17
18
19
# File 'lib/sm_transcript/seg_reader.rb', line 16

# Parses the XML file at +file_name+ and wraps it in a new reader.
#
# The file is opened with the block form of File.open so the handle is
# closed as soon as parsing and construction finish, rather than being
# left for the garbage collector.
#
# @param file_name [String] path to the XML file to read
# @return [Object] the instance returned by +new+
# @raise [SystemCallError] if the file cannot be opened (e.g. Errno::ENOENT)
def self.from_file(file_name)
  File.open(file_name) do |file|
    new(REXML::Document.new(file))
  end
end

Instance Method Details

#parse_metadata ⇒ Object



29
30
31
32
33
# File 'lib/sm_transcript/seg_reader.rb', line 29

# Records where the segment file came from, based on the root element's
# +fileName+ attribute.
#
# The value stored under "orig_seg_path" is whatever Regexp#match returns:
# a MatchData covering the trailing "<name>.seg" part of the attribute, or
# nil when the attribute does not end in ".seg".
# NOTE(review): the original inline comment described this as the absolute
# path to the seg file on the processor, but the pattern only matches the
# trailing file name -- confirm which one callers expect.
#
# @return [MatchData, nil] the value that was stored
def parse_metadata
  reg = Regexp.new('[\w\-_]*\.seg$')
  file_name = @root.attributes.get_attribute("fileName").value
  @metadata["orig_seg_path"] = reg.match(file_name)
end

#parse_words ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/sm_transcript/seg_reader.rb', line 35

# Collects one SmTranscript::Word per matching line of every
# /document/lecture/segment element and appends it to @words.
#
# A matching line consists of two integers and a single token made of word
# characters and apostrophes, separated by single spaces. Each Word is
# built from (first integer, second integer, second - first, token); the
# two integers are passed through as strings.
#
# Each matching line yields exactly one Word. The earlier version appended
# the last parsed line a second time after every segment, which duplicated
# that word and, for a segment with no matching lines, re-added a stale or
# all-nil word. Segments without any text are skipped.
def parse_words()
  @root.elements.each("/document/lecture/segment") do |segment|
    text = segment.text
    next if text.nil? # element has no text node

    text.scan(/^\d* \d* [\w']*$/) do |line|
      first, second, token = line.split
      @words << SmTranscript::Word.new(first, second, second.to_i - first.to_i, token)
    end
  end
end