Class: Storyboard::SRT

Inherits:
Object show all
Defined in:
lib/storyboard/subtitles.rb

Defined Under Namespace

Classes: Page

Constant Summary collapse

SPAN_REGEX =
'[[:digit:]]+:[[:digit:]]+:[[:digit:]]+[,\.][[:digit:]]+'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(contents, parent_options) ⇒ SRT

Returns a new instance of SRT.



92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/storyboard/subtitles.rb', line 92

# Builds an SRT from raw subtitle text and parses it immediately.
#
# contents       - raw subtitle file text; stored as @text.
# parent_options - options object stored as @options (parse reads
#                  @options[:nudge], save reads :work_dir and :basename).
#
# Side effects: sets Storyboard.current_encoding and logs the number of
# parsed entries through LOG.
def initialize(contents, parent_options)
  @options = parent_options
  @text = contents
  @pages = []
  # NOTE(review): this ivar is not read anywhere in the visible code; parse
  # calls Storyboard.needs_KFhimaji instead — confirm whether it is still needed.
  @needs_KFhimaji = false
  # check_bom inspects the first line and stores the detected encoding in @encoding.
  check_bom(@text.lines.first)
  Storyboard.current_encoding = @encoding
  # force_encoding re-tags the receiver in place; @text is the same object as
  # contents, so the caller's string is re-tagged too.
  @text = text.force_encoding(Storyboard.current_encoding)
  parse
  clean_promos
  LOG.info("Parsed subtitle file. #{count} entries found.")
end

Instance Attribute Details

#encoding ⇒ Object

Returns the value of attribute encoding.



90
91
92
# File 'lib/storyboard/subtitles.rb', line 90

# Returns @encoding, the encoding name that check_bom stored
# ('UTF-8' or "UTF-16LE").
def encoding
  @encoding
end

#options ⇒ Object

Returns the value of attribute options.



90
91
92
# File 'lib/storyboard/subtitles.rb', line 90

# Returns @options, the options object handed to the constructor.
def options
  @options
end

#pages ⇒ Object

Returns the value of attribute pages.



90
91
92
# File 'lib/storyboard/subtitles.rb', line 90

# Returns @pages, the list of Page entries collected by parse
# (minus anything clean_promos removed).
def pages
  @pages
end

#text ⇒ Object

Returns the value of attribute text.



90
91
92
# File 'lib/storyboard/subtitles.rb', line 90

# Returns @text, the raw subtitle text given to the constructor
# (re-tagged with the detected encoding during initialization).
def text
  @text
end

Instance Method Details

#check_bom(line) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/storyboard/subtitles.rb', line 106

# Detects the text encoding from a byte-order mark at the start of +line+
# and stores the result in @encoding.
#
# line - the first line of the subtitle file (String). nil or an empty
#        string is accepted and treated as "no BOM".
#
# @encoding becomes "UTF-16LE" when the line starts with the bytes FF FE,
# and 'UTF-8' otherwise (a UTF-8 BOM, EF BB BF, maps to the same default).
#
# Returns +line+ itself. As before, a non-empty line is re-tagged as UTF-8
# in place by force_encoding.
def check_bom(line)
  @encoding = 'UTF-8'
  # An empty file has no first line to inspect; keep the default instead of
  # failing on nil.
  return line if line.nil? || line.empty?

  # BOM bytes never contain a newline, so looking at the leading bytes of the
  # whole string is equivalent to looking at its first line.
  leading_bytes = line.force_encoding('UTF-8').bytes.first(3)
  @encoding = 'UTF-16LE' if leading_bytes.first(2) == [255, 254]
  line
end

#clean_promos ⇒ Object

Strip out obnoxious “CREATED BY L33T DUD3” or “DOWNLOADED FROM __” text



169
170
171
172
173
174
175
176
177
# File 'lib/storyboard/subtitles.rb', line 169

# Removes promotional pages from @pages in place.
#
# A page is dropped when any of its lines matches one of the known promo
# markers below. Returns @pages.
def clean_promos
  promo_markers = [
    /Subtitles downloaded/,
    /addic7ed/,
    /OpenSubtitles/,
    /sync, corrected by/
  ]

  @pages.delete_if do |page|
    promo_markers.any? { |marker| !page[:lines].grep(marker).empty? }
  end
end

#count ⇒ Object



190
191
192
# File 'lib/storyboard/subtitles.rb', line 190

# Number of subtitle pages currently held in @pages.
def count
  @pages.length
end

#fix_encoding(l) ⇒ Object



120
121
122
123
124
125
126
# File 'lib/storyboard/subtitles.rb', line 120

# Repairs a line that was read as UTF-8 but actually holds single-byte
# (ISO-8859-1 style) text.
#
# l - one line of subtitle text (String).
#
# When @encoding is 'UTF-8' and the bytes of +l+ are not valid UTF-8, every
# byte is reinterpreted as a code point and re-packed as UTF-8. Lines that
# are already valid UTF-8 are returned untouched, so genuine multibyte text
# (accents, CJK) is no longer double-encoded. The previous check,
# `(bytes | [233, 146]).empty?`, was a set union and therefore never empty,
# which converted every line.
#
# Returns the repaired String, or +l+ itself when no repair is needed.
def fix_encoding(l)
  return l unless @encoding == 'UTF-8'
  # Validate against UTF-8 on a copy so the caller's string is not re-tagged.
  return l if l.dup.force_encoding('UTF-8').valid_encoding?

  l.unpack('C*').pack('U*')
end

#parse ⇒ Object

Some SRT files are badly malformed, so parsing should be able to rely on more than a single regex. Eventually, these errors should be handled gracefully rather than raised.



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/storyboard/subtitles.rb', line 130

# Parses @text as SRT and appends one Page per subtitle entry to @pages.
#
# The parser is a small state machine driven line by line:
#   :line_no - expects a numeric block index; blank lines are skipped
#   :time    - expects "<start> --> <end>", each side matching SPAN_REGEX
#   :text    - collects subtitle lines until a blank line closes the page
#
# Start and end times are shifted by @options[:nudge]. Markup tags
# (<...>) are stripped from subtitle lines, which are stored as UTF-8.
#
# Raises RuntimeError ("Bad SRT File: ...") when a block index or a time
# range is expected but the line holds something else.
#
# Returns @text (the receiver of each_line), as before.
def parse
  phase = :line_no
  page = nil
  @text.each_line do |l|
    l = fix_encoding(l).strip
    case phase
    when :line_no
      # Strip every non-word character before testing for the block index
      # (presumably to shed BOM remnants and stray punctuation).
      l = l.gsub(Storyboard.encode_regexp('\W'), '')
      if l =~ Storyboard.encode_regexp('^\d+$')
        # Pages are renumbered sequentially; the index in the file is ignored.
        page = Page.new(@pages.count + 1, nil, nil, [])
        phase = :time
      elsif !l.empty?
        raise "Bad SRT File: Should have a block number but got '#{l.force_encoding('UTF-8')}' [#{l.bytes.to_a.join(',')}]"
      end
    when :time
      # Keep only the characters a time range can contain.
      # NOTE(review): '.' is removed here although SPAN_REGEX accepts it as a
      # separator, so "00:00:01.000" style stamps appear unable to match — confirm.
      l = l.gsub(Storyboard.encode_regexp('[^\,\:[0-9] \-\>]'), '')
      if l =~ Storyboard.encode_regexp("^(#{SPAN_REGEX}) --> (#{SPAN_REGEX})$")
        page[:start_time] = STRTime.parse(Regexp.last_match(1)) + @options[:nudge]
        page[:end_time] = STRTime.parse(Regexp.last_match(2)) + @options[:nudge]
        phase = :text
      else
        raise "Bad SRT File: Should have time range but got '#{l}'".force_encoding(Storyboard.current_encoding)
      end
    when :text
      if l.empty?
        phase = :line_no
        @pages << page
      else
        Storyboard.needs_KFhimaji(true) if l.contains_cjk?
        page[:lines] << l.gsub(Storyboard.encode_regexp("<\/?[^>]*>"), "").encode!("UTF-8")
      end
    end
  end

  # A file whose last entry is not followed by a blank line ends while still
  # in the :text phase; without this the final page would be silently lost.
  @pages << page if phase == :text

  @text
end

#save ⇒ Object



179
180
181
182
183
184
# File 'lib/storyboard/subtitles.rb', line 179

# Writes the subtitle text (to_s) to "<work_dir>/<basename>.srt", using the
# :work_dir and :basename entries of options. An existing file is overwritten.
#
# Returns self so calls can be chained.
def save
  target_path = File.join(options[:work_dir], options[:basename] + '.srt')
  File.write(target_path, to_s)
  self
end

#to_s ⇒ Object



186
187
188
# File 'lib/storyboard/subtitles.rb', line 186

# Returns the value of the text reader, i.e. the raw subtitle text.
# NOTE(review): this is not rebuilt from @pages, so pages removed by
# clean_promos still appear in the output — confirm that is intended.
def to_s
   text
end