Class: YoutubeTranscript2020

Inherits:
Object
  • Object
show all
Defined in:
lib/youtube_transcript2020.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(id = nil) ⇒ YoutubeTranscript2020

Returns a new instance of YoutubeTranscript2020.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/youtube_transcript2020.rb', line 13

# Builds a transcript object for a YouTube video and requests its English
# timed-text captions.
#
# @param id [String, nil] a bare video id, a
#   https://www.youtube.com/watch?v=... URL or a https://youtu.be/... short
#   URL. When nil, nothing is fetched and the object is left empty (it can
#   then be filled with #import).
def initialize(id=nil)

  return unless id

  # Capture only the id token that follows the URL prefix, so that trailing
  # query parameters (&t=42s, &list=..., ?t=10) never leak into @id and from
  # there into the request below. Anything that is not one of the two
  # recognised URL forms is taken to be a bare video id already.
  @id = id[%r{https://www\.youtube\.com/watch\?(?:[^#]*&)?v=([\w-]+)}, 1] ||
        id[%r{https://youtu\.be/([\w-]+)}, 1] ||
        id

  s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
  # parse and fetch_info are defined outside this listing; parse's result
  # becomes the transcript text returned by #to_s, and fetch_info
  # presumably fills in @title and @author -- TODO confirm.
  @s = parse s

  fetch_info(@id)

end

Instance Attribute Details

#author ⇒ Object (readonly)

Returns the value of attribute author.



11
12
13
# File 'lib/youtube_transcript2020.rb', line 11

# Reader for the author attribute: hands back whatever is currently held in
# @author (nil while nothing has been assigned to it).
def author; @author; end

#id ⇒ Object (readonly)

Returns the value of attribute id.



11
12
13
# File 'lib/youtube_transcript2020.rb', line 11

# Reader for the id attribute: hands back whatever is currently held in @id
# (nil while nothing has been assigned to it).
def id; @id; end

#title ⇒ Object (readonly)

Returns the value of attribute title.



11
12
13
# File 'lib/youtube_transcript2020.rb', line 11

# Reader for the title attribute: hands back whatever is currently held in
# @title (nil while nothing has been assigned to it).
def title; @title; end

#to_a ⇒ Object (readonly)

Returns the value of attribute to_a.



11
12
13
# File 'lib/youtube_transcript2020.rb', line 11

# Reader for the to_a attribute: hands back whatever is currently held in
# @to_a (nil while nothing has been assigned to it).
# NOTE(review): no code in this listing assigns @to_a -- presumably the
# parse step does; confirm.
def to_a; @to_a; end

Instance Method Details

#import(obj) ⇒ Object

reads a plain text transcript which has been modified to include headings



46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/youtube_transcript2020.rb', line 46

# Reads a plain text transcript which has been modified to include headings.
#
# The text is split at the first run of five or more dashes: the part before
# it is handed to SimpleConfig to obtain id, author and title, the part after
# it is the transcript body. Within the body, lines containing a timestamp
# (digits:digits) are paired, in order, with the remaining non-blank lines.
#
# @param obj whatever RXFHelper.read accepts -- presumably a file path, URL
#   or raw string; TODO confirm against RXFHelper
# @return [Array<Array>] the [timestamp_line, text_line] pairs, which are
#   also stored in @a
def import(obj)

  s = RXFHelper.read(obj).first

  header, body = s.split(/-----+/,2)
  # Without a separator there is no transcript body; treat it as empty rather
  # than failing on nil further down.
  body = body.to_s

  h = SimpleConfig.new(header).to_h
  @id, @author, @title = h[:id], h[:author], h[:title]
  @s = body

  # Blank lines (such as the padding #to_s writes after the dashed rule) have
  # to be dropped first: they would otherwise land among the text lines and
  # shift every timestamp onto the wrong line of text.
  rows = body.lines.map(&:chomp).reject {|x| x.strip.empty? }
  timestamps, text = rows.partition {|x| x =~ /\d+:\d+/ }
  @a = timestamps.zip(text)

end

#to_headings ⇒ Object

Outputs plain text containing the headings, including timestamps. Note: this can be helpful for copying and pasting directly into a YouTube comment.



101
102
103
104
105
# File 'lib/youtube_transcript2020.rb', line 101

# Lists the heading entries of the transcript, timestamps included.
# note: This can be helpful for copying and pasting directly into a
# YouTube comment
#
# An entry counts as a heading when its first element (the timestamp field)
# contains a space -- presumably a timestamp followed by the heading text.
#
# @return [Array] the first element of every heading entry; empty when no
#   transcript rows are available
def to_headings()

  # #import stores its rows in @a and never assigns @to_a, so fall back to @a
  # (and finally to no rows at all) instead of calling select on nil.
  rows = @to_a || @a || []
  rows.select {|timestamp, _| timestamp =~ / /}.map(&:first)

end

#to_html ⇒ Object

Outputs HTML containing the embedded video and transcription



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/youtube_transcript2020.rb', line 63

# Renders a standalone HTML page holding the embedded video (an iframe named
# "video") next to a scrollable list of the transcript entries. Every entry
# is a link targeting that iframe with its own start offset.
#
# @return [String] the HTML document
def to_html

  embed_url = 'https://www.youtube.com/embed/' + @id
  item_template = "<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> "

  items = @a.map do |timestamp, text|
    # colon-separated timestamp fields -> a single offset; Subunit is
    # expected to return it in seconds (TODO confirm against the gem)
    parts = timestamp.split(':').map(&:to_i)
    offset = Subunit.new({minutes:60, hours:60}, parts).to_i

    item_template % [embed_url, offset, timestamp, text]
  end

  list = items.join("\n")

  <<~HTML
  <!DOCTYPE html>
  <html lang="en">
  <head>
    <title></title>
    <meta charset="utf-8" />
  </head>
  <body>
  <div style="width: 1080px; background: white">
  <div style="float:left; width: 580px; background: white">
  <iframe width="560" height="315" src="#{embed_url}?start=67&autoplay=1" name="video" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
  <h1>#{@title}</h1>
  </div>
  <div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
  <ul>#{list}</ul>
  </div>

  </div>
  </body>
  </html>
  HTML
end

#to_s ⇒ Object

returns the transcript in plain text including timestamps



38
39
40
41
42
# File 'lib/youtube_transcript2020.rb', line 38

def to_s()

  h = {id: @id, title: @title, author: @author}
  SimpleConfig.new(h).to_s + "\n#{'-'*78}\n\n" + @s
end