Class: YoutubeTranscript2020

Inherits:
Object
  • Object
show all
Defined in:
lib/youtube_transcript2020.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(id = nil) ⇒ YoutubeTranscript2020

Returns a new instance of YoutubeTranscript2020.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/youtube_transcript2020.rb', line 14

# Creates a transcript object, optionally loading the captions for a video.
#
# When no id is given the method returns immediately and nothing is fetched
# (presumably so the object can be populated later, e.g. through #import).
# Otherwise the English timed-text document is downloaded, handed to +parse+
# and the video details are looked up with +fetch_info+ (both helpers are
# defined elsewhere in this file).
#
# @param id [String, nil] a bare video id, or a URL of the form
#   https://www.youtube.com/watch?v=ID or https://youtu.be/ID; any trailing
#   query parameters or fragment (e.g. "&t=42s", "?t=42") are ignored
def initialize(id=nil)

  return unless id

  # Keep only the id itself. Capturing everything after "v=" / "youtu.be/"
  # would let extra parameters leak into @id, which is later used verbatim
  # to build the embed URL. The lookbehinds are not anchored to the start of
  # the string so they agree with the (unanchored) detection patterns.
  @id = case id
  when /https:\/\/www\.youtube\.com\/watch\?v=/
    id[/(?<=https:\/\/www\.youtube\.com\/watch\?v=)[^&#\s]*/]
  when /https:\/\/youtu\.be\//
    id[/(?<=https:\/\/youtu\.be\/)[^?&#\s]*/]
  else
    id
  end

  s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
  @s = parse s

  fetch_info(@id)

end

Instance Attribute Details

#authorObject (readonly)

Returns the value of attribute author.



12
13
14
# File 'lib/youtube_transcript2020.rb', line 12

# Reader for the author attribute.
#
# @return [Object] the current value of @author (nil until it has been set)
def author; @author; end

#idObject (readonly)

Returns the value of attribute id.



12
13
14
# File 'lib/youtube_transcript2020.rb', line 12

# Reader for the id attribute.
#
# @return [Object] the current value of @id (nil until it has been set)
def id; @id; end

#titleObject (readonly)

Returns the value of attribute title.



12
13
14
# File 'lib/youtube_transcript2020.rb', line 12

# Reader for the title attribute.
#
# @return [Object] the current value of @title (nil until it has been set)
def title; @title; end

#to_aObject (readonly)

Returns the value of attribute to_a.



12
13
14
# File 'lib/youtube_transcript2020.rb', line 12

# Reader for the to_a attribute.
#
# @return [Object] the current value of @to_a (nil until it has been set)
def to_a; @to_a; end

Instance Method Details

#import(obj) ⇒ Object

reads a plain text transcript which has been modified to include headings



51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/youtube_transcript2020.rb', line 51

# Reads a plain text transcript (a header, a line of dashes, then the body)
# and loads it into this object.
#
# The header is parsed with SimpleConfig to obtain :id, :author and :title;
# the body is stored as-is in @s and also split into [timestamp, text] pairs
# in @a.
#
# @param obj [Object] anything RXFHelper.read accepts; the first element of
#   what it returns is used as the transcript text
# @return [Array<Array>] the [timestamp, text] pairs stored in @a
def import(obj)

  s = RXFHelper.read(obj).first

  header, body = s.split(/-----+/,2)

  h = SimpleConfig.new(header).to_h
  @id, @author, @title = h[:id], h[:author], h[:title]
  @s = body

  # Blank lines (#to_s always emits one right after the dashed separator)
  # must be dropped first: they would otherwise land in the "text" half of
  # the partition and shift every timestamp/text pair out of alignment.
  lines = body.lines.map(&:chomp).reject {|x| x.strip.empty? }

  # Lines containing something like "1:23" are treated as timestamp lines;
  # the remaining lines are paired with them positionally.
  timestamps, text = lines.partition {|x| x =~ /\d+:\d+/ }
  @a = timestamps.zip(text)

end

#to_headingsObject

Returns the headings, including their timestamps. Note: this can be helpful for copying and pasting directly into a YouTube comment.



106
107
108
109
110
# File 'lib/youtube_transcript2020.rb', line 106

# Lists the entries of @to_a whose first element contains a space
# (presumably a timestamp followed by heading text -- confirm against the
# code that fills @to_a).
#
# @return [Array] the first element of every matching entry
def to_headings

  labels = @to_a.map(&:first)
  labels.select { |label| label =~ / / }

end

#to_htmlObject

Outputs HTML containing the embedded video and transcription



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/youtube_transcript2020.rb', line 68

# Builds an HTML page holding the embedded video player on the left and a
# scrollable list of the transcript entries in @a on the right. Every entry
# is a link that reloads the player (the iframe named "video") at that
# entry's start offset.
#
# NOTE(review): @title and the transcript text are interpolated into the
# markup without HTML escaping -- confirm whether the code that fills them
# already escapes, otherwise text such as "Q&A" or "<b>" ends up raw in the
# page.
#
# @return [String] the HTML document
def to_html()

  url = 'https://www.youtube.com/embed/' + @id

  links = @a.map do |timestamp, s|

    # the colon separated fields of the timestamp are handed to Subunit,
    # whose integer value is used as the player's start offset
    seconds = Subunit.new({minutes:60, hours:60},
                timestamp.split(':').map(&:to_i)).to_i
    "<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> " \
        % [url, seconds, timestamp, s]
  end

  # The embed URL has no query string yet, so the first parameter must be
  # introduced with '?'; with '&' it would become part of the video id.
<<EOF
<!DOCTYPE html>
<html lang="en">
<head>
  <title></title>
  <meta charset="utf-8" />
</head>
<body>
<div style="width: 1080px; background: white">
<div style="float:left; width: 580px; background: white">
<iframe width="560" height="315" src="#{url}?autoplay=1" name="video" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
<h1>#{@title}</h1>
</div>
<div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
<ul>#{links.join("\n")}</ul>
</div>

</div>
</body>
</html>
EOF
end

#to_keywords(level: 2) ⇒ Object

Returns a Hash object containing the frequency of each word. level: 2 ignores common words, including stop words; level: 3 ignores dictionary words.



116
117
118
# File 'lib/youtube_transcript2020.rb', line 116

# Feeds the plain transcript text (see #to_text) to Yawc and returns the
# result as a Hash.
#
# @param level [Integer] passed straight through to Yawc.new
# @return [Hash] whatever Yawc#to_h produces for the transcript text
def to_keywords(level: 2)
  transcript = to_text
  counter = Yawc.new(transcript, level: level)
  counter.to_h
end

#to_sObject

returns the transcript in plain text including timestamps



39
40
41
42
43
# File 'lib/youtube_transcript2020.rb', line 39

def to_s()

  h = {id: @id, title: @title, author: @author}
  SimpleConfig.new(h).to_s + "\n#{'-'*78}\n\n" + @s
end

#to_textObject



45
46
47
# File 'lib/youtube_transcript2020.rb', line 45

# Joins the last element of every entry in @a, one per line.
#
# @return [String] the newline separated values
def to_text
  passages = @a.map { |entry| entry.last }
  passages.join("\n")
end