Class: RubyTube::Extractor

Inherits:
Object
  • Object
show all
Defined in:
lib/rubytube/extractor.rb

Class Method Summary collapse

Class Method Details

.apply_descrambler(stream_data) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/rubytube/extractor.rb', line 130

# Collects the entries under "formats" and "adaptiveFormats" into one array
# and fills in the fields later stages read from each entry.
#
# Entries are mutated in place: when an entry has no "url" but carries a
# "signatureCipher", that value is decoded as a form-encoded string and its
# "url" and "s" fields are copied onto the entry. Every entry also gets an
# "is_otf" boolean.
#
# @param stream_data [Hash] hash that may hold "formats" and/or "adaptiveFormats"
# @return [Array<Hash>, nil] the merged entries, or nil when stream_data
#   already has a top-level "url" key
def apply_descrambler(stream_data)
  return if stream_data.key?("url")

  # Concatenate whichever of the two sections are present, in this order.
  merged = %w[formats adaptiveFormats].reduce([]) do |acc, section|
    stream_data.key?(section) ? acc + stream_data[section] : acc
  end

  merged.each do |entry|
    if !entry.key?("url") && entry.key?("signatureCipher")
      decoded = URI.decode_www_form(entry["signatureCipher"]).to_h
      entry["url"] = decoded["url"]
      entry["s"] = decoded["s"]
    end

    entry["is_otf"] = (entry["type"] == "FORMAT_STREAM_TYPE_OTF")
  end

  merged
end

.apply_signature(stream_manifest, vid_info, js) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/rubytube/extractor.rb', line 90

def apply_signature(stream_manifest, vid_info, js)
  cipher = Cipher.new(js)

  stream_manifest.each_with_index do |stream, i|
    begin
      url = stream["url"]
    rescue NoMethodError
      live_stream = vid_info.fetch("playabilityStatus", {})["liveStreamability"]
      if live_stream
        raise LiveStreamError.new("UNKNOWN")
      end
    end

    if url.include?("signature") ||
        (!stream.key?("s") && (url.include?("&sig=") || url.include?("&lsig=")))
      # For certain videos, YouTube will just provide them pre-signed, in
      # which case there's no real magic to download them and we can skip
      # the whole signature descrambling entirely.
      next
    end

    signature = cipher.get_signature(stream["s"])

    parsed_url = URI.parse(url)

    query_params = CGI.parse(parsed_url.query)
    query_params.transform_values!(&:first)
    query_params["sig"] = signature
    unless query_params.key?("ratebypass")
      initial_n = query_params["n"].chars
      new_n = cipher.calculate_n(initial_n)
      query_params["n"] = new_n
    end

    url = "#{parsed_url.scheme}://#{parsed_url.host}#{parsed_url.path}?#{URI.encode_www_form(query_params)}"

    stream_manifest[i]["url"] = url
  end
end

.get_ytplayer_config(html) ⇒ Object

Raises:

  • (RegexMatchError)



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/rubytube/extractor.rb', line 65

# Locates the player configuration object embedded in a watch page.
#
# Each pattern is handed to Parser.parse_for_object in turn; the first call
# that does not raise HTMLParseError supplies the return value.
#
# @param html [String] watch page HTML
# @return [Object] whatever Parser.parse_for_object returns for the first matching pattern
# @raise [RegexMatchError] when every pattern fails with HTMLParseError
def get_ytplayer_config(html)
  # Ordered by priority: the plain assignments first, the setConfig call last.
  patterns = [
    /ytplayer\.config\s*=\s*/,
    /ytInitialPlayerResponse\s*=\s*/,
    /yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*/
  ]

  patterns.each do |regex|
    begin
      return Parser.parse_for_object(html, regex)
    rescue HTMLParseError
      # This pattern did not yield an object; try the next one.
      next
    end
  end

  raise RegexMatchError.new("get_ytplayer_config", "config_patterns, setconfig_patterns")
end

.get_ytplayer_js(html) ⇒ Object

Raises:

  • (RegexMatchError)



50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/rubytube/extractor.rb', line 50

# Extracts the path of the player's base.js from a watch page.
#
# @param html [String] watch page HTML
# @return [String] the first capture group of the first pattern that matches
# @raise [RegexMatchError] when no pattern matches
def get_ytplayer_js(html)
  candidates = [
    %r{(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)}
  ]

  candidates.each do |regex|
    found = html.match(regex)
    return found[1] if found
  end

  raise RegexMatchError.new("get_ytplayer_js", "js_url_patterns")
end

.js_url(html) ⇒ Object



30
31
32
33
34
35
36
37
38
# File 'lib/rubytube/extractor.rb', line 30

# Builds the absolute URL of the player JavaScript for a watch page.
#
# The path is read from the player config's "assets" => "js" entry. When the
# config cannot be found, has no such entry, or the entry is nil, the path is
# taken from get_ytplayer_js instead.
#
# @param html [String] watch page HTML
# @return [String] "https://youtube.com" followed by the player JS path
# @raise [RegexMatchError] propagated from get_ytplayer_js when the fallback also fails
def js_url(html)
  base_js =
    begin
      get_ytplayer_config(html)["assets"]["js"]
    rescue RegexMatchError, NoMethodError
      # No config, or no "assets" section in it.
      nil
    end

  # A config with "assets" but no "js" yields nil without raising; fall back
  # in that case too rather than returning a URL with no path.
  base_js ||= get_ytplayer_js(html)

  "https://youtube.com#{base_js}"
end

.mime_type_codec(mime_type_codec) ⇒ Object

Raises:

  • (RegexMatchError)



40
41
42
43
44
45
46
47
48
# File 'lib/rubytube/extractor.rb', line 40

# Splits a string of the form `type/subtype; codecs="a, b"` into its MIME
# type and its list of codecs.
#
# @param mime_type_codec [String] e.g. 'audio/webm; codecs="opus"'
# @return [Array(String, Array<String>)] the MIME type and the codec names
#   with surrounding whitespace stripped
# @raise [RegexMatchError] when the string does not have the expected form
def mime_type_codec(mime_type_codec)
  pattern = %r{(\w+/\w+);\scodecs="([a-zA-Z\-0-9.,\s]*)"}
  results = mime_type_codec.match(pattern)

  # Use the same (caller, pattern) argument form as the other RegexMatchError
  # call sites in this file.
  raise RegexMatchError.new("mime_type_codec", pattern.inspect) if results.nil?

  mime_type, codecs = results.captures
  [mime_type, codecs.split(",").map(&:strip)]
end

.playability_status(watch_html) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/rubytube/extractor.rb', line 4

# Reads the playability status out of a watch page.
#
# The page is passed to initial_player_response, the result is parsed as
# JSON, and its "playabilityStatus" object is inspected.
#
# @param watch_html [String] watch page HTML
# @return [Array] one of:
#   - ["LIVE_STREAM", "Video is a live stream."] when "liveStreamability" is present
#   - [status, [reason]] when both "status" and "reason" are present
#   - [status, messages] when "status" and "messages" are present
#   - [nil, [nil]] otherwise
def playability_status(watch_html)
  parsed = JSON.parse(initial_player_response(watch_html))
  status = parsed["playabilityStatus"] || {}

  return ["LIVE_STREAM", "Video is a live stream."] if status.key?("liveStreamability")

  if status.key?("status")
    # "reason" takes precedence over "messages" when both are present.
    return [status["status"], [status["reason"]]] if status.key?("reason")
    return [status["status"], status["messages"]] if status.key?("messages")
  end

  [nil, [nil]]
end

.video_id(url) ⇒ Object



26
27
28
# File 'lib/rubytube/extractor.rb', line 26

# Extracts the video id from a URL.
#
# The pattern looks for 11 characters from [0-9A-Za-z_-] directly after
# either "v=" or a "/", and capture group 1 is requested from
# Utils.regex_search.
#
# @param url [String] a video URL
# @return [Object] whatever Utils.regex_search returns for group 1
def video_id(url)
  id_pattern = /(?:v=|\/)([0-9A-Za-z_-]{11}).*/
  Utils.regex_search(id_pattern, url, 1)
end