Module: YoutubeChannel

Included in:
BrilliantWebScraper
Defined in:
lib/parsers/youtube_channel.rb

Overview

Extracts YouTube URLs (channel/profile, watch, and embed links) from a response body.

Instance Method Summary

Instance Method Details

#grep_youtube_channel(response) ⇒ Object



5
6
7
8
9
10
11
12
13
# File 'lib/parsers/youtube_channel.rb', line 5

# Collects YouTube URLs found in +response+.
#
# Returns early (nil) when +response+ contains no youtube.com URL at all.
# Otherwise three patterns are handed to +scrape_profile+ and the result is
# returned with nil entries and duplicates removed.
#
# NOTE(review): +scrape_profile+ is defined outside this method; it is assumed
# to return an Array of matched strings (possibly containing nil) — confirm.
#
# @param response [String, nil] text to search
# @return [Array, nil] unique non-nil matches, or nil when nothing YouTube-like is present
def grep_youtube_channel(response)
  # Cheap pre-check so the heavier patterns only run on relevant input.
  return unless response =~ %r{(?im)https?:\/\/(?:www\.)?youtube\.com\/}

  # Any youtube.com URL except locale switches (?gl=xx), embed/feeds paths,
  # player/iframe API scripts, watch links and bare "user/#" anchors.
  generic_url = %r{(?im)(https?:\/\/(?:www\.)?youtube\.com\/(?!\?gl=\w{2}|(?:embed|feeds)\/|(?:player_api|iframe_api)(?:"|'|\/|\?)|watch\?|user\/#)[^"'\&<>\s]+)}
  # Watch links carrying a v= parameter. The "?" after "watch" is escaped so
  # it matches the literal query-string marker; unescaped it made the "h"
  # optional and let paths such as "watchmojo?...&v=" through.
  watch_url = %r{(?im)(https?:\/\/(?:www\.)?youtube\.com\/watch\?\S*v=[^<>&'"]+)}
  # Embed links whose id does not begin with "id" or a symbol
  # (presumably unrendered template placeholders — verify).
  embed_url = %r{(?im)(https?:\/\/(?:www\.)?youtube\.com\/embed\/(?!id|{|}|\[|\]|\$|\?|\\|%|\+)[^"'\?<>\s]+)}

  scrape_profile(response, [generic_url, watch_url, embed_url]).compact.uniq
end