Class: Copycasts::Crawling
- Inherits:
-
Object
- Object
- Copycasts::Crawling
- Defined in:
- lib/copycasts.rb
Constant Summary collapse
- TARGET_URL = 'http://railscasts.com'
Instance Method Summary collapse
- #download_videos ⇒ Object
- #get_links ⇒ Object
- #initialize(options = {}) ⇒ Crawling (constructor): A new instance of Crawling.
- #maximum_page ⇒ Object
- #mp4_video_links ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ Crawling
Returns a new instance of Crawling.
11 12 13 |
# File 'lib/copycasts.rb', line 11

# Creates a crawler and records how many listing pages it should cover.
#
# @param options [Hash] crawler settings
# @option options [Object] :page upper bound stored in @pages; when the key
#   is absent (or its value is nil/false) the bound is taken from
#   #maximum_page instead
def initialize(options = {})
  @pages = options[:page] || maximum_page
end
Instance Method Details
#download_videos ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/copycasts.rb', line 64

# Downloads every URL returned by #mp4_video_links into the current working
# directory, one file at a time, showing a progress bar per file.
#
# Each file is named after the last "/"-separated segment of its URL. The
# progress bar total comes from the Content-Length header of a HEAD request
# (0 when the header is missing).
#
# @return [nil] the result of the final `puts`
def download_videos
  downloaded = 0
  mp4_video_links.each do |video_link|
    uri = URI.parse(video_link)
    file_name = video_link.split("/").last

    # Connect to the port and scheme the link actually names instead of
    # always assuming plain HTTP on port 80.
    Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https') do |http|
      total_bytes = http.request_head(uri.request_uri)['content-length'].to_i
      progress = ProgressBar.new("#{downloaded} downloaded", total_bytes)
      received = 0

      File.open(file_name, "wb") do |file|
        # Stream the body chunk by chunk so the whole video is never held
        # in memory.
        http.get(uri.request_uri) do |chunk|
          file.write(chunk)
          # Content-Length is measured in bytes, so count bytes rather
          # than characters.
          received += chunk.bytesize
          progress.set(received)
        end
      end
    end
    downloaded += 1
  end
  puts "Downloaded all files successfully!"
end
#get_links ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/copycasts.rb', line 15

# Fetches listing pages 1 through @pages of the "type=free" index and
# collects the href of every anchor matched by `.watch a:first`.
#
# Prints the page numbers, comma-separated, while it works.
#
# @return [Array<String>] the collected hrefs with any "?autoplay=true"
#   removed, in page order
def get_links
  puts "Start crawling page "
  episode_links = (1..@pages).flat_map do |page_number|
    print "#{page_number}"
    print ", " unless page_number == @pages

    listing = Nokogiri::HTML(open(TARGET_URL + "/?type=free&page=#{page_number}"))
    listing.css('.watch a:first').map do |anchor|
      anchor['href'].to_s.sub('?autoplay=true', '')
    end
  end
  puts "\n"
  episode_links
end
#maximum_page ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/copycasts.rb', line 32

# Reads the first "type=free" listing page and derives a page count from its
# `.pagination a` links.
#
# @return [Integer] the value of the last pagination link whose text is
#   numeric and converts to a positive integer, or 0 when there is none
def maximum_page
  listing = Nokogiri::HTML(open(TARGET_URL + "/?type=free"))
  labels = listing.css('.pagination a').map { |anchor| anchor.content }

  # Keep only labels that look like numbers; other link text is ignored.
  numeric_labels = labels.select { |label| label.match(/\A[+-]?\d+?(\.\d+)?\Z/) }
  positive_numbers = numeric_labels.map { |label| label.to_i }.select { |number| number > 0 }

  positive_numbers.last || 0
end
#mp4_video_links ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/copycasts.rb', line 45

# Visits every page returned by #get_links and collects the href of the
# anchor matched by `.downloads li[3] a`, showing a progress bar while it
# works.
#
# NOTE(review): presumably the third download entry is the mp4 variant, as
# the method name suggests; confirm against the site markup.
#
# Pages without a matching anchor (or whose anchor has no href) are reported
# on stderr and skipped rather than aborting the whole crawl.
#
# @return [Array<String>] the collected download URLs, in page order
def mp4_video_links
  page_links = get_links
  puts "Start crawling for download target"
  progress = ProgressBar.new("Crawling:", page_links.length)

  mp4_links = []
  page_links.each_with_index do |video_link, index|
    video_page = Nokogiri::HTML(open(TARGET_URL + "/" + video_link))
    link = video_page.css('.downloads li[3] a').first
    # Read the href explicitly instead of relying on it being the anchor's
    # first attribute, and tolerate pages that have no such anchor.
    href = link && link['href']
    if href
      mp4_links << href
    else
      warn "No download link found on #{video_link}; skipping"
    end
    progress.set(index + 1)
  end
  puts "\n"
  mp4_links
end