Class: Copycasts::Crawling

Inherits:
Object
  • Object
show all
Defined in:
lib/copycasts.rb

Constant Summary collapse

TARGET_URL =
'http://railscasts.com'

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Crawling

Returns a new instance of Crawling.



11
12
13
# File 'lib/copycasts.rb', line 11

# Creates a crawler and decides how many listing pages it will walk.
#
# @param options [Hash] optional settings; only the :page key is read here.
#   When :page is nil or false (or absent) the page count is taken from
#   #maximum_page instead.
def initialize(options = {})
  requested_pages = options[:page]
  @pages = if requested_pages
             requested_pages
           else
             maximum_page
           end
end

Instance Method Details

#download_videos ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/copycasts.rb', line 64

# Downloads every URL returned by #mp4_video_links into the current working
# directory, streaming each response body to disk and updating a progress bar
# as chunks arrive.
#
# Each file is named after the last segment of the URL's path, so a query
# string on the link does not end up in the file name. The progress bar title
# shows how many files were completed before the current one.
#
# @return [nil] the result of the final puts
def download_videos
  mp4_video_links.each_with_index do |video_link, downloaded|
    uri = URI.parse(video_link)
    file_name = File.basename(uri.path)
    received_bytes = 0

    # Connect to the port and scheme the link actually names; starting with
    # the host alone would always talk plain HTTP on port 80.
    Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https') do |http|
      # HEAD first so the progress bar total can be taken from Content-Length.
      response = http.request_head(uri.request_uri)
      progress = ProgressBar.new("#{downloaded} downloaded", response['content-length'].to_i)
      File.open(file_name, "wb") do |file|
        # With a block, Net::HTTP#get yields the body piece by piece instead
        # of buffering the whole video in memory.
        http.get(uri.request_uri) do |chunk|
          file.write(chunk)
          received_bytes += chunk.bytesize
          progress.set(received_bytes)
        end
      end
    end
  end

  puts "Downloaded all files successfully!"
end


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/copycasts.rb', line 15

# Walks listing pages 1 through @pages of TARGET_URL (free episodes only) and
# gathers the href of every element matched by '.watch a:first', with a
# '?autoplay=true' suffix removed.
#
# Prints the page numbers, comma separated, as it goes.
#
# @return [Array<String>] the collected hrefs in page order
def get_links
  puts "Start crawling page "
  last_page = @pages

  collected = (1..last_page).flat_map do |page_number|
    print "#{page_number}"
    print ", " unless page_number == last_page

    listing = Nokogiri::HTML(open(TARGET_URL + "/?type=free&page=#{page_number}"))
    listing.css('.watch a:first').map do |anchor|
      anchor['href'].to_s.sub('?autoplay=true', '')
    end
  end

  puts "\n"
  collected
end

#maximum_page ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/copycasts.rb', line 32

# Reads the first free-episode listing page and works out how many listing
# pages exist from its pagination links.
#
# Only links whose text is numeric are considered; labels that are not
# numbers are ignored. The largest positive value wins regardless of the
# order the links appear in.
#
# @return [Integer] the highest page number found, or 0 when the page has no
#   positive numeric pagination link
def maximum_page
  target_page = Nokogiri::HTML(open(TARGET_URL + "/?type=free"))

  page_numbers = target_page.css('.pagination a')
                            .map(&:content)
                            .grep(/\A[+-]?\d+?(\.\d+)?\Z/)
                            .map(&:to_i)
                            .select { |number| number > 0 }

  page_numbers.max || 0
end


45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/copycasts.rb', line 45

# Visits every episode page returned by #get_links and collects the href of
# the first anchor matched by '.downloads li[3] a' on each page.
#
# Pages where that anchor is missing, or has no href, are skipped with a
# warning rather than aborting the whole crawl. The progress bar advances
# once per page either way.
#
# @return [Array<String>] the collected download URLs, in crawl order
def mp4_video_links
  mp4_links = []
  page_links = get_links

  puts "Start crawling for download target"
  progress = ProgressBar.new("Crawling:", page_links.length)

  page_links.each_with_index do |video_link, index|
    video_page = Nokogiri::HTML(open(TARGET_URL + "/" + video_link))
    link = video_page.css('.downloads li[3] a').first
    # Ask for the href by name; relying on attribute order would pick up
    # e.g. a class attribute that happens to come first.
    href = link && link['href']

    if href
      mp4_links << href
    else
      warn "No download link found for #{video_link}, skipping"
    end

    progress.set(index + 1)
  end
  puts "\n"
  mp4_links
end