Class: TorrentCrawler::Crawlers::Mininova

Inherits:
Base
  • Object
show all
Defined in:
lib/crawlers/mininova.rb

Instance Attribute Summary

Attributes inherited from Base

#results

Instance Method Summary collapse

Methods inherited from Base

#headers, #initialize, #result, #tracker_key

Constructor Details

This class inherits a constructor from TorrentCrawler::Crawlers::Base

Instance Method Details

#detail(tracker_id) ⇒ Object


41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/crawlers/mininova.rb', line 41

# Scrapes the Mininova detail page of a single torrent.
#
# Downloads the page at +detail_url(tracker_id)+ (sending +headers+) and
# fills a torrent record from the page heading and the paragraphs of the
# +#torrentdetails+ box. Uploader, seeders, leechers and snatches are left
# unset by this method.
#
# @param tracker_id [Object] id interpolated into the detail URL and stored
#   on the torrent unchanged
# @return [Object] whatever +result+ returns for the block (presumably the
#   populated torrent -- confirm against Base#result)
# @raise [NoMethodError] if one of the expected elements is missing from the
#   page (the lookups call +first.text+ without a nil check)
def detail(tracker_id)
  doc = Nokogiri::HTML(open(detail_url(tracker_id), headers))

  # Stripped text of the n-th child paragraph inside the details box.
  paragraph = ->(n) { doc.css("#torrentdetails p:nth-child(#{n})").first.text.strip }

  result do |torrent|
    torrent.tracker_id = tracker_id
    torrent.hash       = paragraph.call(2).gsub(/Info hash:\s*(.*)/, '\1')
    torrent.title      = doc.css('h1').first.text.gsub(/Details of (.*)/, '\1')

    # The sixth paragraph carries both values, e.g. "1.2 GB in 12 files".
    size_and_files = paragraph.call(6)
    torrent.size   = size_and_files.gsub(/(.*) in \d+ files?/, '\1')
    # The prefix must be matched lazily: a greedy `.*` swallows every digit
    # but the last one, turning "12 files" into "2".
    torrent.files  = size_and_files.gsub(/.*?(\d+) files?$/m, '\1')

    torrent.uploaded_at = DateTime.parse(paragraph.call(3).gsub(/Added on:\s*(.*)/, '\1'))

    torrent
  end
end

#detail_url(tracker_id) ⇒ Object


7
8
9
# File 'lib/crawlers/mininova.rb', line 7

# Builds the address of a torrent's detail page on Mininova.
#
# @param tracker_id [#to_s] id appended to the +/det/+ path
# @return [String] absolute URL of the detail page
def detail_url(tracker_id)
  format('http://www.mininova.org/det/%s', tracker_id)
end

#index(last_seen = nil) ⇒ Object


11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/crawlers/mininova.rb', line 11

# Scrapes the torrent listing on the Mininova front page.
#
# Downloads +index_url+ (sending +headers+), walks the rows of the main
# table and appends one torrent per row to +results+. Hash, uploader, file
# count and snatches are left unset by this method.
#
# @param last_seen [String, nil] tracker id at which to stop; when a row
#   with this id is reached, crawling ends and that row is not added
# @return [Object] the +results+ collection
# @raise [NoMethodError] if a torrent row lacks one of the expected cells
def index(last_seen = nil)
  doc = Nokogiri::HTML(open(index_url, headers))

  doc.css('table.maintable:nth-child(2) tr').each do |tr|
    # Skip rows without a link in the second <td>. The same node is used for
    # the guard and for the id extraction, so a row that only has links
    # elsewhere (e.g. inside <th> cells) can no longer pass the check and
    # then fail on a nil lookup.
    link = tr.css('td:nth-child(2) a').first
    next unless link

    result do |torrent|
      torrent.tracker_id = link['href'].gsub(%r{^.*/get/(\d+)/?$}, '\1')

      # Returning from inside the block leaves the whole method.
      return results if torrent.tracker_id == last_seen

      torrent.title    = tr.css('td:nth-child(2) a:nth-child(2)').first.text.strip
      torrent.size     = tr.css('td:nth-child(3)').first.text.strip
      torrent.seeders  = tr.css('td:nth-child(4)').first.text.strip
      torrent.leechers = tr.css('td:nth-child(5)').first.text.strip
      # No upload time is read from the listing; the crawl time is stored.
      torrent.uploaded_at = Time.now

      torrent.tags << tr.css('td:nth-child(1)').first.text.strip
      torrent.tags << tr.css('td:nth-child(2) small strong').first.text.strip

      results << torrent
    end
  end

  results
end

#index_url ⇒ Object


3
4
5
# File 'lib/crawlers/mininova.rb', line 3

# Address of the page that lists torrents on Mininova.
#
# @return [String] absolute URL of the front page
def index_url
  'http://www.mininova.org/'
end