Class: BcCrawler::Release

Inherits:
Object
  • Object
show all
Defined in:
lib/bc_crawler/release.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Release

Returns a new instance of Release.



9
10
11
12
# File 'lib/bc_crawler/release.rb', line 9

# Builds a release for the given page URL. Nothing is fetched here; the
# track list starts out empty and the URL is kept for a later #crawl.
#
# @param url [String] address of the release page
def initialize(url)
  @tracks = []
  @url = url
end

Instance Attribute Details

#about ⇒ Object (readonly)

Returns the value of attribute about.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @about, which #load_release_info assigns from
# @data['current']['about'].
def about
  @about
end

#art_fullsize_url ⇒ Object (readonly)

Returns the value of attribute art_fullsize_url.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @art_fullsize_url, which #load_release_info assigns from
# @data['artFullsizeUrl'].
def art_fullsize_url
  @art_fullsize_url
end

#art_id ⇒ Object (readonly)

Returns the value of attribute art_id.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @art_id, which #load_release_info assigns from the 'current'
# node of @data.
def art_id
  @art_id
end

#art_thumb_url ⇒ Object (readonly)

Returns the value of attribute art_thumb_url.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @art_thumb_url, which #load_release_info assigns from
# @data['artThumbURL'].
def art_thumb_url
  @art_thumb_url
end

#artist ⇒ Object (readonly)

Returns the value of attribute artist.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @artist, which #load_release_info assigns from
# @data['current']['artist']. #to_s treats a nil artist as "not crawled yet".
def artist
  @artist
end

#band_id ⇒ Object (readonly)

Returns the value of attribute band_id.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @band_id, which #load_release_info assigns from
# @data['current']['band_id'].
def band_id
  @band_id
end

#credits ⇒ Object (readonly)

Returns the value of attribute credits.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @credits, which #load_release_info assigns from
# @data['current']['credits'].
def credits
  @credits
end

#data ⇒ Object (readonly)

Returns the value of attribute data.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @data, the result of JSON.parse over the TralbumData nodes
# that #crawl extracted from the page.
def data
  @data
end

#featured_track_id ⇒ Object (readonly)

Returns the value of attribute featured_track_id.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @featured_track_id, which #load_release_info assigns from
# @data['current']['featured_track_id'].
def featured_track_id
  @featured_track_id
end

#has_audio ⇒ Object (readonly)

Returns the value of attribute has_audio.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @has_audio, which #load_release_info assigns from
# @data['hasAudio'].
def has_audio
  @has_audio
end

#html ⇒ Object (readonly)

Returns the value of attribute html.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @html, the page body that #crawl read from @url.
def html
  @html
end

#id ⇒ Object (readonly)

Returns the value of attribute id.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @id, which #load_release_info assigns from
# @data['current']['id'].
def id
  @id
end

#purchase_url ⇒ Object (readonly)

Returns the value of attribute purchase_url.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @purchase_url, which #load_release_info assigns from
# @data['current']['purchase_url'].
def purchase_url
  @purchase_url
end

#release_date ⇒ Object (readonly)

Returns the value of attribute release_date.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @release_date, which #load_release_info assigns from
# @data['current']['release_date'] (stored as parsed, not converted).
def release_date
  @release_date
end

#title ⇒ Object (readonly)

Returns the value of attribute title.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @title, which #load_release_info assigns from
# @data['current']['title'].
def title
  @title
end

#tracks ⇒ Object (readonly)

Returns the value of attribute tracks.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @tracks: an array that #initialize creates empty and
# #load_track_info fills with one Track per entry of @data['trackinfo'].
def tracks
  @tracks
end

#type ⇒ Object (readonly)

Returns the value of attribute type.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @type, which #load_release_info assigns from
# @data['current']['type'].
def type
  @type
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



5
6
7
# File 'lib/bc_crawler/release.rb', line 5

# Reader for @url, the address passed to #initialize and fetched by #crawl.
def url
  @url
end

Instance Method Details

#crawl(nodes = %w(artFullsizeUrl artThumbURL current hasAudio trackinfo url)) ⇒ Object

Scan the HTML for a particular JavaScript snippet where a variable named “TralbumData” is assigned. TralbumData contains all information about the release (and its tracks), but has to be cleaned first in order to get a valid JSON object.

By default, only the main nodes in TralbumData are crawled. There are more nodes available.

nodes = %w(album_is_preorder album_release_date artFullsizeUrl artist artThumbURL
           current defaultPrice featured_track_id FREE freeDownloadPage hasAudio
           id initial_track_num is_preorder item_type last_subscription_item
           maxPrice minPrice packages PAID playing_from preorder_count trackinfo url)


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/bc_crawler/release.rb', line 24

# Fetches the release page, extracts the requested nodes of the JavaScript
# object assigned to "TralbumData", and stores them (parsed as JSON) in @data.
# Afterwards #load_release_info copies the main values to instance variables.
#
# TralbumData is a JavaScript literal, not JSON: node names are unquoted.
# Each requested node is expected to sit on a single line of the form
# "  name: value,"; that line is lifted out, its name is quoted, and the
# resulting lines are joined into one JSON object.
#
# @param nodes [Array<String>] names of the top-level TralbumData nodes to load
# @raise [RuntimeError] if the page contains no TralbumData assignment
# @raise [ArgumentError] if a requested node is not present in TralbumData
# @raise [JSON::ParserError] if a node's value is not valid JSON
# @return whatever #load_release_info returns
def crawl(nodes = %w(artFullsizeUrl artThumbURL current hasAudio trackinfo url))
  puts "Crawling #{@url}"
  @nodes = nodes

  # URI.open (from open-uri) rather than Kernel#open: since Ruby 3.0 the
  # latter no longer fetches URLs. The block form also closes the handle.
  @html = URI.open(@url, &:read)

  # /m lets "." span line breaks, so the page text never has to be rewritten
  # with a placeholder that could collide with real content.
  js_content = @html[/var TralbumData = \{(.*?)\};/m, 1]
  raise "TralbumData not found in #{@url}" if js_content.nil?

  js_content = js_content
               .delete("\t")       # remove tabs
               .gsub('" + "', '')  # special bug in "url" node: rejoin the split string

  json_nodes = @nodes.map do |node|
    key  = Regexp.escape(node)
    line = js_content[/^ *#{key} *:.*$/]
    raise ArgumentError, "node '#{node}' not found in TralbumData" if line.nil?

    # Quote only the node name itself (the first match on the line); the same
    # text may legitimately occur again inside the value and must stay as is.
    line.sub(/#{key}/) { "\"#{node}\"" }
        .sub(/\s*,\s*\z/, '') # drop the trailing comma (and a CR on CRLF pages)
  end

  @data = JSON.parse("{ #{json_nodes.join(', ')} }")

  # Finally, we load the release info
  load_release_info
end

#load_release_info ⇒ Object

Assign some of the main information to instance variables. TODO: make ALL information available as instance variables.



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/bc_crawler/release.rb', line 51

# Copies the main release values out of the parsed TralbumData hash (@data)
# into instance variables, then builds the track list via #load_track_info.
#
# Reads the top-level nodes 'artFullsizeUrl', 'artThumbURL' and 'hasAudio'
# and the nested 'current' node; @data['current'] must therefore be present.
#
# TODO: make ALL information available as instance variables
#
# @return whatever #load_track_info returns
def load_release_info
  @art_fullsize_url   = @data['artFullsizeUrl']
  @art_thumb_url      = @data['artThumbURL']

  current = @data['current']
  @art_id             = current['art_id'] # key is 'art_id'; 'art_it' never exists
  @about              = current['about']
  @featured_track_id  = current['featured_track_id']
  @credits            = current['credits']
  @artist             = current['artist']
  @purchase_url       = current['purchase_url']
  @band_id            = current['band_id']
  @id                 = current['id']
  @release_date       = current['release_date']
  @type               = current['type']
  @title              = current['title']

  @has_audio          = @data['hasAudio']
  load_track_info
end

#load_track_info ⇒ Object

Tracks have their own class



70
71
72
73
74
# File 'lib/bc_crawler/release.rb', line 70

# Rebuilds the track list: one Track (constructed with this release and the
# raw track hash) per entry of @data['trackinfo'].
#
# The existing array is emptied first so that crawling the same release
# again does not append every track a second time. The array object itself
# is reused, so references previously obtained via #tracks stay current.
#
# @return [Array] the raw @data['trackinfo'] entries that were iterated
def load_track_info
  @tracks.clear
  @data['trackinfo'].each do |track|
    @tracks << Track.new(self, track)
  end
end

#to_s ⇒ Object



76
77
78
79
80
81
82
83
84
# File 'lib/bc_crawler/release.rb', line 76

# Multi-line summary of the release: URL, artist, title and track count,
# each on its own line indented by two spaces. The final line carries a hint
# to call #crawl while the artist is still nil, and is otherwise blank
# (indentation only).
#
# @return [String]
def to_s
  hint = '(use .crawl method to fetch the missing data)' if @artist.nil?
  rows = [
    "URL : #{@url}",
    "Artist : #{@artist}",
    "Release title : #{@title}",
    "Number of tracks : #{@tracks.count}",
    "#{hint}"
  ]
  rows.map { |row| "  #{row}\n" }.join
end