Class: Javlibrary

Inherits:
Object
  • Object
show all
Defined in:
lib/javlibrary.rb,
lib/javlibrary/version.rb

Constant Summary collapse

# Known site host names; #initialize picks index 1 as the default host.
# Frozen so the shared list cannot be mutated by accident.
JAVLIBRARY_URL = ["jav11b.com", "javlibrary.com"].freeze
# Library version string.
VERSION = "0.3.0".freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(database_name = 'javlibrary', user = 'root', pwd = 'default') ⇒ Javlibrary

Returns a new instance of Javlibrary.



12
13
14
15
16
17
18
19
20
# File 'lib/javlibrary.rb', line 12

# Stores the MySQL connection settings and selects the default site host.
#
# @param database_name [String] kept in @database
# @param user [String] kept in @username
# @param pwd [String] kept in @password
def initialize(database_name = 'javlibrary', user = 'root', pwd = 'default')
    # Connection settings
    @database, @username, @password = database_name, user, pwd

    # The second JAVLIBRARY_URL entry is the default host
    @url = JAVLIBRARY_URL[1]
end

Instance Attribute Details

#database ⇒ Object

Returns the value of attribute database.



22
23
24
# File 'lib/javlibrary.rb', line 22

# Reader for @database.
#
# @return [Object] the current value of @database (nil if never assigned)
def database; @database; end

#password ⇒ Object

Returns the value of attribute password.



22
23
24
# File 'lib/javlibrary.rb', line 22

# Reader for @password.
#
# @return [Object] the current value of @password (nil if never assigned)
def password; @password; end

#url ⇒ Object

Returns the value of attribute url.



22
23
24
# File 'lib/javlibrary.rb', line 22

# Reader for @url.
#
# @return [Object] the current value of @url (nil if never assigned)
def url; @url; end

#username ⇒ Object

Returns the value of attribute username.



22
23
24
# File 'lib/javlibrary.rb', line 22

# Reader for @username.
#
# @return [Object] the current value of @username (nil if never assigned)
def username; @username; end

Instance Method Details

#actor_hash ⇒ Object



164
165
166
167
168
169
170
171
172
173
# File 'lib/javlibrary.rb', line 164

# Builds a lookup from actor name to actor id out of the `actor` table.
#
# Opens a connection through #client, reads every row and maps the string
# form of the row's 'actor_name' to its 'actor_id'. The connection is closed
# even when the query raises.
#
# @return [Hash] actor_name (String) => actor_id
def actor_hash
    connection = client
    begin
        connection.query("SELECT * FROM actor").each_with_object({}) do |row, lookup|
            # to_s keeps the key a String even when the column value is nil
            lookup[row['actor_name'].to_s] = row['actor_id']
        end
    ensure
        connection.close
    end
end

#author_page_num(nokogiri_doc) ⇒ Object



216
217
218
219
220
221
222
# File 'lib/javlibrary.rb', line 216

# Reads the number of the last listing page from a parsed page.
#
# Looks up the "page last" link(s) inside the page selector and takes the
# integer after the final "=" of the href. When several links match, the
# last one wins; when none match, the result is 1.
#
# @param nokogiri_doc [#search] parsed document
# @return [Integer]
def author_page_num(nokogiri_doc)
    last_links = nokogiri_doc.search('//div[@class="page_selector"]/a[@class="page last"]')
    last_links.inject(1) do |_previous, link|
        link['href'].split("=").last.to_i
    end
end

#client ⇒ Object



24
25
26
27
28
29
# File 'lib/javlibrary.rb', line 24

# Opens a new MySQL connection to 127.0.0.1 using the stored credentials.
#
# The username, password and database name are passed in string form.
#
# @return [Mysql2::Client] a fresh client; the caller closes it
def client
    settings = {
        :host => "127.0.0.1",
        :username => @username.to_s,
        :password => @password.to_s,
        :database => @database.to_s
    }
    Mysql2::Client.new(settings)
end

#download_all_video_labels ⇒ Object



322
323
324
325
326
327
328
329
330
331
# File 'lib/javlibrary.rb', line 322

# Runs #select_actor once per letter 'A'..'Z', each in its own thread, and
# waits for all of them to finish.
#
# @return [Array<Thread>] the joined threads, in letter order
def download_all_video_labels
    workers = ('A'..'Z').map do |letter|
        Thread.new { select_actor(letter) }
    end
    workers.map(&:join)
end

#download_all_videos ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/javlibrary.rb', line 146

# Processes every `label` row whose video_download is 0, in random order.
#
# Each row's 'video_num' and 'video_label' are passed to #video_info_insert
# together with the actor and genre lookups. An error raised for one row is
# ignored and the next row is processed.
#
# @return [Array] the shuffled rows
def download_all_videos
    connection = client
    pending = connection.query("SELECT * FROM label WHERE video_download=0")
    connection.close
    # Copy the result set into a plain Array, then randomize the order
    rows = pending.map { |row| row }
    rows.shuffle!

    actors = actor_hash
    categories = genre_hash
    rows.each do |row|
        begin
            video_info_insert(row['video_num'], row['video_label'], actors, categories)
        rescue StandardError
            # Best effort: skip this row
            next
        end
    end
end

#download_all_videos_thread ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/javlibrary.rb', line 112

# Threaded variant of the video download: processes every `label` row whose
# video_download is 0, using one thread per batch of up to 5000 rows.
#
# Inside a batch, rows are handled sequentially through #video_info_insert;
# an error raised for one row is ignored and the next row is processed.
#
# @return [Array<Thread>] the joined worker threads
def download_all_videos_thread
    connection = client
    pending = connection.query("SELECT video_num, video_label FROM label WHERE video_download=0")
    connection.close

    # Split the result set into batches of at most 5000 rows
    batches = pending.each_slice(5000).to_a

    actors = actor_hash
    categories = genre_hash

    workers = batches.map do |batch|
        Thread.new do
            batch.each do |row|
                begin
                    video_info_insert(row['video_num'], row['video_label'], actors, categories)
                rescue StandardError
                    # Best effort: skip this row
                    next
                end
            end
        end
    end

    workers.map(&:join)
end

#download_video_label(actor_id) ⇒ Object



267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/javlibrary.rb', line 267

# Collects every video label listed for one actor and stores them in the
# `label` table.
#
# Fetches the actor's first listing page to read the last page number, then
# walks pages 1..last_page and takes the part after the final "=" of each
# video link's href. Each label is inserted with video_download = '0'; a
# failed INSERT is skipped so the remaining labels are still stored.
#
# @param actor_id [String] actor label placed in the `s=` query parameter
# @return [nil]
# @raise [StandardError] the last fetch error once the same page has failed
#   5 times in a row
def download_video_label(actor_id)
    firsturl = "http://www.#{@url}/ja/vl_star.php?s=#{actor_id}"
    baseurl = "http://www.#{@url}/ja/vl_star.php?&mode=&s=#{actor_id}&page="

    # Bounded retry: a permanently failing page must not loop forever
    max_attempts = 5
    fetch = lambda do |target|
        attempts = 0
        begin
            attempts += 1
            RestClient.get target
        rescue StandardError
            retry if attempts < max_attempts
            raise
        end
    end

    doc = Nokogiri::HTML(fetch.call(firsturl).body)
    last_page = 1
    doc.search('//div[@class="page_selector"]/a[@class="page last"]').each do |row|
        last_page = row['href'].split("=")[-1].to_i
    end

    labels = []
    1.upto(last_page) do |page|
        page_doc = Nokogiri::HTML(fetch.call(baseurl + page.to_s).body)
        page_doc.search('//div[@class="video"]/a').each do |row|
            labels << row['href'].split("=")[-1]
        end
    end

    connection = client
    begin
        labels.each do |label|
            begin
                # Escape the scraped value before embedding it in the statement
                connection.query("INSERT INTO label (video_label, video_download) VALUES ('#{connection.escape(label.to_s)}', '0')")
            rescue StandardError
                # Best effort: a failed INSERT does not stop the remaining labels
                next
            end
        end
    ensure
        # Release the connection even if something unexpected is raised
        connection.close
    end
    nil
end

#downloader(identifer) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/javlibrary.rb', line 31

# Downloads one video page and flattens its details into a "$"-joined string.
#
# The request uses a user agent picked at random from
# Mechanize::AGENT_ALIASES and 2-second open/read timeouts. A timeout is
# retried up to 5 attempts in total; any other error, or running out of
# attempts, returns nil.
#
# @param identifer [String] value placed in the `v=` query parameter
# @return [String, nil] "title$id$date$director$maker$label$cast$genres$img_url",
#   or nil when the page could not be fetched
def downloader(identifer)
    baseurl = "http://www.#{@url}/cn/?v=#{identifer}"
    agent = Mechanize.new
    # sample picks from the whole alias table, whatever its size
    agent.user_agent = Mechanize::AGENT_ALIASES.values.sample
    agent.read_timeout = 2
    agent.open_timeout = 2

    max_attempts = 5
    attempts = 0
    begin
        attempts += 1
        agent.get baseurl
    rescue Timeout::Error
        # Bounded retry so an unresponsive host cannot loop forever
        retry if attempts < max_attempts
        return
    rescue StandardError
        return
    end

    doc = Nokogiri::HTML(agent.page.body)

    video_title = doc.search('div[@id="video_title"]/h3/a').children.text
    details = doc.search('//div[@id="video_info"]/div[@class="item"]/table/tr/td[@class="text"]').map do |cell|
        cell.children.text
    end

    # Every genre name is followed by a single space
    video_genres = doc.search('//div[@id="video_genres"]/table/tr/td[@class="text"]/span[@class="genre"]/a').map do |link|
        "#{link.children.text} "
    end.join

    # The last matching image wins; empty string when there is none
    video_jacket_img = ''
    doc.search('//img[@id="video_jacket_img"]').each do |img|
        video_jacket_img = img['src']
    end

    # return data format: title$id$date$director$maker$label$cast$genres$img_url
    "#{video_title}$#{details[0]}$#{details[1]}$#{details[2]}$#{details[3]}$#{details[4]}$#{details[-1]}$#{video_genres}$#{video_jacket_img}"
end

#genre_hash ⇒ Object



175
176
177
178
179
180
181
182
183
184
# File 'lib/javlibrary.rb', line 175

# Builds a lookup from category name to category id out of the `category`
# table.
#
# Opens a connection through #client, reads every row and maps the string
# form of the row's 'category_name' to its 'category_id'. The connection is
# closed even when the query raises.
#
# @return [Hash] category_name (String) => category_id
def genre_hash
    connection = client
    begin
        connection.query("SELECT * FROM category").each_with_object({}) do |row, lookup|
            # to_s keeps the key a String even when the column value is nil
            lookup[row['category_name'].to_s] = row['category_id']
        end
    ensure
        connection.close
    end
end

#genres ⇒ Object



186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/javlibrary.rb', line 186

# Scrapes the list of genre names from the site's genres page.
#
# The page request is attempted up to 5 times; duplicates are removed from
# the result while keeping first-seen order.
#
# @return [Array<String>] unique genre names
# @raise [StandardError] the last request error once all 5 attempts failed
def genres
    agent = Mechanize.new

    # Bounded retry: a permanently failing request must not loop forever
    max_attempts = 5
    attempts = 0
    begin
        attempts += 1
        agent.get "http://www.#{@url}/cn/genres.php"
    rescue StandardError
        retry if attempts < max_attempts
        raise
    end

    links = Nokogiri::HTML(agent.page.body).search('//div[@class="genreitem"]/a')
    links.map { |link| link.children.text }.uniq
end

#genres_insert ⇒ Object Also known as: download_all_genres



200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/javlibrary.rb', line 200

# Inserts every genre name returned by #genres into the `category` table.
#
# Names are escaped before being embedded in the statement, so a name that
# contains a quote is stored instead of failing. A failed INSERT is skipped
# and the remaining names are still processed. The connection is closed even
# when #genres raises.
#
# @return [nil]
def genres_insert
    connection = client
    begin
        genres.each do |name|
            begin
                connection.query("INSERT INTO category (category_name) VALUES ('#{connection.escape(name.to_s)}')")
            rescue StandardError
                # Best effort: skip this name
                next
            end
        end
    ensure
        connection.close
    end
    nil
end

#get_all_actor ⇒ Object Also known as: download_all_actors



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/javlibrary.rb', line 224

# Scrapes the actor lists for prefixes 'A'..'Z' and stores them in the
# `actor` table.
#
# For each letter the first list page is fetched, #author_page_num gives the
# page count, and every page 1..last_page is fetched and scanned for actor
# links. Each link becomes one row: the link text as actor_name, the part
# after the final "=" of the href as actor_label, and the letter as type.
# Values are escaped before being embedded in the statement; a failed INSERT
# is skipped. The connection is closed even when a fetch raises.
#
# @return [nil]
# @raise [StandardError] the last fetch error once the same page has failed
#   5 times in a row
def get_all_actor
    firsturl = "http://www.#{@url}/cn/star_list.php?prefix="

    # Bounded retry: a permanently failing page must not loop forever
    max_attempts = 5
    fetch = lambda do |target|
        attempts = 0
        begin
            attempts += 1
            RestClient.get target
        rescue StandardError
            retry if attempts < max_attempts
            raise
        end
    end

    connection = client
    begin
        ('A'..'Z').each do |alphabet|
            tempurl = firsturl + alphabet
            last_page = author_page_num(Nokogiri::HTML(fetch.call(tempurl).body))

            1.upto(last_page) do |page_num|
                doc_page = Nokogiri::HTML(fetch.call(tempurl + "&page=#{page_num}").body)
                doc_page.search('//div[@class="starbox"]/div[@class="searchitem"]/a').each do |row|
                    name = connection.escape(row.text.to_s)
                    label = connection.escape(row['href'].split("=")[-1].to_s)
                    begin
                        connection.query("INSERT INTO actor (actor_name, actor_label, type)
                            VALUES ('#{name}', '#{label}', '#{alphabet}')")
                    rescue StandardError
                        # Best effort: skip this actor
                        next
                    end
                end
            end
        end
    ensure
        connection.close
    end
    nil
end

#select_actor(type) ⇒ Object



312
313
314
315
316
317
318
319
320
# File 'lib/javlibrary.rb', line 312

# Runs #download_video_label for every actor row whose `type` column equals
# the given value.
#
# The connection is closed right after the query, before the per-actor
# downloads start.
#
# @param type [String] value compared against the `type` column
# @return [Object] the query result that was iterated
def select_actor(type)
    db = client
    statement = "SELECT actor_label FROM actor WHERE type='#{type}'"
    rows = db.query(statement).tap { db.close }

    rows.each do |row|
        label = row["actor_label"]
        download_video_label(label)
    end
end

#video_info_insert(index, identifer, actor_hash, genres_hash) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/javlibrary.rb', line 70

# Downloads one video's details and stores them in the database.
#
# The "$"-joined string from #downloader is split into its fields. One row
# is inserted into `video`, then one `v2a` row per cast name found in
# actor_hash and one `v2c` row per genre found in genres_hash. Finally the
# `label` row is marked with video_download=1. When the `video` INSERT fails
# the label is reset to video_download=0 and nothing else is written.
# Failed `v2a`/`v2c` INSERTs are skipped.
#
# The connection is opened only after the download succeeded and is closed
# on every exit path. Text fields are escaped before being embedded in SQL.
#
# @param index [Integer] video_num of the label row, also used as video_id
# @param identifer [String] video label passed to #downloader
# @param actor_hash [Hash] actor name => actor id
# @param genres_hash [Hash] genre name => category id
# @return [nil]
def video_info_insert(index, identifer, actor_hash, genres_hash)
    result = downloader(identifer)
    return nil if result.nil?

    # Limit -1 keeps trailing empty fields, so a page without genres or
    # jacket image still yields all nine values
    title, id, date, director, maker, label, cast_tmp, genres_tmp, img_url = result.split('$', -1)
    cast = cast_tmp.to_s.split
    genres = genres_tmp.to_s.split

    connection = client
    begin
        quoted = [title, id, img_url, director, label, date, maker].map do |field|
            "'#{connection.escape(field.to_s)}'"
        end

        begin
            connection.query("INSERT INTO video (video_id,video_name,license,url,director,label,date,maker)
            VALUES (#{index},#{quoted.join(',')})")
        rescue StandardError
            connection.query("UPDATE label SET video_download=0 WHERE video_num=#{index}")
            return nil
        end

        cast.each do |name|
            actor_id = actor_hash[name]
            next if actor_id.nil?
            begin
                connection.query("INSERT INTO v2a (v2a_fk_video,v2a_fk_actor) VALUES(#{index}, #{actor_id.to_i})")
            rescue StandardError
                next
            end
        end

        genres.each do |name|
            category_id = genres_hash[name]
            next if category_id.nil?
            begin
                connection.query("INSERT INTO v2c (v2c_fk_video,v2c_fk_category) VALUES(#{index}, #{category_id.to_i})")
            rescue StandardError
                next
            end
        end

        connection.query("UPDATE label SET video_download=1 WHERE video_num=#{index}")
    ensure
        # Runs on the early return above as well
        connection.close
    end
    nil
end