Class: Javlibrary

Inherits:
Object
  • Object
show all
Defined in:
lib/javlibrary.rb,
lib/javlibrary/version.rb

Constant Summary collapse

JAVLIBRARY_URL =
[ "jav11b.com", "javlibrary.com" ]
VERSION =
"0.2.12"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(database_name = 'javlibrary', user = 'root', pwd = 'default') ⇒ Javlibrary

Returns a new instance of Javlibrary.



12
13
14
15
16
17
18
19
20
# File 'lib/javlibrary.rb', line 12

# Stores the MySQL connection settings and selects the default site host.
#
# @param database_name [String] database name later handed to Mysql2 by #client
# @param user [String] MySQL user name
# @param pwd [String] MySQL password
def initialize(database_name = 'javlibrary', user = 'root', pwd = 'default')
  # Connection settings, read back by #client when it opens a connection.
  @database, @username, @password = database_name, user, pwd

  # The second entry of JAVLIBRARY_URL is used as the default host.
  @url = JAVLIBRARY_URL[1]
end

Instance Attribute Details

#database ⇒ Object

Returns the value of attribute database.



22
23
24
# File 'lib/javlibrary.rb', line 22

# Reader for @database, the database name assigned in #initialize.
def database
  @database
end

#password ⇒ Object

Returns the value of attribute password.



22
23
24
# File 'lib/javlibrary.rb', line 22

# Reader for @password, the MySQL password assigned in #initialize.
def password
  @password
end

#url ⇒ Object

Returns the value of attribute url.



22
23
24
# File 'lib/javlibrary.rb', line 22

# Reader for @url, the site host that #initialize takes from JAVLIBRARY_URL.
def url
  @url
end

#username ⇒ Object

Returns the value of attribute username.



22
23
24
# File 'lib/javlibrary.rb', line 22

# Reader for @username, the MySQL user name assigned in #initialize.
def username
  @username
end

Instance Method Details

#actor_hash ⇒ Object



158
159
160
161
162
163
164
165
166
167
# File 'lib/javlibrary.rb', line 158

# Reads the whole `actor` table and returns a name-to-id lookup.
#
# A connection is opened through #client and closed again before returning.
#
# @return [Hash] actor_name (stringified) mapped to actor_id
def actor_hash
  connection = client
  rows = connection.query("SELECT * FROM actor")
  ids_by_name = rows.each_with_object({}) do |row, lookup|
    lookup[row['actor_name'].to_s] = row['actor_id']
  end
  connection.close

  ids_by_name
end

#author_page_num(nokogiri_doc) ⇒ Object



210
211
212
213
214
215
216
# File 'lib/javlibrary.rb', line 210

# Extracts the number of the last page from a listing page's paginator.
#
# Every "page last" link inside the page_selector div is examined; the value
# after the final "=" in its href is converted with to_i, and the last link
# wins. When no such link exists the listing is treated as a single page.
#
# @param nokogiri_doc [#search] parsed document answering XPath searches
# @return [Integer] last page number, 1 when no "last" link is present
def author_page_num(nokogiri_doc)
  last_links = nokogiri_doc.search('//div[@class="page_selector"]/a[@class="page last"]')
  page_numbers = last_links.map { |link| link['href'].split("=")[-1].to_i }
  page_numbers.last || 1
end

#client ⇒ Object



24
25
26
27
28
29
# File 'lib/javlibrary.rb', line 24

# Opens a new Mysql2 connection to 127.0.0.1 using the stored credentials.
#
# Each call creates a fresh connection; the caller is responsible for closing it.
#
# @return [Mysql2::Client] the newly created connection
def client
  Mysql2::Client.new(host: "127.0.0.1",
                     username: @username.to_s,
                     password: @password.to_s,
                     database: @database.to_s)
end

#download_all_video_labels ⇒ Object



316
317
318
319
320
321
322
323
324
325
# File 'lib/javlibrary.rb', line 316

# Runs #select_actor for every letter 'A'..'Z', one thread per letter, and
# waits for all of them to finish.
#
# @return [Array<Thread>] the joined worker threads
def download_all_video_labels
  workers = ('A'..'Z').map do |letter|
    Thread.new { select_actor(letter) }
  end
  workers.map(&:join)
end

#download_all_videos ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/javlibrary.rb', line 142

# Downloads details for every label row still flagged video_download=0,
# sequentially, by handing each row to #video_info_insert.
#
# The pending rows are read up front and the connection is closed before the
# crawl starts; #video_info_insert opens its own connection per video.
# A failure on one video is reported on stderr and the loop carries on.
#
# @return [nil]
def download_all_videos
  connection = client
  begin
    pending = connection.query("SELECT * FROM label WHERE video_download=0").to_a
  ensure
    connection.close
  end

  actors = actor_hash
  genre_ids = genre_hash
  pending.each do |item|
    begin
      # video_info_insert takes (index, identifer, actor_hash, genres_hash);
      # passing the connection as an extra leading argument raised
      # ArgumentError for every row, which the rescue below used to hide.
      video_info_insert(item['video_num'], item['video_label'], actors, genre_ids)
    rescue StandardError => e
      warn "download_all_videos: skipping #{item['video_label']}: #{e.message}"
    end
  end
  nil
end

#download_all_videos_thread ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/javlibrary.rb', line 108

# Threaded variant of the video download: pending label rows are split into
# batches of 5000 and each batch is processed by its own thread.
#
# The pending rows are queried first and the connection is closed right away;
# every row is then passed to #video_info_insert together with the shared
# actor and genre lookups. An error on one row skips just that row.
#
# @return [Array<Thread>] the joined worker threads
def download_all_videos_thread
  connection = client
  pending = connection.query("SELECT video_num, video_label FROM label WHERE video_download=0")
  connection.close

  batches = pending.each_slice(5000).to_a

  actors = actor_hash
  genre_ids = genre_hash

  # One download thread per batch.
  workers = batches.map do |batch|
    Thread.new do
      batch.each do |item|
        begin
          video_info_insert(item['video_num'], item['video_label'], actors, genre_ids)
        rescue
          next
        end
      end
    end
  end

  workers.map(&:join)
end

#download_video_label(actor_id) ⇒ Object



261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/javlibrary.rb', line 261

# Collects the video labels listed on an actor's pages and stores them in the
# `label` table with video_download set to '0'.
#
# The first page is fetched to find the number of pages (via
# #author_page_num); every page from 1 to that number is then fetched and the
# value after the last "=" of each video link's href is taken as the label.
# Rows whose INSERT fails are skipped.
#
# @param actor_id [String] actor label used as the `s` query parameter
def download_video_label(actor_id)
  first_url = "http://www.#{@url}/ja/vl_star.php?s=#{actor_id}"
  page_url_prefix = "http://www.#{@url}/ja/vl_star.php?&mode=&s=#{actor_id}&page="

  # NOTE(review): both fetches below retry forever on any error, as they always
  # have; a permanent failure (e.g. a 404) never returns. Consider bounding.
  begin
    response = RestClient.get first_url
  rescue StandardError
    retry
  end

  last_page = author_page_num(Nokogiri::HTML(response.body))

  labels = []
  1.upto(last_page) do |page|
    begin
      response = RestClient.get "#{page_url_prefix}#{page}"
    rescue StandardError
      retry
    end

    Nokogiri::HTML(response.body).search('//div[@class="video"]/a').each do |row|
      labels << row['href'].split("=")[-1]
    end
  end

  connection = client
  begin
    labels.each do |label|
      begin
        # Labels come from scraped markup, so escape them before building SQL.
        safe_label = connection.escape(label.to_s)
        connection.query("INSERT INTO label (video_label, video_download) VALUES ('#{safe_label}', '0')")
      rescue StandardError
        next
      end
    end
  ensure
    # Close the connection even if something unexpected escapes the loop.
    connection.close
  end
end

#downloader(identifer) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/javlibrary.rb', line 31

# Fetches one video page and flattens its details into a "$"-separated string.
#
# Return format: title$id$date$director$maker$label$cast$genres$img_url
# (genres is a list of names, each followed by one space).
#
# NOTE(review): a field that itself contains "$" will shift the columns for
# anyone splitting the result; the format is kept because callers rely on it.
#
# @param identifer [String] value of the `v` query parameter for the video
# @return [String, nil] the joined record, or nil when the page could not be
#   fetched for a reason other than a timeout
def downloader(identifer)
  baseurl = "http://www.#{@url}/cn/?v=#{identifer}"
  agent = Mechanize.new
  # Pick from whatever aliases this Mechanize version ships rather than
  # assuming the table has exactly 21 entries (a shorter table gave nil).
  agent.user_agent = Mechanize::AGENT_ALIASES.values.sample
  begin
    agent.get baseurl
  rescue Timeout::Error
    # NOTE(review): timeouts are retried without a limit, as before.
    retry
  rescue StandardError
    return
  end

  doc = Nokogiri::HTML(agent.page.body)

  video_title = doc.search('div[@id="video_title"]/h3/a').children.text
  details = doc.search('//div[@id="video_info"]/div[@class="item"]/table/tr/td[@class="text"]')
               .map { |cell| cell.children.text }
  video_genres = doc.search('//div[@id="video_genres"]/table/tr/td[@class="text"]/span[@class="genre"]/a')
                    .map { |link| "#{link.children.text} " }
                    .join
  # When several jacket images match, the last one wins.
  video_jacket_img = doc.search('//img[@id="video_jacket_img"]').map { |img| img['src'] }.last

  # The cast is taken from the last details cell; missing cells become "".
  [
    video_title, details[0], details[1], details[2], details[3], details[4],
    details[-1], video_genres, video_jacket_img
  ].join('$')
end

#genre_hash ⇒ Object



169
170
171
172
173
174
175
176
177
178
# File 'lib/javlibrary.rb', line 169

# Reads the whole `category` table and returns a name-to-id lookup.
#
# A connection is opened through #client and closed again before returning.
#
# @return [Hash] category_name (stringified) mapped to category_id
def genre_hash
  connection = client
  rows = connection.query("SELECT * FROM category")
  ids_by_name = rows.each_with_object({}) do |row, lookup|
    lookup[row['category_name'].to_s] = row['category_id']
  end
  connection.close

  ids_by_name
end

#genres ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/javlibrary.rb', line 180

# Scrapes the genre listing page and returns the distinct genre names.
#
# The request is retried without limit whenever it raises.
#
# @return [Array<String>] text of every "genreitem" link, duplicates removed
def genres
  agent = Mechanize.new
  begin
    agent.get "http://www.#{@url}/cn/genres.php"
  rescue StandardError
    retry
  end

  links = Nokogiri::HTML(agent.page.body).search('//div[@class="genreitem"]/a')
  links.map { |link| link.children.text }.uniq
end

#genres_insert ⇒ Object Also known as: download_all_genres



194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/javlibrary.rb', line 194

# Stores every genre name returned by #genres in the `category` table.
#
# Names are escaped before being placed in the statement, so a name that
# contains a quote is stored rather than failing the INSERT. A row whose
# INSERT still fails is skipped. The connection is always closed.
def genres_insert
  connection = client
  begin
    genres.each do |name|
      begin
        safe_name = connection.escape(name.to_s)
        connection.query("INSERT INTO category (category_name) VALUES ('#{safe_name}')")
      rescue StandardError
        next
      end
    end
  ensure
    connection.close
  end
end

#get_all_actor ⇒ Object Also known as: download_all_actors



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/javlibrary.rb', line 218

# Crawls the actor listings for the prefixes 'A'..'Z' and stores every actor
# in the `actor` table (name, label, and the prefix letter as `type`).
#
# For each letter the first listing page is fetched to learn the page count
# (via #author_page_num), then every page is fetched and each "searchitem"
# link becomes one row: the link text is the name and the value after the
# last "=" of its href is the label. Rows whose INSERT fails are skipped.
def get_all_actor
  list_url = "http://www.#{@url}/cn/star_list.php?prefix="

  connection = client
  begin
    'A'.upto('Z') do |alphabet|
      letter_url = list_url + alphabet
      # NOTE(review): both fetches retry forever on any error, as before.
      begin
        response = RestClient.get letter_url
      rescue StandardError
        retry
      end

      last_page = author_page_num(Nokogiri::HTML(response.body))

      1.upto(last_page) do |page_num|
        begin
          response_page = RestClient.get "#{letter_url}&page=#{page_num}"
        rescue StandardError
          retry
        end

        doc_page = Nokogiri::HTML(response_page.body)
        doc_page.search('//div[@class="starbox"]/div[@class="searchitem"]/a').each do |row|
          name = row.text
          label = row['href'].split("=")[-1]
          begin
            # Scraped text goes into SQL, so escape it first.
            safe_name = connection.escape(name.to_s)
            safe_label = connection.escape(label.to_s)
            connection.query("INSERT INTO actor (actor_name, actor_label, type) " \
                             "VALUES ('#{safe_name}', '#{safe_label}', '#{alphabet}')")
          rescue StandardError
            next
          end
        end
      end
    end
  ensure
    # Close the connection even when the crawl is interrupted by an error.
    connection.close
  end
end

#select_actor(type) ⇒ Object



306
307
308
309
310
311
312
313
314
# File 'lib/javlibrary.rb', line 306

# Downloads the video labels of every actor stored under the given type.
#
# The actor labels are queried first and the connection is closed before
# #download_video_label is invoked for each of them.
#
# @param type [String] value matched against the actor table's `type` column
def select_actor(type)
  connection = client
  rows = connection.query("SELECT actor_label FROM actor WHERE type='#{type}'")
  connection.close

  rows.each { |row| download_video_label(row["actor_label"]) }
end

#video_info_insert(index, identifer, actor_hash, genres_hash) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/javlibrary.rb', line 68

# Downloads one video's details and stores them, together with its actor and
# category links, then flags the label row as downloaded.
#
# The record from #downloader is a "$"-separated string in the order
# title, id, date, director, maker, label, cast, genres, img_url.
# Cast and genre names are looked up in the supplied hashes; names without
# an entry are ignored, and link rows whose INSERT fails are skipped.
#
# If the `video` INSERT itself fails, the label is still flagged as
# downloaded and nil is returned (long-standing behaviour, kept as is).
#
# @param index [Integer] video_num of the label row; used as video_id
# @param identifer [String] video label passed to #downloader
# @param actor_hash [Hash] actor name => actor id
# @param genres_hash [Hash] genre name => category id
# @return [nil]
def video_info_insert(index, identifer, actor_hash, genres_hash)
  result = downloader(identifer)
  return nil if result.nil?

  # A limit of -1 keeps trailing empty fields, so a record with no genres or
  # image URL still yields all nine columns instead of nil placeholders.
  title, id, date, director, maker, label, cast_tmp, genres_tmp, img_url = result.split('$', -1)
  cast = cast_tmp.to_s.split
  genres = genres_tmp.to_s.split

  # Connect only once there is something to store.
  connection = client

  begin
    # Scraped text is escaped so that quotes cannot break the statement.
    values = [title, id, img_url, director, label, date, maker]
             .map { |value| "'#{connection.escape(value.to_s)}'" }
             .join(',')
    connection.query("INSERT INTO video (video_id,video_name,license,url,director,label,date,maker) " \
                     "VALUES (#{index},#{values})")
  rescue StandardError
    connection.query("UPDATE label SET video_download=1 WHERE video_num=#{index}")
    return nil
  end

  cast.each do |name|
    actor_id = actor_hash[name]
    next if actor_id.nil?
    begin
      connection.query("INSERT INTO v2a (v2a_fk_video,v2a_fk_actor) VALUES(#{index}, #{actor_id.to_i})")
    rescue StandardError
      next
    end
  end

  genres.each do |name|
    category_id = genres_hash[name]
    next if category_id.nil?
    begin
      connection.query("INSERT INTO v2c (v2c_fk_video,v2c_fk_category) VALUES(#{index}, #{category_id.to_i})")
    rescue StandardError
      next
    end
  end

  connection.query("UPDATE label SET video_download=1 WHERE video_num=#{index}")
  nil
ensure
  # Runs on every exit path, including the early returns above.
  connection.close if connection
end