Class: MangaDownloadr::Workflow

Inherits:
Object
  • Object
show all
Defined in:
lib/manga-downloadr.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(root_url = nil, manga_name = nil, manga_root = nil, options = {}) ⇒ Workflow

Returns a new instance of Workflow.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/manga-downloadr.rb', line 31

# Builds a workflow that downloads one manga and compiles it into PDF volumes.
#
# @param root_url [String] URL of the manga's chapter listing page (required)
# @param manga_name [String] slug used as the download sub-folder name (required)
# @param manga_root [String] folder under which the manga folder is created (required)
# @param options [Hash, nil] optional overrides:
#   :hydra_concurrency (default 100), :pages_per_volume (default 250),
#   :page_size (default [600, 800])
# @raise [ArgumentError] when root_url, manga_root or manga_name is nil/false
def initialize(root_url = nil, manga_name = nil, manga_root = nil, options = {})
  # Checked in this order so the first missing argument decides the message.
  raise ArgumentError, "URL is required" unless root_url
  raise ArgumentError, "Manga root folder is required" unless manga_root
  raise ArgumentError, "Manga slug is required" unless manga_name

  # Tolerate an explicit nil so callers forwarding an optional hash don't crash.
  options ||= {}

  self.manga_root_url    = root_url
  self.manga_root        = manga_root
  self.manga_root_folder = File.join(manga_root, manga_name)
  self.manga_name        = manga_name

  self.hydra_concurrency = options[:hydra_concurrency] || 100

  # Both hashes are keyed by chapter link and filled by the fetch_* steps.
  self.chapter_pages    = {}
  self.chapter_images   = {}

  self.pages_per_volume = options[:pages_per_volume] || 250
  self.page_size        = options[:page_size] || [600, 800]

  self.processing_state        = []
  self.fetch_page_urls_errors  = []
  self.fetch_image_urls_errors = []
  self.fetch_images_errors     = []
end

Instance Attribute Details

#chapter_imagesObject

Returns the value of attribute chapter_images.



26
27
28
# File 'lib/manga-downloadr.rb', line 26

# Hash keyed by chapter link; starts as {} in the constructor, and
# fetch_image_urls! appends one ImageData per page to the chapter's array.
def chapter_images
  @chapter_images
end

#chapter_listObject

Returns the value of attribute chapter_list.



26
27
28
# File 'lib/manga-downloadr.rb', line 26

# Array of chapter hrefs taken from the "#listing a" links by
# fetch_chapter_urls!; it is not assigned in the constructor.
def chapter_list
  @chapter_list
end

#chapter_pagesObject

Returns the value of attribute chapter_pages.



26
27
28
# File 'lib/manga-downloadr.rb', line 26

# Hash of chapter link => array of page links (the 'value' of each page menu
# option), filled by fetch_page_urls!. Chapters whose fetch failed have no key.
def chapter_pages
  @chapter_pages
end

#chapter_pages_countObject

Returns the value of attribute chapter_pages_count.



26
27
28
# File 'lib/manga-downloadr.rb', line 26

# Total number of page links across all chapters, computed at the end of
# fetch_page_urls!.
def chapter_pages_count
  @chapter_pages_count
end

#download_linksObject

Returns the value of attribute download_links.



26
27
28
# File 'lib/manga-downloadr.rb', line 26

# Ordered list of downloaded file paths built by compile_ebooks!; that method
# slices entries off the front as it writes each PDF, leaving it empty.
def download_links
  @download_links
end

#fetch_image_urls_errorsObject

Returns the value of attribute fetch_image_urls_errors.



29
30
31
# File 'lib/manga-downloadr.rb', line 29

# Array of { url:, error: } hashes recorded by fetch_image_urls! for pages
# whose image tag could not be parsed.
def fetch_image_urls_errors
  @fetch_image_urls_errors
end

#fetch_images_errorsObject

Returns the value of attribute fetch_images_errors.



29
30
31
# File 'lib/manga-downloadr.rb', line 29

# Array of { url:, error: } hashes recorded by fetch_images! when saving or
# resizing an image raised.
def fetch_images_errors
  @fetch_images_errors
end

#fetch_page_urls_errorsObject

Returns the value of attribute fetch_page_urls_errors.



29
30
31
# File 'lib/manga-downloadr.rb', line 29

# Array of { url:, error:, body: } hashes recorded by fetch_page_urls! for
# chapters whose page list could not be parsed.
def fetch_page_urls_errors
  @fetch_page_urls_errors
end

#hydra_concurrencyObject

Returns the value of attribute hydra_concurrency.



25
26
27
# File 'lib/manga-downloadr.rb', line 25

# Value passed as max_concurrency to each Typhoeus::Hydra; taken from
# options[:hydra_concurrency], defaulting to 100.
def hydra_concurrency
  @hydra_concurrency
end

#manga_nameObject

Returns the value of attribute manga_name.



25
26
27
# File 'lib/manga-downloadr.rb', line 25

# Manga slug given to the constructor; used as the download sub-folder name
# and in the /tmp/<manga_name>.yaml state file name.
def manga_name
  @manga_name
end

#manga_rootObject

Returns the value of attribute manga_root.



25
26
27
# File 'lib/manga-downloadr.rb', line 25

# Root folder given to the constructor; manga_root_folder is built beneath it.
def manga_root
  @manga_root
end

#manga_root_folderObject

Returns the value of attribute manga_root_folder.



25
26
27
# File 'lib/manga-downloadr.rb', line 25

# File.join(manga_root, manga_name): where chapter folders, images and the
# generated PDFs are written.
def manga_root_folder
  @manga_root_folder
end

#manga_root_urlObject

Returns the value of attribute manga_root_url.



25
26
27
# File 'lib/manga-downloadr.rb', line 25

# URL given to the constructor; fetch_chapter_urls! downloads and parses it.
def manga_root_url
  @manga_root_url
end

#manga_titleObject

Returns the value of attribute manga_title.



27
28
29
# File 'lib/manga-downloadr.rb', line 27

# Text of the first "#mangaproperties h1" element, set by fetch_chapter_urls!;
# compile_ebooks! uses it in the PDF file names.
def manga_title
  @manga_title
end

#page_sizeObject

Returns the value of attribute page_size.



27
28
29
# File 'lib/manga-downloadr.rb', line 27

# Passed as page_size: to Prawn::Document.generate in compile_ebooks!; taken
# from options[:page_size], defaulting to [600, 800].
def page_size
  @page_size
end

#pages_per_volumeObject

Returns the value of attribute pages_per_volume.



27
28
29
# File 'lib/manga-downloadr.rb', line 27

# Used by compile_ebooks! when slicing the file list into PDF volumes; taken
# from options[:pages_per_volume], defaulting to 250.
def pages_per_volume
  @pages_per_volume
end

#processing_stateObject

Returns the value of attribute processing_state.



28
29
30
# File 'lib/manga-downloadr.rb', line 28

# Array initialised to [] in the constructor and queried by state?.
# NOTE(review): presumably current_state (not visible here) appends the
# completed step symbols — confirm.
def processing_state
  @processing_state
end

Class Method Details

.create(root_url, manga_name, manga_root, options = {}) ⇒ Object



209
210
211
212
213
# File 'lib/manga-downloadr.rb', line 209

# Returns the workflow to run for +manga_name+: the object previously dumped to
# /tmp/<manga_name>.yaml when that file exists, otherwise a new Workflow.
#
# @param root_url [String] manga listing URL (used only for a new workflow)
# @param manga_name [String] manga slug; also names the dump file
# @param manga_root [String] download root folder (used only for a new workflow)
# @param options [Hash] forwarded to Workflow.new
# @return [Object] the deserialized dump, or a new MangaDownloadr::Workflow
def create(root_url, manga_name, manga_root, options = {})
  dump_file_name = "/tmp/#{manga_name}.yaml"
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  if File.exist?(dump_file_name)
    yaml = File.read(dump_file_name)
    # SECURITY NOTE(review): this restores arbitrary Ruby objects from a file in
    # world-writable /tmp, so it must only be used where /tmp is trusted.
    # Psych 4 made YAML.load safe-by-default, which rejects the dumped workflow
    # object; unsafe_load keeps the original restore behaviour there.
    return YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
  end
  MangaDownloadr::Workflow.new(root_url, manga_name, manga_root, options)
end

.serialize(obj) ⇒ Object



205
206
207
# File 'lib/manga-downloadr.rb', line 205

# Dumps +obj+ as YAML to /tmp/<obj.manga_name>.yaml, replacing any previous
# dump, and returns the value of the underlying write call.
#
# @param obj [#manga_name] object to persist
def serialize(obj)
  dump_path = "/tmp/#{obj.manga_name}.yaml"
  File.open(dump_path, 'w') do |io|
    io.write(YAML.dump(obj))
  end
end

Instance Method Details

#compile_ebooks!Object



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/manga-downloadr.rb', line 168

# Collects every downloaded file under manga_root_folder and concatenates them
# into PDF volumes named "<manga_title> <n>.pdf", each holding at most
# pages_per_volume images. download_links is left empty afterwards, and the
# :ebooks step is recorded via current_state.
def compile_ebooks!
  # Folders and files are ordered by the integer parsed from the last
  # space-separated token of their path.
  by_trailing_number = lambda { |path| path.split(" ").last.to_i }

  folders = Dir[manga_root_folder + "/*/"].sort_by(&by_trailing_number)
  self.download_links = folders.flat_map do |folder|
    Dir[folder + "*.*"].sort_by(&by_trailing_number)
  end

  # Never slice fewer than one file per pass, otherwise the loop cannot end.
  per_volume = [pages_per_volume, 1].max

  # concatenating PDF files (pages_per_volume pages per volume)
  volume_number = 0
  until download_links.empty?
    volume_number += 1
    pdf_file = File.join(manga_root_folder, "#{manga_title} #{volume_number}.pdf")
    # (start, length) form: the inclusive range 0..pages_per_volume used to
    # take one page too many per volume.
    list = download_links.slice!(0, per_volume)
    Prawn::Document.generate(pdf_file, page_size: page_size) do |pdf|
      list.each do |image_file|
        begin
          pdf.image image_file, position: :center, vposition: :center
        rescue => e
          # Best effort: report an image that cannot be embedded and carry on.
          puts "Error in #{image_file} - #{e}"
        end
      end
    end
    print '.'
  end

  current_state :ebooks
end

#fetch_chapter_urls!Object



55
56
57
58
59
60
61
62
# File 'lib/manga-downloadr.rb', line 55

# Downloads the manga listing page at manga_root_url and stores the chapter
# links ("#listing a" hrefs) in chapter_list and the text of the first
# "#mangaproperties h1" in manga_title, then records the :chapter_urls step.
def fetch_chapter_urls!
  # Kernel#open stopped opening URLs in Ruby 3.0 and would run a command for a
  # string starting with "|"; prefer URI.open where open-uri provides it.
  opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
  # Block form so the downloaded stream is closed once it has been parsed.
  doc = opener.call(manga_root_url) { |io| Nokogiri::HTML(io) }

  self.chapter_list = doc.css("#listing a").map { |l| l['href']}
  self.manga_title  = doc.css("#mangaproperties h1").first.text

  current_state :chapter_urls
end

#fetch_image_urls!Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/manga-downloadr.rb', line 95

# Visits every page link in chapter_pages and records, per chapter, an
# ImageData built from the page's "#img" element: the two parts of its alt text
# split on " - ", the extension of its src path, and the src URL itself.
# Parse failures go to fetch_image_urls_errors and are printed once all
# requests have run; the :image_urls step is then recorded.
def fetch_image_urls!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
  chapter_list.each do |chapter_key|
    # A chapter whose page list failed to load has no chapter_pages entry;
    # skip it instead of calling each on nil and aborting the whole run.
    (chapter_pages[chapter_key] || []).each do |page_link|
      begin
        request = Typhoeus::Request.new "http://www.mangareader.net#{page_link}"
        request.on_complete do |response|
          begin
            chapter_doc = Nokogiri::HTML(response.body)
            image       = chapter_doc.css('#img').first
            tokens      = image['alt'].match("^(.*?)\s\-\s(.*?)$")
            extension   = File.extname(URI.parse(image['src']).path)

            chapter_images[chapter_key] ||= []
            chapter_images[chapter_key] << ImageData.new( tokens[1], "#{tokens[2]}#{extension}", image['src'] )
            print '.'
          rescue => e
            # A missing image or unexpected alt text lands here.
            self.fetch_image_urls_errors << { url: page_link, error: e }
            print 'x'
          end
        end
        hydra.queue_with_retry request
      rescue => e
        puts e
      end
    end
  end
  hydra.run
  unless fetch_image_urls_errors.empty?
    puts "\nErrors fetching image urls:"
    puts fetch_image_urls_errors
  end

  current_state :image_urls
end

#fetch_images!Object



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/manga-downloadr.rb', line 131

# Downloads every image listed in chapter_images into
# manga_root_folder/<folder>/<filename>, then resizes it in place to fit
# 600x800 at quality 50. Files already on disk are skipped, so a rerun resumes.
# Failures go to fetch_images_errors and are printed once all requests have
# run; the :images step is then recorded.
def fetch_images!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)
  chapter_list.each do |chapter_key|
    # A chapter with no resolved image URLs has no entry; skip it rather than
    # calling each on nil.
    (chapter_images[chapter_key] || []).each do |file|
      downloaded_filename = File.join(manga_root_folder, file.folder, file.filename)
      # effectively resumes the download list without re-downloading everything
      # (File.exist? because File.exists? was removed in Ruby 3.2)
      next if File.exist?(downloaded_filename)
      request = Typhoeus::Request.new file.url
      request.on_complete do |response|
        begin
          # download
          FileUtils.mkdir_p(File.join(manga_root_folder, file.folder))
          File.open(downloaded_filename, "wb+") { |f| f.write response.body }

          # resize
          image = Magick::Image.read( downloaded_filename ).first
          resized = image.resize_to_fit(600, 800)
          resized.write( downloaded_filename ) { self.quality = 50 }

          print '.'
          GC.start # to avoid a leak too big (ImageMagick is notorious for that, specially on resizes)
        rescue => e
          self.fetch_images_errors << { url: file.url, error: e }
          # 'x' marks a failure, matching the other fetch_* steps.
          print 'x'
        end
      end
      hydra.queue_with_retry request
    end
  end
  hydra.run
  unless fetch_images_errors.empty?
    puts "\nErrors downloading images:"
    puts fetch_images_errors
  end

  current_state :images
end

#fetch_page_urls!Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/manga-downloadr.rb', line 64

# Requests every chapter in chapter_list and stores, per chapter link, the
# 'value' of each "#selectpage #pageMenu option" element in chapter_pages.
# Parse failures are collected in fetch_page_urls_errors (with the response
# body) and printed after all requests have run. Finally the total number of
# page links is stored in chapter_pages_count and the :page_urls step recorded.
def fetch_page_urls!
  pool = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)

  chapter_list.each do |chapter_link|
    begin
      chapter_request = Typhoeus::Request.new "http://www.mangareader.net#{chapter_link}"
      chapter_request.on_complete do |response|
        begin
          page_options = Nokogiri::HTML(response.body).css('#selectpage #pageMenu option')
          chapter_pages[chapter_link] = page_options.map { |option| option['value'] }
          print '.'
        rescue => error
          fetch_page_urls_errors << { url: chapter_link, error: error, body: response.body }
          print 'x'
        end
      end
      pool.queue_with_retry chapter_request
    rescue => error
      puts error
    end
  end

  pool.run

  unless fetch_page_urls_errors.empty?
    puts "\n Errors fetching page urls:"
    puts fetch_page_urls_errors
  end

  page_counts = chapter_pages.values.map { |page_links| page_links.size }
  self.chapter_pages_count = page_counts.inject(0) { |total, count| total + count }
  current_state :page_urls
end

#state?(state) ⇒ Boolean

Returns:

  • (Boolean)


195
196
197
# File 'lib/manga-downloadr.rb', line 195

# Tells whether +state+ is present in processing_state.
#
# @param state [Object] step marker to look for (e.g. :images)
# @return [Boolean]
def state?(state)
  recorded_states = processing_state
  recorded_states.include?(state)
end