Class: MangaDownloadr::Workflow
- Inherits:
-
Object
- Object
- MangaDownloadr::Workflow
- Defined in:
- lib/manga-downloadr.rb
Instance Attribute Summary collapse
-
#chapter_images ⇒ Object
Returns the value of attribute chapter_images.
-
#chapter_list ⇒ Object
Returns the value of attribute chapter_list.
-
#chapter_pages ⇒ Object
Returns the value of attribute chapter_pages.
-
#chapter_pages_count ⇒ Object
Returns the value of attribute chapter_pages_count.
-
#download_links ⇒ Object
Returns the value of attribute download_links.
-
#fetch_image_urls_errors ⇒ Object
Returns the value of attribute fetch_image_urls_errors.
-
#fetch_images_errors ⇒ Object
Returns the value of attribute fetch_images_errors.
-
#fetch_page_urls_errors ⇒ Object
Returns the value of attribute fetch_page_urls_errors.
-
#hydra_concurrency ⇒ Object
Returns the value of attribute hydra_concurrency.
-
#manga_name ⇒ Object
Returns the value of attribute manga_name.
-
#manga_root ⇒ Object
Returns the value of attribute manga_root.
-
#manga_root_folder ⇒ Object
Returns the value of attribute manga_root_folder.
-
#manga_root_url ⇒ Object
Returns the value of attribute manga_root_url.
-
#manga_title ⇒ Object
Returns the value of attribute manga_title.
-
#page_size ⇒ Object
Returns the value of attribute page_size.
-
#pages_per_volume ⇒ Object
Returns the value of attribute pages_per_volume.
-
#processing_state ⇒ Object
Returns the value of attribute processing_state.
Class Method Summary collapse
- .create(root_url, manga_name, manga_root, options = {}) ⇒ Object
- .serialize(obj) ⇒ Object
Instance Method Summary collapse
- #compile_ebooks! ⇒ Object
- #fetch_chapter_urls! ⇒ Object
- #fetch_image_urls! ⇒ Object
- #fetch_images! ⇒ Object
- #fetch_page_urls! ⇒ Object
-
#initialize(root_url = nil, manga_name = nil, manga_root = nil, options = {}) ⇒ Workflow
constructor
A new instance of Workflow.
- #state?(state) ⇒ Boolean
Constructor Details
#initialize(root_url = nil, manga_name = nil, manga_root = nil, options = {}) ⇒ Workflow
Returns a new instance of Workflow.
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/manga-downloadr.rb', line 31

# Builds a workflow for one manga and resets all per-run bookkeeping
# (page/image maps, processing state and the three error lists).
#
# @param root_url [String] stored as +manga_root_url+ (opened by #fetch_chapter_urls!)
# @param manga_name [String] manga slug; joined onto +manga_root+ to form +manga_root_folder+
# @param manga_root [String] base folder
# @param options [Hash, nil] optional overrides
# @option options [Integer] :hydra_concurrency (100)
# @option options [Integer] :pages_per_volume (250)
# @option options [Array] :page_size ([600, 800])
# @raise [ArgumentError] when root_url, manga_root or manga_name is missing
def initialize(root_url = nil, manga_name = nil, manga_root = nil, options = {})
  # Checked in this order so the first missing argument is the one reported.
  raise ArgumentError, "URL is required" unless root_url
  raise ArgumentError, "Manga root folder is required" unless manga_root
  raise ArgumentError, "Manga slug is required" unless manga_name

  options ||= {} # tolerate an explicit nil

  self.manga_root_url    = root_url
  self.manga_root        = manga_root
  self.manga_root_folder = File.join(manga_root, manga_name)
  self.manga_name        = manga_name

  # `||` (not fetch) so that an explicit nil value also falls back to the default.
  self.hydra_concurrency = options[:hydra_concurrency] || 100
  self.pages_per_volume  = options[:pages_per_volume] || 250
  self.page_size         = options[:page_size] || [600, 800]

  self.chapter_pages  = {}
  self.chapter_images = {}
  self.processing_state = []
  self.fetch_page_urls_errors  = []
  self.fetch_image_urls_errors = []
  self.fetch_images_errors     = []
end
Instance Attribute Details
#chapter_images ⇒ Object
Returns the value of attribute chapter_images.
26 27 28 |
# File 'lib/manga-downloadr.rb', line 26

# Attribute reader.
#
# @return [Object] the value of attribute chapter_images
def chapter_images
  @chapter_images
end
#chapter_list ⇒ Object
Returns the value of attribute chapter_list.
26 27 28 |
# File 'lib/manga-downloadr.rb', line 26

# Attribute reader.
#
# @return [Object] the value of attribute chapter_list
def chapter_list
  @chapter_list
end
#chapter_pages ⇒ Object
Returns the value of attribute chapter_pages.
26 27 28 |
# File 'lib/manga-downloadr.rb', line 26

# Attribute reader.
#
# @return [Object] the value of attribute chapter_pages
def chapter_pages
  @chapter_pages
end
#chapter_pages_count ⇒ Object
Returns the value of attribute chapter_pages_count.
26 27 28 |
# File 'lib/manga-downloadr.rb', line 26

# Attribute reader.
#
# @return [Object] the value of attribute chapter_pages_count
def chapter_pages_count
  @chapter_pages_count
end
#download_links ⇒ Object
Returns the value of attribute download_links.
26 27 28 |
# File 'lib/manga-downloadr.rb', line 26

# Attribute reader.
#
# @return [Object] the value of attribute download_links
def download_links
  @download_links
end
#fetch_image_urls_errors ⇒ Object
Returns the value of attribute fetch_image_urls_errors.
29 30 31 |
# File 'lib/manga-downloadr.rb', line 29

# Attribute reader.
#
# @return [Object] the value of attribute fetch_image_urls_errors
def fetch_image_urls_errors
  @fetch_image_urls_errors
end
#fetch_images_errors ⇒ Object
Returns the value of attribute fetch_images_errors.
29 30 31 |
# File 'lib/manga-downloadr.rb', line 29

# Attribute reader.
#
# @return [Object] the value of attribute fetch_images_errors
def fetch_images_errors
  @fetch_images_errors
end
#fetch_page_urls_errors ⇒ Object
Returns the value of attribute fetch_page_urls_errors.
29 30 31 |
# File 'lib/manga-downloadr.rb', line 29

# Attribute reader.
#
# @return [Object] the value of attribute fetch_page_urls_errors
def fetch_page_urls_errors
  @fetch_page_urls_errors
end
#hydra_concurrency ⇒ Object
Returns the value of attribute hydra_concurrency.
25 26 27 |
# File 'lib/manga-downloadr.rb', line 25

# Attribute reader.
#
# @return [Object] the value of attribute hydra_concurrency
def hydra_concurrency
  @hydra_concurrency
end
#manga_name ⇒ Object
Returns the value of attribute manga_name.
25 26 27 |
# File 'lib/manga-downloadr.rb', line 25

# Attribute reader.
#
# @return [Object] the value of attribute manga_name
def manga_name
  @manga_name
end
#manga_root ⇒ Object
Returns the value of attribute manga_root.
25 26 27 |
# File 'lib/manga-downloadr.rb', line 25

# Attribute reader.
#
# @return [Object] the value of attribute manga_root
def manga_root
  @manga_root
end
#manga_root_folder ⇒ Object
Returns the value of attribute manga_root_folder.
25 26 27 |
# File 'lib/manga-downloadr.rb', line 25

# Attribute reader.
#
# @return [Object] the value of attribute manga_root_folder
def manga_root_folder
  @manga_root_folder
end
#manga_root_url ⇒ Object
Returns the value of attribute manga_root_url.
25 26 27 |
# File 'lib/manga-downloadr.rb', line 25

# Attribute reader.
#
# @return [Object] the value of attribute manga_root_url
def manga_root_url
  @manga_root_url
end
#manga_title ⇒ Object
Returns the value of attribute manga_title.
27 28 29 |
# File 'lib/manga-downloadr.rb', line 27

# Attribute reader.
#
# @return [Object] the value of attribute manga_title
def manga_title
  @manga_title
end
#page_size ⇒ Object
Returns the value of attribute page_size.
27 28 29 |
# File 'lib/manga-downloadr.rb', line 27

# Attribute reader.
#
# @return [Object] the value of attribute page_size
def page_size
  @page_size
end
#pages_per_volume ⇒ Object
Returns the value of attribute pages_per_volume.
27 28 29 |
# File 'lib/manga-downloadr.rb', line 27

# Attribute reader.
#
# @return [Object] the value of attribute pages_per_volume
def pages_per_volume
  @pages_per_volume
end
#processing_state ⇒ Object
Returns the value of attribute processing_state.
28 29 30 |
# File 'lib/manga-downloadr.rb', line 28

# Attribute reader.
#
# @return [Object] the value of attribute processing_state
def processing_state
  @processing_state
end
Class Method Details
.create(root_url, manga_name, manga_root, options = {}) ⇒ Object
209 210 211 212 213 |
# File 'lib/manga-downloadr.rb', line 209

# Returns a workflow for +manga_name+: the object restored from
# "/tmp/<manga_name>.yaml" when that dump exists, otherwise a fresh
# MangaDownloadr::Workflow built from the arguments.
#
# @param root_url [String] forwarded to Workflow.new when no dump exists
# @param manga_name [String] manga slug; also names the dump file
# @param manga_root [String] forwarded to Workflow.new when no dump exists
# @param options [Hash] forwarded to Workflow.new when no dump exists
# @return [Object] the restored object or a new MangaDownloadr::Workflow
def create(root_url, manga_name, manga_root, options = {})
  dump_file_name = "/tmp/#{manga_name}.yaml"

  # File.exists? was deprecated and then removed in Ruby 3.2; exist? works everywhere.
  unless File.exist?(dump_file_name)
    return MangaDownloadr::Workflow.new(root_url, manga_name, manga_root, options)
  end

  # SECURITY NOTE(review): this deserializes arbitrary Ruby objects from a
  # predictable, world-writable location. Anyone able to write that file can
  # inject objects; consider a private state directory.
  # Under Psych 4 a plain YAML.load is safe-mode and refuses custom classes,
  # so unsafe_load is used when available to keep the resume feature working.
  dump = File.read(dump_file_name)
  YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(dump) : YAML.load(dump)
end
.serialize(obj) ⇒ Object
205 206 207 |
# File 'lib/manga-downloadr.rb', line 205

# Writes the YAML dump of +obj+ to "/tmp/<obj.manga_name>.yaml",
# replacing any previous content of that file.
#
# @param obj [#manga_name] object to dump
# @return [Integer] whatever the underlying write returns
def serialize(obj)
  dump_path = "/tmp/#{obj.manga_name}.yaml"
  File.open(dump_path, 'w') do |file|
    file.write(YAML.dump(obj))
  end
end
Instance Method Details
#compile_ebooks! ⇒ Object
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/manga-downloadr.rb', line 168

# Collects every downloaded image under +manga_root_folder+ and bundles them
# into PDF volumes named "<manga_title> <n>.pdf" inside +manga_root_folder+,
# at most +pages_per_volume+ images per volume.
#
# Folders and files are ordered by the integer that follows the last space in
# their path (e.g. ".../Title 12/" sorts as 12, ".../Page 3.jpg" as 3).
# +download_links+ is filled with the ordered image list and drained as the
# volumes are written, so it is empty when the method returns.
#
# @return [Object] the result of +current_state(:ebooks)+
def compile_ebooks!
  by_trailing_number = ->(path) { path.split(" ").last.to_i }

  folders = Dir["#{manga_root_folder}/*/"].sort_by(&by_trailing_number)
  self.download_links = folders.flat_map do |folder|
    Dir["#{folder}*.*"].sort_by(&by_trailing_number)
  end

  # slice!(0..n) is inclusive and took n + 1 pages per volume; slice!(0, n)
  # takes exactly n. Clamp to 1 so a zero/negative setting cannot loop forever.
  volume_size = [pages_per_volume, 1].max
  volume_number = 0
  until download_links.empty?
    volume_number += 1
    pdf_file = File.join(manga_root_folder, "#{manga_title} #{volume_number}.pdf")
    pages = download_links.slice!(0, volume_size)
    Prawn::Document.generate(pdf_file, page_size: page_size) do |pdf|
      pages.each do |image_file|
        begin
          pdf.image image_file, position: :center, vposition: :center
        rescue => e
          # One unreadable image must not abort the whole volume.
          puts "Error in #{image_file} - #{e}"
        end
      end
    end
    print '.'
  end

  current_state :ebooks
end
#fetch_chapter_urls! ⇒ Object
55 56 57 58 59 60 61 62 |
# File 'lib/manga-downloadr.rb', line 55

# Downloads the page at +manga_root_url+ and extracts from it:
# * +chapter_list+ - the href of every link matching "#listing a"
# * +manga_title+  - the text of the first "#mangaproperties h1" element
#
# @return [Object] the result of +current_state(:chapter_urls)+
# @raise [NoMethodError] when the page has no "#mangaproperties h1" element
def fetch_chapter_urls!
  # Kernel#open no longer opens URLs on Ruby 3.0+ and would run a command for
  # a string starting with "|". Going through the parsed URI (open-uri's
  # OpenRead#open) only ever performs a network read; the block form also
  # closes the stream once the document has been parsed.
  doc = URI.parse(manga_root_url).open { |page| Nokogiri::HTML(page) }

  self.chapter_list = doc.css("#listing a").map { |link| link['href'] }
  self.manga_title = doc.css("#mangaproperties h1").first.text

  current_state :chapter_urls
end
#fetch_image_urls! ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
# File 'lib/manga-downloadr.rb', line 95

# For every page link recorded in +chapter_pages+, requests
# "http://www.mangareader.net<page_link>" and reads the element matching
# "#img". Its +alt+ text is split on the first " - " into two tokens; an
# ImageData built from (first token, "<second token><extension of src>", src)
# is appended to +chapter_images[chapter_key]+.
#
# Requests are queued on a Typhoeus::Hydra limited to +hydra_concurrency+ and
# executed by +hydra.run+. Failures inside a response handler are collected in
# +fetch_image_urls_errors+ and printed at the end.
#
# @return [Object] the result of +current_state(:image_urls)+
def fetch_image_urls!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)

  chapter_list.each do |chapter_key|
    # A chapter whose page list could not be fetched has no entry in
    # chapter_pages; treat it as having no pages instead of crashing on nil.
    Array(chapter_pages[chapter_key]).each do |page_link|
      begin
        request = Typhoeus::Request.new "http://www.mangareader.net#{page_link}"
        request.on_complete do |response|
          begin
            image = Nokogiri::HTML(response.body).css('#img').first
            # Same pattern the original double-quoted string produced ("\s"
            # is a literal space there), now written as a real regexp.
            tokens = image['alt'].match(/^(.*?) - (.*?)$/)
            extension = File.extname(URI.parse(image['src']).path)
            (chapter_images[chapter_key] ||= []) << ImageData.new(
              tokens[1], "#{tokens[2]}#{extension}", image['src']
            )
            print '.'
          rescue => e
            fetch_image_urls_errors << { url: page_link, error: e }
            print 'x'
          end
        end
        hydra.queue_with_retry request
      rescue => e
        puts e
      end
    end
  end

  hydra.run

  unless fetch_image_urls_errors.empty?
    puts "\nErrors fetching image urls:"
    puts fetch_image_urls_errors
  end

  current_state :image_urls
end
#fetch_images! ⇒ Object
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/manga-downloadr.rb', line 131

# Downloads every image listed in +chapter_images+ into
# "<manga_root_folder>/<file.folder>/<file.filename>", then resizes it in
# place to fit 600x800 and rewrites it with quality 50.
#
# Files that already exist on disk are skipped, which lets an interrupted run
# resume. Requests are queued on a Typhoeus::Hydra limited to
# +hydra_concurrency+; failures inside a response handler are collected in
# +fetch_images_errors+ and printed at the end.
#
# NOTE(review): the resize box is fixed at 600x800 and does not follow the
# +page_size+ option - confirm whether that is intended.
#
# @return [Object] the result of +current_state(:images)+
def fetch_images!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)

  chapter_list.each do |chapter_key|
    # Chapters with no collected images (e.g. earlier fetch errors) have no
    # entry in chapter_images; skip them instead of crashing on nil.
    Array(chapter_images[chapter_key]).each do |file|
      downloaded_filename = File.join(manga_root_folder, file.folder, file.filename)
      # Effectively resumes the download list without re-downloading everything.
      # (File.exists? was removed in Ruby 3.2.)
      next if File.exist?(downloaded_filename)

      request = Typhoeus::Request.new file.url
      request.on_complete do |response|
        begin
          # download
          FileUtils.mkdir_p(File.join(manga_root_folder, file.folder))
          File.binwrite(downloaded_filename, response.body)

          # resize
          image = Magick::Image.read(downloaded_filename).first
          resized = image.resize_to_fit(600, 800)
          # Block-argument form: works whether RMagick yields the info object
          # or instance_evals the block (instance_eval also passes the receiver).
          resized.write(downloaded_filename) { |info| info.quality = 50 }

          print '.'
          GC.start # to avoid a leak too big (ImageMagick is notorious for that, specially on resizes)
        rescue => e
          fetch_images_errors << { url: file.url, error: e }
          print 'x' # failures are marked 'x', like the other fetch_* steps
        end
      end
      hydra.queue_with_retry request
    end
  end

  hydra.run

  unless fetch_images_errors.empty?
    puts "\nErrors downloading images:"
    puts fetch_images_errors
  end

  current_state :images
end
#fetch_page_urls! ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/manga-downloadr.rb', line 64

# Requests "http://www.mangareader.net<chapter_link>" for every entry of
# +chapter_list+ and stores, per chapter, the +value+ of each element matching
# "#selectpage #pageMenu option" in +chapter_pages+.
#
# Requests are queued on a Typhoeus::Hydra limited to +hydra_concurrency+.
# Failures inside a response handler are collected in +fetch_page_urls_errors+
# (with the response body) and printed at the end. Finally
# +chapter_pages_count+ is set to the total number of collected page links.
#
# @return [Object] the result of +current_state(:page_urls)+
def fetch_page_urls!
  hydra = Typhoeus::Hydra.new(max_concurrency: hydra_concurrency)

  chapter_list.each do |chapter_link|
    begin
      request = Typhoeus::Request.new("http://www.mangareader.net#{chapter_link}")
      request.on_complete do |response|
        begin
          menu_entries = Nokogiri::HTML(response.body).css('#selectpage #pageMenu option')
          chapter_pages[chapter_link] = menu_entries.map { |entry| entry['value'] }
          print '.'
        rescue => error
          fetch_page_urls_errors << { url: chapter_link, error: error, body: response.body }
          print 'x'
        end
      end
      hydra.queue_with_retry(request)
    rescue => error
      puts error
    end
  end

  hydra.run

  unless fetch_page_urls_errors.empty?
    puts "\n Errors fetching page urls:"
    puts fetch_page_urls_errors
  end

  self.chapter_pages_count = chapter_pages.values.map(&:size).inject(0, :+)
  current_state :page_urls
end
#state?(state) ⇒ Boolean
195 196 197 |
# File 'lib/manga-downloadr.rb', line 195

# Tells whether +state+ is present in +processing_state+.
#
# @param state [Object] value to look for
# @return [Boolean]
def state?(state)
  processing_state.include?(state)
end