Module: WebsiteCloner
- Defined in:
  - lib/website_cloner.rb
  - lib/website_cloner/utils.rb
  - lib/website_cloner/parser.rb
  - lib/website_cloner/downloader.rb
Defined Under Namespace
Modules: Utils. Classes: Downloader, Error, Parser.
Class Method Summary collapse
Class Method Details
.clone(url, output_dir, max_pages: 20, session_cookie: nil) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/website_cloner.rb', line 9

# Clones a website by visiting pages breadth-first, starting from +url+.
#
# Pages are taken from a FIFO queue; each page is downloaded through a
# Downloader and handed to a Parser, whose return value is treated as the
# list of further pages to visit. The loop stops when the queue is empty or
# +max_pages+ distinct pages have been visited. A failure on one page is
# logged and does not abort the run. Afterwards the parser is asked to
# organize the downloaded files.
#
# @param url [String] starting page; also passed to Downloader.new
# @param output_dir [String] passed through to Downloader.new
# @param max_pages [Integer] upper bound on the number of distinct pages visited
# @param session_cookie [String, nil] optional cookie forwarded to Downloader.new
# @return [Object] whatever the final Utils.logger.info call returns
def self.clone(url, output_dir, max_pages: 20, session_cookie: nil)
  Utils.logger.info "Starting to clone #{url}"

  # NOTE(review): session_cookie is forwarded positionally; confirm this
  # matches Downloader#initialize (not visible here).
  downloader = Downloader.new(url, output_dir, session_cookie)
  parser = Parser.new(downloader)

  visited_pages = Set.new
  pages_to_visit = [url]

  until pages_to_visit.empty? || visited_pages.size >= max_pages
    current_url = pages_to_visit.shift
    # The queue may hold duplicates; skip anything already processed.
    next if visited_pages.include?(current_url)

    visited_pages.add(current_url)
    Utils.logger.info "Processing page #{visited_pages.size}/#{max_pages}: #{current_url}"

    begin
      content = downloader.download_page(current_url)
      new_pages = parser.parse_and_download(content, current_url)
      pages_to_visit.concat(new_pages - visited_pages.to_a)
    rescue StandardError => e
      # Best effort: log the failure and continue with the remaining pages.
      Utils.logger.error "Error processing #{current_url}: #{e.message}"
    end
  end

  Utils.logger.info "Finished cloning. Processed #{visited_pages.size} pages."
  Utils.logger.info "Organizing files and updating references..."
  parser.organize_files
  Utils.logger.info "Done organizing files and updating references."
end