Module: WebsiteCloner

Defined in:
lib/website_cloner.rb,
lib/website_cloner/utils.rb,
lib/website_cloner/parser.rb,
lib/website_cloner/downloader.rb

Defined Under Namespace

Modules: Utils
Classes: Downloader, Error, Parser

Class Method Summary

Class Method Details

.clone(url, output_dir, max_pages: 20, session_cookie: nil) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/website_cloner.rb', line 9

# Crawls a site breadth-first starting at +url+, downloading each page and
# the links the parser reports for it, then asks the parser to organize the
# downloaded files.
#
# A page that raises while being downloaded or parsed is logged and skipped;
# it still counts toward +max_pages+ and the crawl continues.
#
# @param url [String] the first page to fetch; also passed to Downloader.new
# @param output_dir [String] passed through to Downloader.new
# @param max_pages [Integer] upper bound on the number of pages processed
# @param session_cookie [String, nil] passed through to Downloader.new
# @return [Object] the result of the final logger call (not meaningful)
def self.clone(url, output_dir, max_pages: 20, session_cookie: nil)
  Utils.logger.info "Starting to clone #{url}"
  downloader = Downloader.new(url, output_dir, session_cookie)
  parser = Parser.new(downloader)

  # Every URL that has ever been queued. Filtering links at enqueue time keeps
  # the queue free of duplicates, so each dequeued URL is guaranteed to be new
  # and no per-page array copy of the visited set is needed.
  discovered = Set[url]
  pages_to_visit = [url]
  processed = 0

  until pages_to_visit.empty? || processed >= max_pages
    current_url = pages_to_visit.shift
    processed += 1
    Utils.logger.info "Processing page #{processed}/#{max_pages}: #{current_url}"

    begin
      content = downloader.download_page(current_url)
      new_pages = parser.parse_and_download(content, current_url)
      # Set#add? returns nil when the URL is already known, so only
      # first-time discoveries are appended (in the order they were found).
      new_pages.each { |page| pages_to_visit << page if discovered.add?(page) }
    rescue => e
      # Deliberate best-effort: one broken page must not abort the crawl.
      Utils.logger.error "Error processing #{current_url}: #{e.message}"
    end
  end

  Utils.logger.info "Finished cloning. Processed #{processed} pages."
  Utils.logger.info "Organizing files and updating references..."
  parser.organize_files
  Utils.logger.info "Done organizing files and updating references."
end