Module: WebPageArchiver::GeneratorHelpers

Included in:
DataUriHtmlGenerator, InlineHtmlGenerator, MhtmlGenerator
Defined in:
lib/web_page_archiver.rb

Overview

Generic methods to reuse in both the MhtmlGenerator and the InlineHtmlGenerator

Instance Method Summary collapse

Instance Method Details

#content_type(object) ⇒ String

Determines the content type of a file or download



58
59
60
61
62
63
64
# File 'lib/web_page_archiver.rb', line 58

# Looks up the content type of an opened resource.
#
# @param object [File, #meta] an opened local file, or any other object
#   that exposes a +meta+ hash (presumably an open-uri download -- confirm
#   against callers)
# @return [Object] for a File, the first entry returned by
#   MIME::Types.type_for for its path (nil when there is no match);
#   otherwise the value stored under "content-type" in +object.meta+
def content_type(object)
  # Anything that is not a local File is expected to carry its own metadata.
  return object.meta["content-type"] unless object.is_a? File

  MIME::Types.type_for(object.path).first
end

#download_finished? ⇒ Boolean

Tests whether all the required content has been downloaded



87
88
89
# File 'lib/web_page_archiver.rb', line 87

# Reports whether every entry in @contents already has a body.
#
# @return [Boolean] true when no entry's :body is nil (also true when
#   @contents is empty), false otherwise
def download_finished?
  @contents.none? { |_key, entry| entry[:body].nil? }
end

#initialize ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/web_page_archiver.rb', line 16

def initialize
  @contents = {}
  @src = StringIO.new
  @boundary = "mimepart_#{Digest::MD5.hexdigest(Time.now.to_s)}"
  @threads  = []
  @queue    = Queue.new
  @conf     = { :base64_except=>["html"] }
end

#join_uri(base_filename_or_uri, path) ⇒ String

Creates an absolute URI string for resources referenced from the base file name



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/web_page_archiver.rb', line 30

# Resolves +path+ relative to a base document and returns it as a string.
#
# The base is opened once, only to find out whether it is a local file; the
# probe stream is closed again immediately so no handle is leaked.
#
# @param base_filename_or_uri [String, File] file name, File object or URI
#   of the document that references +path+
# @param path [String] the (possibly relative) reference to resolve
# @return [String] for a local base, a plain file-system path with the
#   "file:" scheme stripped; otherwise the joined URI
# @raise [SystemCallError] whatever +open+ raises when the base cannot be
#   opened (e.g. Errno::ENOENT for a missing file)
#
# NOTE(review): Kernel#open is used on the raw argument. Whether URLs are
#   accepted depends on open-uri and the Ruby version, and Kernel#open has
#   historically treated strings starting with "|" as commands -- confirm
#   that callers never pass untrusted input here.
def join_uri(base_filename_or_uri, path)
  stream = open(base_filename_or_uri)
  local_file = stream.is_a? File
  # Only the type of the stream matters; release it right away. Never close
  # an object handed in by the caller.
  stream.close if stream.respond_to?(:close) && !stream.equal?(base_filename_or_uri)

  return URI::join(base_filename_or_uri, path).to_s unless local_file

  base_filename_or_uri = base_filename_or_uri.path if base_filename_or_uri.is_a? File

  # A drive prefix such as "C:/" would confuse URI parsing, so it is swapped
  # for a placeholder before joining and restored afterwards.
  windows_drive_matcher = /((.*):\/)/
  windows_drive_match_data = base_filename_or_uri.match windows_drive_matcher
  if windows_drive_match_data
    base_filename_or_uri = base_filename_or_uri.gsub(windows_drive_matcher,'WINDOWS.DRIVE/')
  end

  joined = URI::join("file://#{base_filename_or_uri}", path)
  joined = joined.to_s.gsub('file://','').gsub('file:','')

  if windows_drive_match_data
    joined = joined.gsub('WINDOWS.DRIVE/',windows_drive_match_data[1])
  end
  joined
end

#start_download_thread(num = 5) ⇒ Array<Thread>

Processes the download queue



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/web_page_archiver.rb', line 70

# Starts worker threads that drain the download queue.
#
# Each worker repeatedly takes a key from @queue, skips entries in @contents
# that already have a :body, and otherwise opens the entry's :uri and stores
# the body and content type back into @contents. A worker ends once the
# queue is empty.
#
# The key is taken with a non-blocking pop: checking +empty?+ and then
# calling a blocking +pop+ lets another worker grab the last key in between,
# leaving this worker blocked forever. Every opened stream is closed after
# it has been read.
#
# @param num [Integer] number of worker threads to start
# @return [Array<Thread>] @threads, including the newly started workers
def start_download_thread(num=5)
  num.times do
    worker = Thread.start do
      loop do
        begin
          key = @queue.pop(true)
        rescue ThreadError
          break # queue drained
        end

        next unless @contents[key][:body].nil?

        uri = @contents[key][:uri]
        stream = open(uri)
        begin
          @contents[key] = @contents[key].merge({ :body=>stream.read, :uri=> uri, :content_type=> content_type(stream) })
        ensure
          stream.close if stream.respond_to?(:close)
        end
      end
    end
    @threads.push worker
  end
  @threads
end