Class: Pedophile::Wget

Inherits:
Object
  • Object
show all
Defined in:
lib/pedophile/wget.rb

Constant Summary collapse

# Name of the working directory for all temporary files; it is a relative
# path, so it is resolved against the process's current directory.
TMP_PATH =
"offline_tmp"
# Absolute form of TMP_PATH; this is the directory #clear! deletes.
TMP_ABSOLUTE_PATH =
File.absolute_path(TMP_PATH)
# File that #download and #post pass to wget's -O option and then read back.
TMP_FILE_PATH =
File.absolute_path(File.join(TMP_PATH, "tmp.tmp"))
# Cookie file that WGET_PARAMS both loads from and saves to.
COOKIES_FILE_PATH =
File.absolute_path(File.join(TMP_PATH, "cookies.txt"))
# Directory (relative, inside TMP_PATH) that #mirror changes into before
# running wget.
TMP_OFFLINE_PATH =
File.join(TMP_PATH, "site")
# Options shared by every wget invocation in this class (#download, #post
# and #mirror); the cookie options all point at COOKIES_FILE_PATH.
WGET_PARAMS =
"-v --random-wait --user-agent=Mozilla/5.0 --keep-session-cookies --load-cookies #{COOKIES_FILE_PATH} --save-cookies #{COOKIES_FILE_PATH}"
WGET_RESTRICT_FILE_NAMES =

See www.gnu.org/software/wget/manual/html_node/Download-Options.html for the accepted values: "windows", "ascii" or "unix" (for example, WGET_RESTRICT_FILE_NAMES = "windows").

"unix"
# Extra wget options that #mirror appends after WGET_PARAMS; the
# --restrict-file-names value comes from WGET_RESTRICT_FILE_NAMES.
WGET_MIRROR_PARAMS =
"--adjust-extension --mirror --page-requisites --convert-links --restrict-file-names=#{WGET_RESTRICT_FILE_NAMES}"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(downloader) ⇒ Wget

Returns a new instance of Wget.



18
19
20
21
# File 'lib/pedophile/wget.rb', line 18

# Stores +downloader+ and makes sure the temporary directories exist.
#
# @param downloader [Object] kept in @downloader; #mirror later calls +url+ on it
def initialize(downloader)
  @downloader = downloader
  # Creates TMP_PATH and TMP_OFFLINE_PATH if they are missing.
  prepare_tmp_path
end

Instance Attribute Details

#downloader ⇒ Object (readonly)

Returns the value of attribute downloader.



23
24
25
# File 'lib/pedophile/wget.rb', line 23

# Reader for the object that was passed to #initialize.
#
# @return [Object] the value held in @downloader
def downloader
  @downloader
end

Instance Method Details

#clear! ⇒ Object



45
46
47
48
# File 'lib/pedophile/wget.rb', line 45

# Wipes the whole temporary directory tree and then recreates the empty
# directory skeleton.
#
# @return [Object] whatever #prepare_tmp_path returns
def clear!
  # force: true makes a missing directory a no-op instead of an error.
  FileUtils.rm_r(TMP_ABSOLUTE_PATH, force: true)
  prepare_tmp_path
end

#download(url) ⇒ Object



30
31
32
33
# File 'lib/pedophile/wget.rb', line 30

# Fetches +url+ with wget and returns the response body.
#
# wget writes the body to TMP_FILE_PATH (overwritten on every call); the
# file is then read back and returned.
#
# @param url [String] address to fetch
# @return [String] contents of TMP_FILE_PATH after wget has run
def download(url)
  # Loaded here because the file's own require list is not in view.
  require "shellwords"

  # The command is run through a shell, so the URL has to be escaped: an
  # unescaped "&" in a query string would otherwise split the command.
  `wget #{WGET_PARAMS} #{Shellwords.escape(url)} -O#{TMP_FILE_PATH}`
  # File.read closes the handle; File.open(...).read left it open.
  File.read(TMP_FILE_PATH)
end

#mirror ⇒ Object



41
42
43
# File 'lib/pedophile/wget.rb', line 41

# Mirrors the downloader's URL into TMP_OFFLINE_PATH using wget with
# WGET_PARAMS plus WGET_MIRROR_PARAMS.
#
# @return [String] standard output captured from the shell command
def mirror
  # Loaded here because the file's own require list is not in view.
  require "shellwords"

  # Escaped because the command is run through a shell; a bare "&" in the
  # URL would otherwise split the command.
  target = Shellwords.escape(downloader.url)
  # "&&" rather than ";": if the cd fails, wget must not run and dump the
  # mirror into whatever the current directory happens to be.
  `cd #{TMP_OFFLINE_PATH} && wget #{WGET_PARAMS} #{WGET_MIRROR_PARAMS} #{target}`
end

#offline_path ⇒ Object



54
55
56
# File 'lib/pedophile/wget.rb', line 54

# Path of the mirrored site: TMP_OFFLINE_PATH joined with the entry name
# reported by #site_last_path.
#
# @return [String] the joined path
def offline_path
  mirrored_entry = site_last_path
  File.join(TMP_OFFLINE_PATH, mirrored_entry)
end

#post(url, params) ⇒ Object



35
36
37
38
39
# File 'lib/pedophile/wget.rb', line 35

# Sends a POST request to +url+ with wget and returns the response body.
#
# The body is written to TMP_FILE_PATH (overwritten on every call) and then
# read back.
#
# @param url [String] address to post to
# @param params [#to_query] form fields; must respond to +to_query+
#   (NOTE(review): presumably ActiveSupport's Hash#to_query, confirm)
# @return [String] contents of TMP_FILE_PATH after wget has run
def post(url, params)
  # Loaded here because the file's own require list is not in view.
  require "shellwords"

  # Both values are escaped because the command is run through a shell; the
  # previous hand-written single quotes broke on a "'" in the data, and the
  # URL was not protected at all.
  post_data = Shellwords.escape(params.to_query)
  `wget #{WGET_PARAMS} #{Shellwords.escape(url)} --post-data #{post_data} -O#{TMP_FILE_PATH}`
  # File.read closes the handle; File.open(...).read left it open.
  File.read(TMP_FILE_PATH)
end

#prepare_tmp_path ⇒ Object



25
26
27
28
# File 'lib/pedophile/wget.rb', line 25

# Creates TMP_PATH and, inside it, TMP_OFFLINE_PATH when they do not exist
# yet. Safe to call repeatedly.
#
# @return [Integer, nil] result of the last Dir.mkdir, or nil when
#   TMP_OFFLINE_PATH was already present
def prepare_tmp_path
  # File.exist? replaces File.exists?, which is deprecated and no longer
  # defined on Ruby 3.2+.
  Dir.mkdir(TMP_PATH) unless File.exist?(TMP_PATH)
  Dir.mkdir(TMP_OFFLINE_PATH) unless File.exist?(TMP_OFFLINE_PATH)
end

#site_last_path ⇒ Object



50
51
52
# File 'lib/pedophile/wget.rb', line 50

# Name of the first entry found in the offline directory, ignoring the "."
# and ".." pseudo-entries.
#
# @return [String, nil] entry name, or nil when the directory is empty
def site_last_path
  Dir.entries(Wget::TMP_OFFLINE_PATH).find do |entry|
    entry != "." && entry != ".."
  end
end