Module: JustFlow
- Defined in:
- lib/justflow.rb
Class Method Summary
- .convert(url) ⇒ Object
- .download_resource(selector, source_attr, out_path) ⇒ Object
- .ensure_mkdir(dirname) ⇒ Object
- .fix_uri(url, uri) ⇒ Object
- .get_contents(uri) ⇒ Object
- .get_css ⇒ Object
- .get_images ⇒ Object
- .get_scripts ⇒ Object
- .is_font?(extension) ⇒ Boolean
- .is_img?(extension) ⇒ Boolean
- .process_css_urls(css_source, original_css_url) ⇒ Object
- .remove_args(url) ⇒ Object
- .save_contents(resp, save_path) ⇒ Object
- .valid_uri_scheme?(uri) ⇒ Boolean
Class Method Details
.convert(url) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/justflow.rb', line 9
# Downloads the page at +url+ together with its scripts, stylesheets and
# images into a directory named after the URL, then writes the rewritten
# document to +index.html+ inside that directory.
#
# The directory name is host + path + query with characters that are unsafe
# in file names replaced by "_".
#
# @param url [String] absolute URL of the page to convert
# @return [void]
def convert(url)
  # NOTE(review): String#yellow / String#green are not defined in this
  # listing; presumably they come from a colorizing gem loaded by the file.
  puts "Converting".yellow + " #{url}..."

  @url = url
  parsed = URI.parse(url)
  target_dir = "#{parsed.host}#{parsed.path}#{parsed.query}".gsub(/[\x00\/\\:\*\?\"<>\|]/, '_')
  ensure_mkdir(target_dir)

  # Block form restores the previous working directory afterwards, so a
  # second convert call does not end up nested inside the first output dir.
  Dir.chdir(target_dir) do
    # Kernel#open no longer handles URLs with open-uri on Ruby 3.0+; URI.open
    # is the supported entry point and never treats its argument as a
    # shell command or local path.
    @doc = Nokogiri::HTML(URI.open(url))

    get_scripts
    get_css
    get_images

    File.open('index.html', 'wb') { |file| file.write(@doc) }
  end

  puts "We done.".green
end
.download_resource(selector, source_attr, out_path) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
# File 'lib/justflow.rb', line 86
# Downloads every resource referenced by the elements matching +selector+
# and points the element at the local copy.
#
# Failures for a single resource are reported and skipped; the element then
# keeps its original attribute value.
#
# @param selector [String] CSS selector passed to +@doc.search+
# @param source_attr [String] attribute holding the resource URL ("src", "href")
# @param out_path [String] directory (relative to the cwd) to save into
# @return [Object] whatever +@doc.search(selector).each+ returns
def download_resource(selector, source_attr, out_path)
  @doc.search(selector).each do |resource|
    resource_uri = resource[source_attr]

    # A selector such as 'link[rel=stylesheet]' can match elements without
    # the attribute; there is nothing to fetch for those, and inline data:
    # URIs are already embedded in the document.
    next if resource_uri.nil? || resource_uri.strip.empty?
    next if resource_uri.strip.downcase.start_with?('data:')

    begin
      resource_uri = fix_uri(@url, resource_uri)
    rescue StandardError
      puts "URI is funky. Going for it anyway... #{resource_uri}".red
    end

    # Strip the query string BEFORE taking the basename, otherwise a "/"
    # inside the query (e.g. "a.js?next=/x/y") yields the wrong file name.
    save_path = File.join(out_path, File.basename(remove_args(resource_uri)))

    begin
      puts "✓".green + " Downloading ... " + resource_uri
      resp = get_contents(resource_uri)
      ensure_mkdir(out_path)
      # save_contents may pick a different name on collision; use what it returns.
      resource[source_attr] = save_contents(resp, save_path)
    rescue StandardError => ex
      # StandardError only: Interrupt / SystemExit must still stop the run.
      puts "✗".red + " FAIL. Couldn't do it: #{ex}"
    end
  end
end
.ensure_mkdir(dirname) ⇒ Object
61 62 63 64 65 |
# File 'lib/justflow.rb', line 61
# Makes sure +dirname+ exists as a directory, creating any missing parent
# directories as well. Calling it for an existing directory is a no-op.
#
# @param dirname [String] directory path to create
# @return [Object] the return value of FileUtils.mkdir_p (callers ignore it)
# @raise [SystemCallError] e.g. Errno::EEXIST when +dirname+ exists but is a regular file
def ensure_mkdir(dirname)
  # Required here so this method does not depend on the file's require list.
  require 'fileutils'

  # mkdir_p handles nested paths and an already-existing directory in one
  # call, which also removes the check-then-create race of
  # File.directory? + Dir.mkdir.
  FileUtils.mkdir_p(dirname)
end
.fix_uri(url, uri) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/justflow.rb', line 38
# Resolves +uri+ (absolute, relative or protocol-relative) against the base
# +url+ and returns the result as a String.
#
# Protocol-relative references ("//host/path") inherit the scheme of +url+,
# so resources of an https page are fetched over https. When resolution
# fails, the method stays best-effort: a protocol-relative reference falls
# back to "http:", anything else is returned stripped but otherwise unchanged.
#
# @param url [String] base URL the reference was found on
# @param uri [String] reference to resolve; surrounding whitespace is ignored
# @return [String] the resolved URL, or the best-effort fallback
# @raise [NoMethodError] if +uri+ is nil (callers in this file rescue that)
def fix_uri(url, uri)
  uri = uri.strip

  begin
    # URI.join implements RFC 3986 reference resolution, which covers
    # relative, absolute and "//host" references alike.
    URI.join(url, uri).to_s
  rescue StandardError => ex
    if uri.start_with?('//')
      "http:#{uri}"
    else
      puts "x".red + " Will try to download anyway. #{ex}"
      uri
    end
  end
end
.get_contents(uri) ⇒ Object
56 57 58 59 |
# File 'lib/justflow.rb', line 56
# Performs an HTTP GET for +uri+ and returns the response object.
#
# Redirect responses that carry a Location header are followed, up to
# +redirect_limit+ hops, so callers save the real resource rather than the
# redirect stub. When the limit is exhausted the last redirect response is
# returned as-is.
#
# @param uri [String] absolute URL to fetch
# @param redirect_limit [Integer] maximum number of redirects to follow
# @return [Net::HTTPResponse] the final response (status is NOT checked)
# @raise [URI::InvalidURIError] if +uri+ cannot be parsed
def get_contents(uri, redirect_limit = 5)
  response = Net::HTTP.get_response(URI.parse(uri))
  location = response['location']

  followable = response.is_a?(Net::HTTPRedirection) && location && redirect_limit > 0
  return response unless followable

  # Location may be relative; resolve it against the URL just requested.
  get_contents(URI.join(uri, location).to_s, redirect_limit - 1)
end
.get_css ⇒ Object
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
# File 'lib/justflow.rb', line 158
# Downloads every linked stylesheet into "css/" and then rewrites the
# url(...) references inside each downloaded file via process_css_urls.
#
# @return [Object] the result of iterating the stylesheet links
def get_css
  selector = 'link[rel=stylesheet]'

  # download_resource overwrites each href with the local path, so remember
  # the remote hrefs first; they are the base for resolving url(...) values.
  original_hrefs = @doc.search(selector).map { |link| link['href'] }

  download_resource(selector, 'href', 'css')

  @doc.search(selector).each_with_index do |link, idx|
    original_css_path = original_hrefs[idx]
    local_css_path = link['href']

    # A failed download leaves the remote URL (or nothing) in href; only
    # files that actually exist on disk are post-processed.
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    next unless local_css_path && File.exist?(local_css_path)

    src = process_css_urls(File.read(local_css_path), original_css_path)
    File.write(local_css_path, src)
  end
end
.get_images ⇒ Object
150 151 152 |
# File 'lib/justflow.rb', line 150
# Fetches every <img> that has a src attribute and stores it under "img".
#
# @return [Object] whatever download_resource returns
def get_images
  download_resource('img[src]', 'src', 'img')
end
.get_scripts ⇒ Object
154 155 156 |
# File 'lib/justflow.rb', line 154
# Fetches every <script> that has a src attribute and stores it under "js".
#
# @return [Object] whatever download_resource returns
def get_scripts
  download_resource('script[src]', 'src', 'js')
end
.is_font?(extension) ⇒ Boolean
34 35 36 |
# File 'lib/justflow.rb', line 34
# Treats every extension that is_img? does not recognise as a font.
#
# @param extension [String] file extension including the leading dot
# @return [Boolean] the negation of is_img?(extension)
def is_font?(extension)
  !is_img?(extension)
end
.is_img?(extension) ⇒ Boolean
29 30 31 32 |
# File 'lib/justflow.rb', line 29
# Tells whether +extension+ belongs to a known image format.
# The comparison is case-insensitive; nil is treated as "not an image".
#
# @param extension [String, nil] file extension including the leading dot
# @return [Boolean]
def is_img?(extension)
  return false if extension.nil?

  img_exts = %w[.png .jpg .jpeg .gif .svg .tif .tiff .webp .bmp .ico .avif]
  img_exts.include?(extension.downcase)
end
.process_css_urls(css_source, original_css_url) ⇒ Object
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/justflow.rb', line 113
# Downloads every asset referenced through url(...) in a stylesheet and
# returns the stylesheet source with those references pointing at the local
# copies ("../img/<name>" for images, "../fonts/<name>" for everything else).
#
# References that cannot be downloaded as files (empty, data: URIs,
# fragment-only "#id" references) are left untouched. A failed download is
# reported and the reference is still rewritten to the intended local path.
#
# @param css_source [String] contents of the stylesheet
# @param original_css_url [String] href the stylesheet was loaded from;
#   used as the base for resolving relative url(...) values
# @return [String] the rewritten stylesheet source
def process_css_urls(css_source, original_css_url)
  url_regex = /url\(\s*['"]?(.*?)['"]?\s*\)/i

  # fix_uri copes with absolute, relative and protocol-relative hrefs, so
  # resolve unconditionally; a bare relative href such as "css/site.css"
  # would otherwise be unusable as a base for the references inside the file.
  original_css_url = fix_uri(@url, original_css_url)

  puts ">".yellow + " Parsing css ... #{original_css_url}"

  css_source.gsub(url_regex) do |match|
    original_item_url = Regexp.last_match(1).strip

    # Nothing to download for these; keep the original text.
    next match if original_item_url.empty?
    next match if original_item_url.downcase.start_with?('data:', '#')

    absolute_item_url = fix_uri(original_css_url, original_item_url)

    # Local file name: drop the query string and any "#fragment"
    # (common in @font-face rules, e.g. "font.eot?#iefix", "font.svg#name").
    local_name = remove_args(original_item_url).sub(/#.*\z/, '')
    extension = File.extname(local_name)
    out_dir = is_img?(extension) ? 'img' : 'fonts'
    out_path = File.join(out_dir, File.basename(local_name))

    begin
      resp = get_contents(absolute_item_url)
      ensure_mkdir(out_dir)
      # save_contents may rename on collision; reference the file it wrote.
      out_path = save_contents(resp, out_path)
    rescue StandardError => ex
      puts "Failed. Couldnt download from CSS: #{ex}".red
    end

    # Stylesheets are saved one directory below the page root (in "css/"),
    # hence the "..".
    "url('#{File.join('..', out_path)}')"
  end
end
.remove_args(url) ⇒ Object
82 83 84 |
# File 'lib/justflow.rb', line 82
# Returns +url+ without its query string, i.e. everything before the first "?".
#
# A string that starts with "?" (or is empty) yields "" rather than the
# query text or nil, so the result can always be passed on to File.* helpers.
#
# @param url [String] URL or path, possibly carrying a query string
# @return [String] the part of +url+ before the first "?"
def remove_args(url)
  url.partition('?').first
end
.save_contents(resp, save_path) ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/justflow.rb', line 67
# Writes the response body to +save_path+ and returns the path written.
#
# If a file already exists at +save_path+ and it is not an image, the new
# file gets a "_<unix-timestamp>" suffix before its extension instead of
# overwriting the existing one. Existing images are overwritten.
#
# @param resp [#body] response whose body is written in binary mode
# @param save_path [String] desired destination path
# @return [String] the path the body was actually written to
def save_contents(resp, save_path)
  if File.file?(save_path)
    extension = File.extname(save_path)
    basename = File.basename(save_path)

    unless is_img?(extension)
      stem = File.basename(save_path, extension)
      # Rename only the trailing file name. A gsub over the whole path would
      # also rewrite directory components that happen to contain the same
      # text (e.g. "css/css.css").
      parent = save_path[0, save_path.length - basename.length]
      save_path = "#{parent}#{stem}_#{Time.now.to_i}#{extension}"
    end
  end

  File.open(save_path, 'wb') { |file| file.write(resp.body) }
  save_path
end
.valid_uri_scheme?(uri) ⇒ Boolean
52 53 54 |
# File 'lib/justflow.rb', line 52
# Tells whether +uri+ is an http or https URL.
#
# The scheme must be followed by "://", so look-alikes such as
# "httpfoo://..." are rejected; the scheme is matched case-insensitively.
#
# @param uri [String] URL to inspect
# @return [Boolean]
def valid_uri_scheme?(uri)
  uri.downcase.start_with?('http://', 'https://')
end