Module: JustFlow

Defined in:
lib/justflow.rb

Class Method Summary collapse

Class Method Details

.convert(url) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/justflow.rb', line 9

# Downloads the page at +url+ together with its scripts, stylesheets and
# images into a local directory and writes the rewritten page as index.html.
#
# The target directory name is built from the URL's host, path and query with
# characters that are unsafe in file names replaced by underscores.
#
# NOTE: this method changes the process working directory to the target
# directory and does not change it back.
#
# @param url [String] address of the page to mirror
# @return [nil]
def convert(url)
  puts "Converting".yellow + " #{url}..."
  @url = url
  url_parsed = URI.parse(url)
  url_parsed2 = url_parsed.host.to_s + url_parsed.path.to_s + url_parsed.query.to_s
  target_dir = url_parsed2.gsub(/[\x00\/\\:\*\?\"<>\|]/, '_')
  ensure_mkdir(target_dir)
  Dir.chdir(target_dir)

  # URI.open (open-uri) instead of Kernel#open: Kernel#open stopped handling
  # URLs in Ruby 3.0 and would run a subprocess for input starting with "|".
  # The block form closes the underlying stream once the page is parsed.
  @doc = URI.open(url) { |io| Nokogiri::HTML(io) }

  get_scripts()
  get_css()
  get_images()

  File.open('index.html', 'wb') { |file| file.write(@doc) }
  puts "We done.".green
end

.download_resource(selector, source_attr, out_path) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/justflow.rb', line 86

# Downloads every resource referenced by the elements matching +selector+ in
# @doc and points the element's attribute at the locally saved copy.
#
# Elements whose attribute is missing, blank or an inline data: URI are left
# untouched. A failed download is reported and the element keeps its value.
#
# @param selector [String] selector passed to @doc.search
# @param source_attr [String] attribute holding the resource URL (e.g. 'src')
# @param out_path [String] directory the files are saved into
# @return [Object] whatever +each+ returns for the matched collection
def download_resource(selector, source_attr, out_path)
  @doc.search(selector).each do |resource|
    resource_uri = resource[source_attr]

    # Nothing to fetch: a nil value would otherwise crash File.basename below,
    # and a blank one would resolve to the page URL itself.
    next if resource_uri.nil? || resource_uri.strip.empty?
    next if resource_uri.strip.downcase.start_with?('data:')

    begin
      resource_uri = fix_uri(@url, resource_uri)
    rescue
      puts "URI is funky. Going for it anyway... #{resource_uri}".red
    end

    # Local file name is the last URL segment without its query string.
    save_path = remove_args(File.join(out_path, File.basename(resource_uri)))

    begin
      puts "".green + " Downloading ... " + resource_uri
      resp = get_contents(resource_uri)
      ensure_mkdir(out_path)
      # save_contents may pick a different name when the file already exists.
      resource[source_attr] = save_contents(resp, save_path)
    rescue StandardError => ex
      # StandardError only, so Ctrl-C and exit still terminate the run.
      puts "".red + " FAIL. Couldn't do it: #{ex}"
    end
  end
end

.ensure_mkdir(dirname) ⇒ Object



61
62
63
64
65
# File 'lib/justflow.rb', line 61

# Creates the directory +dirname+ unless a directory already exists there.
#
# @param dirname [String] path of the directory to create
# @return [Integer, nil] result of Dir.mkdir when created, nil otherwise
def ensure_mkdir(dirname)
  Dir.mkdir(dirname) unless File.directory?(dirname)
end

.fix_uri(url, uri) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/justflow.rb', line 38

# Turns a reference found in a document into an absolute URL.
#
# Protocol-relative references ("//host/path") inherit https from +url+ when
# the base is https and default to http otherwise. Everything else is resolved
# against +url+ with URI.join; if that fails the stripped reference is
# returned unchanged so the caller can still attempt the download.
#
# @param url [String] base URL the reference was found on
# @param uri [String] reference to resolve (surrounding whitespace is ignored)
# @return [String] the resolved URL, or the stripped input when joining fails
def fix_uri(url, uri)
  uri = uri.strip

  if uri.start_with?('//')
    # Keep the page's scheme: an https-only host would otherwise answer the
    # http request with a redirect instead of the resource.
    scheme = url.to_s.downcase.start_with?('https:') ? 'https' : 'http'
    return "#{scheme}:#{uri}"
  end

  begin
    URI.join(url, uri).to_s # relative or absolute
  rescue StandardError => ex
    puts "x".red + " Will try to download anyway. #{ex}"
    uri
  end
end

.get_contents(uri) ⇒ Object



56
57
58
59
# File 'lib/justflow.rb', line 56

# Performs an HTTP GET for +uri+.
#
# @param uri [String] address to fetch
# @return [Net::HTTPResponse] the response as returned by Net::HTTP.get_response
def get_contents(uri)
  Net::HTTP.get_response(URI.parse(uri))
end

.get_css ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/justflow.rb', line 158

# Downloads all linked stylesheets and rewrites the url(...) references inside
# each downloaded file via process_css_urls.
#
# The original href of every stylesheet link is remembered before
# download_resource replaces it with the local path, because process_css_urls
# is given the original location of the stylesheet.
#
# @return [Object] whatever +each_with_index+ returns for the matched links
def get_css
  selector = 'link[rel=stylesheet]'
  original_hrefs = @doc.search(selector).map { |link| link['href'] }
  download_resource(selector, 'href', 'css')

  @doc.search(selector).each_with_index do |link, idx|
    original_css_path = original_hrefs[idx]
    local_css_path = link['href']

    # Skip links without an href and stylesheets that were not saved locally.
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    next unless original_css_path && local_css_path && File.exist?(local_css_path)

    rewritten = process_css_urls(File.read(local_css_path), original_css_path)
    File.write(local_css_path, rewritten)
  end
end

.get_images ⇒ Object



150
151
152
# File 'lib/justflow.rb', line 150

# Hands every <img> element that has a src attribute to download_resource,
# saving the files under 'img'.
#
# @return [Object] the result of download_resource
def get_images
  download_resource('img[src]', 'src', 'img')
end

.get_scripts ⇒ Object



154
155
156
# File 'lib/justflow.rb', line 154

# Hands every <script> element that has a src attribute to download_resource,
# saving the files under 'js'.
#
# @return [Object] the result of download_resource
def get_scripts
  download_resource('script[src]', 'src', 'js')
end

.is_font?(extension) ⇒ Boolean

Returns:

  • (Boolean)


34
35
36
# File 'lib/justflow.rb', line 34

# Treats every extension that is_img? does not recognise as a font.
#
# @param extension [String] file extension including the leading dot
# @return [Boolean] the negation of is_img?(extension)
def is_font?(extension)
  !is_img?(extension)
end

.is_img?(extension) ⇒ Boolean

Returns:

  • (Boolean)


29
30
31
32
# File 'lib/justflow.rb', line 29

# Tells whether +extension+ is one of the known image extensions.
# The comparison ignores case.
#
# @param extension [String] file extension including the leading dot
# @return [Boolean]
def is_img?(extension)
  case extension.downcase
  when '.png', '.jpg', '.jpeg', '.gif', '.svg', '.tif', '.tiff' then true
  else false
  end
end

.process_css_urls(css_source, original_css_url) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/justflow.rb', line 113

# Downloads the assets referenced through url(...) in a stylesheet and
# rewrites each reference to the local copy.
#
# Images (per is_img?) are stored under 'img', everything else under 'fonts';
# the rewritten reference is '../<dir>/<file>'. Empty references and inline
# data: URIs are left exactly as they are. A failed download is reported but
# the reference is still rewritten.
#
# @param css_source [String] contents of the stylesheet
# @param original_css_url [String] location the stylesheet was loaded from,
#   as written in the page; used as the base for relative asset references
# @return [String] the stylesheet with rewritten url(...) references
def process_css_urls(css_source, original_css_url)
  url_regex = /url\(['"]?(.*?)['"]?\)/i

  if original_css_url.start_with?('//')
    original_css_url = 'http:' + original_css_url
  elsif original_css_url !~ /\A[a-z][a-z0-9+.\-]*:/i
    # Any reference without a scheme ('/x.css', './x.css', 'css/x.css') has to
    # be resolved against the page, otherwise it cannot serve as a join base.
    original_css_url = fix_uri(@url, original_css_url)
  end

  puts ">".yellow + " Parsing css ... #{original_css_url}"

  css_source.gsub(url_regex) do |whole_match|
    original_item_url = Regexp.last_match(1)
    trimmed = original_item_url.strip

    # Nothing to download for empty references or inline data.
    next whole_match if trimmed.empty? || trimmed.downcase.start_with?('data:')

    absolute_item_url = fix_uri(original_css_url, original_item_url)
    original_item_url = remove_args(original_item_url)
    next whole_match if original_item_url.nil?

    extension = File.extname(original_item_url)
    basename  = File.basename(original_item_url)

    out_dir = is_img?(extension) ? 'img' : 'fonts'
    ensure_mkdir(out_dir)
    out_path = File.join(out_dir, basename)

    begin
      resp = get_contents(absolute_item_url)
      save_contents(resp, out_path)
    rescue StandardError => ex
      # StandardError only, so Ctrl-C and exit still terminate the run.
      puts "Failed. Couldnt download from CSS: #{ex}".red
    end

    "url('#{File.join('..', out_path)}')"
  end
end

.remove_args(url) ⇒ Object



82
83
84
# File 'lib/justflow.rb', line 82

# Returns the first run of characters in +url+ that contains no '?', which
# for a typical URL is everything before the query string.
#
# @param url [String]
# @return [String, nil] the matched part, or nil when there is no such run
def remove_args(url)
  leading_part = url.match(/[^\?]+/)
  leading_part && leading_part[0]
end

.save_contents(resp, save_path) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/justflow.rb', line 67

# Writes the body of +resp+ to +save_path+ in binary mode.
#
# When a file already exists at +save_path+ and its extension is not an image
# extension (per is_img?), the data goes to a sibling file whose name carries
# the current Unix timestamp, e.g. 'js/app.js' -> 'js/app_1700000000.js'.
# Existing image files are overwritten.
#
# @param resp [#body] response whose body is written
# @param save_path [String] desired destination path
# @return [String] the path that was actually written
def save_contents(resp, save_path)
  if File.file?(save_path)
    extension = File.extname(save_path)
    unless is_img?(extension)
      basename = File.basename(save_path)
      filename = File.basename(save_path, extension)
      # Rename only the final path segment; substituting the stem across the
      # whole path would also alter a directory of the same name ('js/js.js').
      directory_part = save_path[0, save_path.length - basename.length]
      save_path = "#{directory_part}#{filename}_#{Time.now.to_i}#{extension}"
    end
  end

  File.open(save_path, 'wb') { |file| file.write(resp.body) }
  save_path
end

.valid_uri_scheme?(uri) ⇒ Boolean

Returns:

  • (Boolean)


52
53
54
# File 'lib/justflow.rb', line 52

# Tells whether +uri+ begins with 'http' (which also covers 'https').
# Only the prefix is checked, so any string starting with those characters
# is accepted.
#
# @param uri [String]
# @return [Boolean]
def valid_uri_scheme?(uri)
  uri.start_with?('http', 'https')
end