Module: Trackman::Urls::HtmlParser

Includes:
CssParser
Included in:
Assets::HtmlAsset
Defined in:
lib/trackman/urls/html_parser.rb

Instance Method Summary

Methods included from CssParser

#clean_comments, #parse_css

Instance Method Details

#css(doc) ⇒ Object



22
23
24
# File 'lib/trackman/urls/html_parser.rb', line 22

# Collects the +href+ attribute of every <link type="text/css"> element
# found in +doc+ and returns whatever #refine yields for them.
#
# @param doc [#xpath] parsed document that answers XPath queries
# @return [Object] the result of #refine for the matched nodes
def css(doc)
  stylesheet_links = doc.xpath('//link[@type="text/css"]')
  refine(stylesheet_links, 'href')
end

#img(doc) ⇒ Object



12
13
14
15
16
17
# File 'lib/trackman/urls/html_parser.rb', line 12

# Gathers image URLs from +doc+: the +src+ of every <img> element followed
# by the +href+ of every <link rel="icon"> element, each list passed
# through #refine before being joined.
#
# @param doc [#css, #xpath] parsed document supporting CSS and XPath queries
# @return [Object] refined image sources followed by refined icon hrefs
def img(doc)
  # <img> sources come first, favicon links second.
  refine(doc.css('img'), 'src') +
    refine(doc.xpath('//link[@rel="icon"]'), 'href')
end

#js(doc) ⇒ Object



19
20
21
# File 'lib/trackman/urls/html_parser.rb', line 19

# Collects the +src+ attribute of every <script> element in +doc+ and
# returns whatever #refine yields for them.
#
# @param doc [#xpath] parsed document that answers XPath queries
# @return [Object] the result of #refine for the matched nodes
def js(doc)
  script_nodes = doc.xpath('//script')
  refine(script_nodes, 'src')
end

#parse(html) ⇒ Object



7
8
9
10
# File 'lib/trackman/urls/html_parser.rb', line 7

# Extracts asset URLs from an HTML string.
#
# The markup is parsed with Nokogiri and queried through #img, #js and
# #css; the raw +html+ is additionally handed to #parse_css. The four
# results are concatenated in that order and de-duplicated.
#
# @param html [String] HTML markup to scan
# @return [Array] unique URLs in first-seen order
def parse(html)
  document = Nokogiri::HTML(html)
  url_groups = [img(document), js(document), css(document), parse_css(html)]
  url_groups.reduce(:+).uniq
end

#refine(paths, node) ⇒ Object



26
27
28
29
# File 'lib/trackman/urls/html_parser.rb', line 26

# Turns a collection of elements into a filtered list of URL strings.
#
# For each element, the value stored under +node+ is read, converted to a
# string, and stripped of its trailing query string (the last "?" and
# everything after it). A value is kept only if it contains at least one
# word character, answers true to +internal_path?+ and false to
# +embedded?+ (both predicates are defined outside this method).
#
# @param paths [#map] elements that respond to +[]+ with an attribute name
# @param node [String] name of the attribute to read from each element
# @return [Array<String>] the cleaned values that passed every check
def refine(paths, node)
  cleaned = paths.map do |element|
    element[node].to_s.gsub(/\?[^\?]*$/, '')
  end

  cleaned.select do |url|
    # Skip blank/punctuation-only values before asking the path predicates.
    next false unless url && url =~ /\w/

    url.internal_path? && !url.embedded?
  end
end