Module: WebRules

Defined in:
lib/web_rules.rb,
lib/web_rules/version.rb

Constant Summary collapse

VERSION =
"0.0.2"

Instance Method Summary collapse

Instance Method Details

#recurse_select_refs(host, url, recurse, id) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/web_rules.rb', line 8

# Fetches +url+, records every <a> href found on it, and follows links that
# stay on the same host as +host+, up to +recurse+ levels deep.
#
# Each anchor's raw href is stored in <tt>@objects[id]</tt> as
# <tt>href => URL of the page it was found on</tt>. Links to other hosts and
# non-navigable links (no host, e.g. mailto:) are recorded but not followed.
# Relative links are resolved against the page they appear on.
#
# Progress is written to stdout: "." per recorded link, and the error plus
# the URL whenever a page cannot be fetched or parsed.
#
# @param host [String] absolute URL whose host bounds the crawl
# @param url [String] absolute URL of the page to fetch
# @param recurse [Integer] remaining depth; nothing is fetched when <= 0
# @param id [Object] key into @objects selecting the Hash that collects links
# @return [Object, nil] nil when the depth is exhausted or the page could not
#   be loaded; otherwise the collection of anchors that was iterated
def recurse_select_refs(host, url, recurse, id)
  return if recurse <= 0

  result = @objects[id]

  begin
    # URI#open (from open-uri) instead of Kernel#open: it never spawns a
    # subprocess for "|cmd" strings, and the block form closes the stream.
    doc = Timeout.timeout(4) do
      URI.parse(url).open { |io| Nokogiri::HTML(io) }
    end
  rescue Timeout::Error, StandardError => e
    # Best effort: an unreachable or slow page only ends this branch of the crawl.
    puts e, "----", url
    return nil
  end

  # Loop-invariant; an unparsable +host+ simply means no link is followed.
  root_host = begin
    URI.parse(host).host
  rescue URI::Error
    nil
  end

  doc.xpath('//a').each do |r|
    href = r['href']

    # Anchors with a missing or malformed href are reported and skipped.
    begin
      URI.parse(href)
    rescue StandardError => e
      puts e
      puts href.to_s
      next
    end

    result[href] = url
    print "."

    # Resolve against the current page so relative links become fetchable.
    begin
      target = URI.join(url, href)
    rescue StandardError
      next
    end

    next unless target.host && target.host == root_host

    # A fragment addresses the same document; don't refetch the current page.
    target.fragment = nil
    next if target.to_s == url

    recurse_select_refs(host, target.to_s, recurse - 1, id)
  end
end

#web_host ⇒ Object



59
60
61
62
63
# File 'lib/web_rules.rb', line 59

# Pops a URL string off @data_stack and pushes back an "http://<host>" URL
# built from that URL's host component only (path, query and the original
# scheme are discarded).
#
# @return [Object] whatever <tt>@data_stack.push</tt> returns
def web_host
  parsed = URI.parse(@data_stack.pop)
  root = URI::HTTP.build(host: parsed.host)
  @data_stack.push(root.to_s)
end


#web_select_all_links ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/web_rules.rb', line 45

# Pops a depth and then a start URL off @data_stack, collects links reachable
# from that URL via recurse_select_refs into a fresh Hash stored under a new
# id in @objects, prints "+" when done, and pushes the id onto @data_stack.
#
# The host handed to recurse_select_refs is "http://<host of the start URL>".
#
# @return [Object] whatever <tt>@data_stack.push</tt> returns
def web_select_all_links
  depth = @data_stack.pop
  start_url = @data_stack.pop
  root = URI::HTTP.build(host: URI.parse(start_url).host).to_s

  bucket = unique_id
  @objects[bucket] = {}
  recurse_select_refs(root, start_url, depth, bucket)

  puts "+"
  @data_stack.push(bucket)
end