Class: KleosTest::LinksCollector

Inherits:
Object
  • Object
show all
Defined in:
lib/kleos_test/links_collector.rb

Constant Summary collapse

['/']
[]
[]
{ valid: [], invalid: [] }
{ valid: [], invalid: [] }
@@inside_downloads =
0

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ LinksCollector

Returns a new instance of LinksCollector.



9
10
11
# File 'lib/kleos_test/links_collector.rb', line 9

# Creates a collector bound to the next link to process.
#
# The target comes from #get_target_link, which takes the next entry of the
# shared queue of unverified inside links, or '/' when that queue is empty.
def initialize
  @target = get_target_link
end

Class Method Details



104
105
106
# File 'lib/kleos_test/links_collector.rb', line 104

# All inside links recorded so far, valid and invalid combined.
#
# @return [Array] set union (duplicates removed) of the :valid and :invalid
#   entries of @@inside_links, valid entries first
def inside_links
  valid, invalid = @@inside_links.values_at(:valid, :invalid)
  valid | invalid
end


112
113
114
# File 'lib/kleos_test/links_collector.rb', line 112

# Inside links recorded as invalid.
#
# @return [Array] the :invalid entry of @@inside_links (the stored object
#   itself, not a copy)
def invalid_inside_links
  @@inside_links[:invalid]
end


124
125
126
# File 'lib/kleos_test/links_collector.rb', line 124

# Outside links recorded as invalid.
#
# @return [Array] the :invalid entry of @@outside_links (the stored object
#   itself, not a copy)
def invalid_outside_links
  @@outside_links[:invalid]
end


116
117
118
# File 'lib/kleos_test/links_collector.rb', line 116

# All outside links recorded so far, valid and invalid combined.
#
# @return [Array] set union (duplicates removed) of the :valid and :invalid
#   entries of @@outside_links, valid entries first
def outside_links
  valid, invalid = @@outside_links.values_at(:valid, :invalid)
  valid | invalid
end


100
101
102
# File 'lib/kleos_test/links_collector.rb', line 100

# Queue of inside links that have been discovered but not yet processed.
#
# @return [Array] the shared @@unverified_inside_links object itself (not a
#   copy)
def unverified_inside_links
  @@unverified_inside_links
end


108
109
110
# File 'lib/kleos_test/links_collector.rb', line 108

# Inside links recorded as valid.
#
# @return [Array] the :valid entry of @@inside_links (the stored object
#   itself, not a copy)
def valid_inside_links
  @@inside_links[:valid]
end


120
121
122
# File 'lib/kleos_test/links_collector.rb', line 120

# Outside links recorded as valid.
#
# @return [Array] the :valid entry of @@outside_links (the stored object
#   itself, not a copy)
def valid_outside_links
  @@outside_links[:valid]
end

Instance Method Details

#download_inside_webpage ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/kleos_test/links_collector.rb', line 41

# Downloads the page for the current target and prints progress to stdout.
#
# A target that does not start with an http(s) scheme is appended to
# KleosTest.base_address. The shared download counter is incremented on
# every call.
#
# @return [Array] two elements: the response body and the response code.
#   When the request fails without any usable response the placeholder
#   pair ['fake body', 1] is returned instead.
def download_inside_webpage
  # Only a leading scheme makes the target absolute; a relative link may
  # contain "http" further along (for example inside a query string).
  absolute = @target.start_with?('http://', 'https://')
  address = absolute ? @target : KleosTest.base_address + @target
  @@inside_downloads += 1
  # URI.decode is not available on Ruby 3.0+; the default parser's
  # #unescape performs the same percent-decoding.
  readable = URI::DEFAULT_PARSER.unescape(address)
  print "Downloading (#{@@inside_downloads}|#{@@unverified_inside_links.size}) #{readable}..."
  response = RestClient.get(address)
  puts "OK"
  [response.body, response.code]
rescue RestClient::ExceptionWithResponse => e
  puts "ERROR"
  # Errors in this hierarchy may carry no response object (e.g. timeouts);
  # fall back to the placeholder rather than raising from inside the rescue.
  failed = e.response
  failed ? [failed.body, failed.code] : ['fake body', 1]
rescue URI::InvalidURIError
  puts "ERROR"
  puts "BAD URI"
  ['fake body', 1]
rescue
  puts "UNKNOWN ERROR"
  ['fake body', 1]
end

#download_outside_webpage(address, counter) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/kleos_test/links_collector.rb', line 61

# Requests an outside link and prints progress to stdout.
#
# @param address [String] URL to request
# @param counter [Integer] position of this link in the outside queue; used
#   only for the progress line (shown together with the number of links
#   remaining after it)
# @return [Integer] the response code, or 1 when the request fails without
#   any usable response
def download_outside_webpage(address, counter)
  remaining = @@unverified_outside_links.size - counter
  # URI.decode is not available on Ruby 3.0+; the default parser's
  # #unescape performs the same percent-decoding.
  readable = URI::DEFAULT_PARSER.unescape(address)
  print "Downloading (#{counter}|#{remaining}) #{readable}..."
  response = RestClient.get(address)
  puts "OK"
  response.code
rescue RestClient::ExceptionWithResponse => e
  puts "ERROR"
  # Errors in this hierarchy may carry no response object (e.g. timeouts);
  # fall back to the placeholder code rather than raising from the rescue.
  e.response ? e.response.code : 1
rescue URI::InvalidURIError
  puts "ERROR"
  puts "BAD URI"
  1
rescue
  puts "UNKNOWN ERROR"
  1
end


79
80
81
82
83
84
85
# File 'lib/kleos_test/links_collector.rb', line 79

# Collects the href values of the anchors found in an HTML page.
#
# Anchors are skipped when their href is empty or starts with
# "javascript:void(0)", "#" or "mailto".
#
# @param page [String] HTML source handed to Nokogiri::HTML
# @return [Array<String>] href values in the order the XPath query yields them
def extract_links(page)
  predicates = [
    '//a[@href!=""]',
    '[not(starts-with(@href, "javascript:void(0)"))]',
    '[not(starts-with(@href, "#"))]',
    '[not(starts-with(@href, "mailto"))]'
  ]
  anchors = Nokogiri::HTML(page).xpath(predicates.join)
  anchors.map do |anchor|
    anchor.attribute('href').value
  end
end


17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/kleos_test/links_collector.rb', line 17

# Downloads the current target, records the outcome and queues new links.
#
# The target is filed under :valid when the response code is 200 and under
# :invalid otherwise; in both cases it is added to the verified links. Only
# a 200 response has its page scanned for further links.
def get_new_links
  body, status = download_inside_webpage
  succeeded = status == 200
  @@inside_links[succeeded ? :valid : :invalid] << @target
  @@verified_links << @target
  return @@verified_links unless succeeded

  refill_unverified_links(extract_links(body))
end


13
14
15
# File 'lib/kleos_test/links_collector.rb', line 13

# Picks the next link to process.
#
# @return [Object] '/' when the queue of unverified inside links is empty,
#   otherwise the first queued entry, which is removed from the queue
def get_target_link
  return '/' if @@unverified_inside_links.empty?

  @@unverified_inside_links.shift
end


87
88
89
90
91
92
93
94
95
96
97
# File 'lib/kleos_test/links_collector.rb', line 87

# Sorts freshly extracted links into the inside and outside queues.
#
# A link counts as "inside" when it matches the site regexp below. Inside
# links that are already queued or already verified are dropped; the rest
# are appended (de-duplicated) to the unverified inside queue. Every other
# link is appended to the unverified outside queue, which is then
# de-duplicated in place.
#
# NOTE(review): the (?!\/forum) lookahead is evaluated right after the
# matched prefix, so it rejects "http://kleos.ru/forum..." but not a
# relative "/forum..." link — confirm whether that is intended.
#
# @param links [Array<String>] href values extracted from a page
def refill_unverified_links(links)
  regexp = /^(\/|\?|https?:\/\/(www\.)?kleos\.ru)(?!\/forum)/
  inside, outside = links.partition { |link| link.match(regexp) }

  unseen = inside.reject do |link|
    @@unverified_inside_links.include?(link) || @@verified_links.include?(link)
  end
  @@unverified_inside_links.concat(unseen.uniq)

  @@unverified_outside_links.concat(outside).uniq!
end


30
31
32
33
34
35
36
37
38
39
# File 'lib/kleos_test/links_collector.rb', line 30

# Requests every queued outside link and files it by outcome.
#
# A link goes under :valid when #download_outside_webpage reports 200 and
# under :invalid for any other code. The 1-based position of the link is
# passed along for the progress output.
def verify_outside_links
  @@unverified_outside_links.each.with_index(1) do |link, position|
    status = download_outside_webpage(link, position)
    bucket = status == 200 ? :valid : :invalid
    @@outside_links[bucket] << link
  end
end