Class: KleosTest::LinksCollector

Inherits:
Object
  • Object
show all
Defined in:
lib/kleos_test/links_collector.rb

Constant Summary collapse

BASE_ADDRESS =
'https://www.kleos.ru'
['/']
[]
[]
{ valid: [], invalid: [] }
{ valid: [], invalid: [] }
@@inside_downloads =
0

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ LinksCollector

Returns a new instance of LinksCollector.



11
12
13
# File 'lib/kleos_test/links_collector.rb', line 11

# Picks the link this collector instance will work on by asking
# get_target_link for it, and stores it in @target.
def initialize
  @target = get_target_link
end

Class Method Details



106
107
108
# File 'lib/kleos_test/links_collector.rb', line 106

# Returns every internal link that has been checked so far: the union of the
# :valid and :invalid lists of @@inside_links, without duplicates.
#
# @return [Array] a new array; the stored lists are not modified
def inside_links
  valid, invalid = @@inside_links.values_at(:valid, :invalid)
  valid | invalid
end


114
115
116
# File 'lib/kleos_test/links_collector.rb', line 114

# Returns the :invalid list of @@inside_links, i.e. the internal targets that
# get_new_links recorded after receiving a response code other than 200.
# The stored array itself is returned, not a copy.
def invalid_inside_links
  @@inside_links[:invalid]
end


126
127
128
# File 'lib/kleos_test/links_collector.rb', line 126

# Returns the :invalid list of @@outside_links, i.e. the external links for
# which verify_outside_links got a response code other than 200.
# The stored array itself is returned, not a copy.
def invalid_outside_links
  @@outside_links[:invalid]
end


118
119
120
# File 'lib/kleos_test/links_collector.rb', line 118

# Returns every external link that has been checked so far: the union of the
# :valid and :invalid lists of @@outside_links, without duplicates.
#
# @return [Array] a new array; the stored lists are not modified
def outside_links
  valid, invalid = @@outside_links.values_at(:valid, :invalid)
  valid | invalid
end


102
103
104
# File 'lib/kleos_test/links_collector.rb', line 102

# Returns the queue of internal links that still wait to be downloaded.
# get_target_link takes entries from its front and refill_unverified_links
# appends to it. The stored array itself is returned, not a copy.
def unverified_inside_links
  @@unverified_inside_links
end


110
111
112
# File 'lib/kleos_test/links_collector.rb', line 110

# Returns the :valid list of @@inside_links, i.e. the internal targets that
# get_new_links recorded after receiving response code 200.
# The stored array itself is returned, not a copy.
def valid_inside_links
  @@inside_links[:valid]
end


122
123
124
# File 'lib/kleos_test/links_collector.rb', line 122

# Returns the :valid list of @@outside_links, i.e. the external links for
# which verify_outside_links got response code 200.
# The stored array itself is returned, not a copy.
def valid_outside_links
  @@outside_links[:valid]
end

Instance Method Details

#download_inside_webpage ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/kleos_test/links_collector.rb', line 43

# Downloads the page behind @target and reports progress on stdout.
#
# A target that starts with "http" is requested as-is; any other target is
# appended to BASE_ADDRESS first. Failures are never raised to the caller:
# they are reported on stdout and turned into a return value.
#
# @return [Array] a two-element array of response body and response code.
#   When no HTTP response is available (bad URI, unknown error, or an error
#   object without a response) the placeholder pair ['fake body', 1] is
#   returned.
def download_inside_webpage
  # Only a leading "http" marks an absolute URL. A relative target that merely
  # contains "http" somewhere (e.g. inside a query string) still has to be
  # resolved against BASE_ADDRESS.
  address = @target.start_with?('http') ? @target : BASE_ADDRESS + @target
  @@inside_downloads += 1
  progress = "(#{@@inside_downloads}|#{@@unverified_inside_links.size})"
  print "Downloading #{progress} #{address}..."
  response = RestClient.get(address)
  puts "OK"
  [response.body, response.code]
rescue RestClient::ExceptionWithResponse => e
  puts "ERROR"
  # e.response may be nil (e.g. rest-client timeout errors carry no response).
  # Calling #body on nil here would raise out of the rescue clause and abort
  # the whole run, so fall back to the placeholder pair instead.
  e.response ? [e.response.body, e.response.code] : ['fake body', 1]
rescue URI::InvalidURIError
  puts "ERROR"
  puts "BAD URI"
  ['fake body', 1]
rescue
  # Deliberate best effort: any other StandardError counts as a failed page.
  puts "UNKNOWN ERROR"
  ['fake body', 1]
end

#download_outside_webpage(address, counter) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/kleos_test/links_collector.rb', line 63

# Requests an external address and reports progress on stdout.
#
# Failures are never raised to the caller: they are reported on stdout and
# turned into a return value.
#
# @param address [String] the URL to request
# @param counter [Integer] position of this request in the current run
#   (verify_outside_links passes the 1-based index); used only for the
#   progress output
# @return [Object] the response code, or 1 when no HTTP response is available
#   (bad URI, unknown error, or an error object without a response)
def download_outside_webpage(address, counter)
  remaining = @@unverified_outside_links.size - counter
  print "Downloading (#{counter}|#{remaining}) #{address}..."
  response = RestClient.get(address)
  puts "OK"
  response.code
rescue RestClient::ExceptionWithResponse => e
  puts "ERROR"
  # e.response may be nil (e.g. rest-client timeout errors carry no response).
  # Calling #code on nil here would raise out of the rescue clause and abort
  # the whole verification loop, so fall back to the placeholder code.
  e.response ? e.response.code : 1
rescue URI::InvalidURIError
  puts "ERROR"
  puts "BAD URI"
  1
rescue
  # Deliberate best effort: any other StandardError counts as a failed link.
  puts "UNKNOWN ERROR"
  1
end


81
82
83
84
85
86
87
# File 'lib/kleos_test/links_collector.rb', line 81

# Collects the href values of the anchors found in an HTML document.
#
# Anchors are skipped when their href is empty or starts with
# "javascript:void(0)", "#" or "mailto".
#
# @param page [String] HTML source handed to Nokogiri::HTML
# @return [Array<String>] href values in the order returned by the XPath query
def extract_links(page)
  xpath = [
    '//a[@href!=""]',
    '[not(starts-with(@href, "javascript:void(0)"))]',
    '[not(starts-with(@href, "#"))]',
    '[not(starts-with(@href, "mailto"))]'
  ].join

  anchors = Nokogiri::HTML(page).xpath(xpath)
  anchors.map { |anchor| anchor.attribute('href').value }
end


19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/kleos_test/links_collector.rb', line 19

# Downloads @target, files it under :valid or :invalid in @@inside_links
# depending on whether the response code is 200, and marks it as verified.
# For a valid page the links found on it are passed on to
# refill_unverified_links.
def get_new_links
  html, status = download_inside_webpage
  reachable = status == 200

  @@inside_links[reachable ? :valid : :invalid] << @target
  # Recorded before the refill step, which skips links already present in
  # @@verified_links, so a page linking to itself is not queued again.
  verified = @@verified_links << @target

  reachable ? refill_unverified_links(extract_links(html)) : verified
end


15
16
17
# File 'lib/kleos_test/links_collector.rb', line 15

# Hands out the next internal link to process: the first entry of
# @@unverified_inside_links (which is removed from the queue), or '/' when
# the queue is empty.
def get_target_link
  return '/' if @@unverified_inside_links.empty?

  @@unverified_inside_links.shift
end


89
90
91
92
93
94
95
96
97
98
99
# File 'lib/kleos_test/links_collector.rb', line 89

# Sorts freshly extracted links into the two work queues.
#
# Links matching the site pattern are appended to @@unverified_inside_links
# unless they are already queued there or listed in @@verified_links. All
# remaining links are appended to @@unverified_outside_links, which is then
# de-duplicated in place.
#
# @param links [Array<String>] href values taken from a downloaded page
def refill_unverified_links(links)
  site_pattern = /^(\/|\?|https?:\/\/(www\.)?kleos\.ru)(?!\/forum)/
  inside, outside = links.partition { |href| href.match(site_pattern) }

  unseen = inside.reject do |href|
    @@unverified_inside_links.include?(href) || @@verified_links.include?(href)
  end
  @@unverified_inside_links.concat(unseen.uniq)

  @@unverified_outside_links.concat(outside).uniq!
end


32
33
34
35
36
37
38
39
40
41
# File 'lib/kleos_test/links_collector.rb', line 32

# Requests every link in @@unverified_outside_links and files it under
# :valid in @@outside_links when the response code is 200, otherwise under
# :invalid.
def verify_outside_links
  @@unverified_outside_links.each.with_index(1) do |url, position|
    status = download_outside_webpage(url, position)
    bucket = status == 200 ? :valid : :invalid
    @@outside_links[bucket] << url
  end
end