Class: KleosTest::LinksCollector
- Inherits: Object
- Inheritance hierarchy:
  - Object
  - KleosTest::LinksCollector
- Defined in:
- lib/kleos_test/links_collector.rb
Constant Summary collapse
- @@unverified_inside_links = ['/']
- @@unverified_outside_links = []
- @@verified_links = []
- @@inside_links = { valid: [], invalid: [] }
- @@outside_links = { valid: [], invalid: [] }
- @@inside_downloads = 0
Class Method Summary collapse
- .inside_links ⇒ Object
- .invalid_inside_links ⇒ Object
- .invalid_outside_links ⇒ Object
- .outside_links ⇒ Object
- .unverified_inside_links ⇒ Object
- .valid_inside_links ⇒ Object
- .valid_outside_links ⇒ Object
Instance Method Summary collapse
- #download_inside_webpage ⇒ Object
- #download_outside_webpage(address, counter) ⇒ Object
- #extract_links(page) ⇒ Object
- #get_new_links ⇒ Object
- #get_target_link ⇒ Object
- #initialize ⇒ LinksCollector (constructor): A new instance of LinksCollector.
- #refill_unverified_links(links) ⇒ Object
- #verify_outside_links ⇒ Object
Constructor Details
#initialize ⇒ LinksCollector
Returns a new instance of LinksCollector.
9 10 11 |
# File 'lib/kleos_test/links_collector.rb', line 9
# Creates a collector and stores the link it will work on in @target.
# The link is whatever #get_target_link hands back.
def initialize
  @target = get_target_link
end
Class Method Details
.inside_links ⇒ Object
104 105 106 |
# File 'lib/kleos_test/links_collector.rb', line 104
# Union of the valid and invalid inside links recorded so far.
#
# @return [Array] a new array; duplicates are dropped by the union
def inside_links
  valid, invalid = @@inside_links.values_at(:valid, :invalid)
  valid | invalid
end
.invalid_inside_links ⇒ Object
112 113 114 |
# File 'lib/kleos_test/links_collector.rb', line 112
# Inside links stored under the :invalid key.
#
# @return [Array] the stored array itself, not a copy
def invalid_inside_links
  @@inside_links[:invalid]
end
.invalid_outside_links ⇒ Object
124 125 126 |
# File 'lib/kleos_test/links_collector.rb', line 124
# Outside links stored under the :invalid key.
#
# @return [Array] the stored array itself, not a copy
def invalid_outside_links
  @@outside_links[:invalid]
end
.outside_links ⇒ Object
116 117 118 |
# File 'lib/kleos_test/links_collector.rb', line 116
# Union of the valid and invalid outside links recorded so far.
#
# @return [Array] a new array; duplicates are dropped by the union
def outside_links
  valid, invalid = @@outside_links.values_at(:valid, :invalid)
  valid | invalid
end
.unverified_inside_links ⇒ Object
100 101 102 |
# File 'lib/kleos_test/links_collector.rb', line 100
# Queue of inside links that have not been taken for processing yet.
#
# @return [Array] the shared queue itself, not a copy
def unverified_inside_links
  @@unverified_inside_links
end
.valid_inside_links ⇒ Object
108 109 110 |
# File 'lib/kleos_test/links_collector.rb', line 108
# Inside links stored under the :valid key.
#
# @return [Array] the stored array itself, not a copy
def valid_inside_links
  @@inside_links[:valid]
end
.valid_outside_links ⇒ Object
120 121 122 |
# File 'lib/kleos_test/links_collector.rb', line 120
# Outside links stored under the :valid key.
#
# @return [Array] the stored array itself, not a copy
def valid_outside_links
  @@outside_links[:valid]
end
Instance Method Details
#download_inside_webpage ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/kleos_test/links_collector.rb', line 41
# Requests the page behind @target and reports what came back.
#
# A target that starts with "http" is requested as-is; anything else is
# appended to KleosTest.base_address. Every call increments the shared
# @@inside_downloads counter and prints a progress line.
#
# @return [Array] two-element array of response body and status code; when
#   no usable response exists (bad URI, any other StandardError, or an HTTP
#   exception without a response) the pair is ['fake body', 1]
def download_inside_webpage
  # Test the scheme at the start of the link only: a relative link that
  # merely contains "http" (e.g. in a query string) still needs the base.
  address = @target.start_with?('http') ? @target : KleosTest.base_address + @target
  @@inside_downloads += 1
  # URI.decode was removed in Ruby 3.0; left in place, the NoMethodError
  # would be swallowed by the bare rescue below and every page would be
  # reported as "UNKNOWN ERROR".
  print "Downloading (#{@@inside_downloads}|#{@@unverified_inside_links.size}) " \
        "#{URI::DEFAULT_PARSER.unescape(address)}..."
  response = RestClient.get(address)
  puts 'OK'
  [response.body, response.code]
rescue RestClient::ExceptionWithResponse => e
  puts 'ERROR'
  # NOTE(review): rest-client appears to raise this class without a response
  # in some cases (e.g. timeouts) -- confirm. An error raised inside a rescue
  # clause is not caught by the sibling clauses, so guard against nil here.
  e.response ? [e.response.body, e.response.code] : ['fake body', 1]
rescue URI::InvalidURIError
  puts 'ERROR'
  puts 'BAD URI'
  ['fake body', 1]
rescue
  puts 'UNKNOWN ERROR'
  ['fake body', 1]
end
#download_outside_webpage(address, counter) ⇒ Object
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/kleos_test/links_collector.rb', line 61
# Requests an outside address and returns only its status code.
#
# Prints a progress line of the form "(counter|remaining)", where remaining
# is the size of @@unverified_outside_links minus counter.
#
# @param address [String] URL to request
# @param counter [Integer] position shown in the progress line
# @return [Integer] the response status code, or 1 when no usable response
#   exists (bad URI, any other StandardError, or an HTTP exception without
#   a response)
def download_outside_webpage(address, counter)
  # URI.decode was removed in Ruby 3.0; left in place, the NoMethodError
  # would be swallowed by the bare rescue below and every link would be
  # reported as "UNKNOWN ERROR".
  print "Downloading (#{counter}|#{@@unverified_outside_links.size - counter}) " \
        "#{URI::DEFAULT_PARSER.unescape(address)}..."
  response = RestClient.get(address)
  puts 'OK'
  response.code
rescue RestClient::ExceptionWithResponse => e
  puts 'ERROR'
  # NOTE(review): rest-client appears to raise this class without a response
  # in some cases (e.g. timeouts) -- confirm. Guard against nil so the error
  # does not escape from inside the rescue clause.
  e.response ? e.response.code : 1
rescue URI::InvalidURIError
  puts 'ERROR'
  puts 'BAD URI'
  1
rescue
  puts 'UNKNOWN ERROR'
  1
end
#extract_links(page) ⇒ Object
79 80 81 82 83 84 85 |
# File 'lib/kleos_test/links_collector.rb', line 79
# Collects the href values of the anchors found in an HTML document.
#
# Anchors are skipped when their href is empty or starts with
# "javascript:void(0)", "#" or "mailto".
#
# @param page [String] HTML source handed to Nokogiri::HTML
# @return [Array<String>] href values in document order
def extract_links(page)
  xpath = [
    '//a[@href!=""]',
    '[not(starts-with(@href, "javascript:void(0)"))]',
    '[not(starts-with(@href, "#"))]',
    '[not(starts-with(@href, "mailto"))]'
  ].join
  anchors = Nokogiri::HTML(page).xpath(xpath)
  anchors.map { |anchor| anchor.attribute('href').value }
end
#get_new_links ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/kleos_test/links_collector.rb', line 17
# Downloads the current target, files it as a valid or invalid inside link,
# marks it verified and, when the status is 200, queues the links found on
# the page.
#
# @return [Object] the result of #refill_unverified_links for a 200
#   response, otherwise the @@verified_links array
def get_new_links
  body, status = download_inside_webpage
  succeeded = status == 200
  @@inside_links[succeeded ? :valid : :invalid] << @target
  @@verified_links << @target
  return @@verified_links unless succeeded

  refill_unverified_links(extract_links(body))
end
#get_target_link ⇒ Object
13 14 15 |
# File 'lib/kleos_test/links_collector.rb', line 13
# Takes the next link off the front of the unverified inside-link queue.
#
# @return [String] the removed link, or '/' when the queue is empty
def get_target_link
  return '/' if @@unverified_inside_links.empty?

  @@unverified_inside_links.shift
end
#refill_unverified_links(links) ⇒ Object
87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/kleos_test/links_collector.rb', line 87
# Sorts freshly extracted links into the inside and outside queues.
#
# A link counts as "inside" when it starts with "/", "?" or an http(s)
# kleos.ru URL (optionally with "www.") that is not followed by "/forum".
# Inside links are queued only if they are neither queued nor verified
# already; every other link is appended to the outside queue, which is then
# de-duplicated in place.
#
# @param links [Array<String>] href values taken from a page
# @return [Array, nil] whatever Array#uniq! returns for the outside queue
#   (nil when it removed nothing)
def refill_unverified_links(links)
  # \A instead of ^: ^ also matches after any newline, so an href with an
  # embedded line break could be classified as an inside link.
  # NOTE(review): the (?!\/forum) lookahead runs after the "/" alternative
  # has already consumed the slash, so a relative "/forum..." link still
  # counts as inside -- confirm whether that is intended.
  regexp = /\A(\/|\?|https?:\/\/(www\.)?kleos\.ru)(?!\/forum)/
  inside, outside = links.partition { |link| regexp.match?(link) }

  fresh = inside.reject do |link|
    @@unverified_inside_links.include?(link) || @@verified_links.include?(link)
  end
  @@unverified_inside_links.concat(fresh.uniq)

  @@unverified_outside_links.concat(outside).uniq!
end
#verify_outside_links ⇒ Object
30 31 32 33 34 35 36 37 38 39 |
# File 'lib/kleos_test/links_collector.rb', line 30
# Requests every queued outside link and files it under :valid when the
# status code is 200, otherwise under :invalid.
#
# @return [Array] the @@unverified_outside_links array that was iterated
def verify_outside_links
  @@unverified_outside_links.each_with_index do |url, index|
    status = download_outside_webpage(url, index + 1)
    bucket = status == 200 ? :valid : :invalid
    @@outside_links[bucket] << url
  end
end