Class: Shnork::Shnork
Instance Attribute Summary collapse
-
#depth ⇒ Object
Returns the value of attribute depth.
-
#page ⇒ Object
readonly
Returns the value of attribute page.
-
#reached ⇒ Object
readonly
Returns the value of attribute reached.
Instance Method Summary collapse
- #analyze(page) ⇒ Object
-
#initialize ⇒ Shnork
constructor
A new instance of Shnork.
- #less_than_max_depth(d) ⇒ Object
- #links(current_page) ⇒ Object
- #record(page) ⇒ Object
- #retrieve(page_from) ⇒ Object
Constructor Details
#initialize ⇒ Shnork
Returns a new instance of Shnork.
24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/shnork.rb', line 24
#
# Builds a crawler bound to the Sinatra application and prepares empty
# crawl state. The depth limit starts as :all, which less_than_max_depth
# treats as "no limit".
def initialize
  # Sinatra settings are applied before the application is built below.
  set :env, :production
  set :run, false

  @app     = Sinatra.application
  @request = Rack::MockRequest.new(Sinatra.build_application)

  # Crawl bookkeeping: pages already recorded and [page, from] pairs queued.
  @reached = []
  @found   = []

  # Per-page depth and predecessor, filled in by #retrieve.
  @internal_depth = {}
  @internal_pred  = {}

  @depth   = :all
  @results = ShnorkResults.new
end
Instance Attribute Details
#depth ⇒ Object
Returns the value of attribute depth.
22 23 24 |
# File 'lib/shnork.rb', line 22
#
# Reader for the configured depth limit (@depth).
def depth
  @depth
end
#page ⇒ Object (readonly)
Returns the value of attribute page.
21 22 23 |
# File 'lib/shnork.rb', line 21
#
# Reader for @page, the response stored by the most recent #retrieve call.
def page
  @page
end
#reached ⇒ Object (readonly)
Returns the value of attribute reached.
21 22 23 |
# File 'lib/shnork.rb', line 21
#
# Reader for @reached, the list of pages that #record has stored so far.
def reached
  @reached
end
Instance Method Details
#analyze(page) ⇒ Object
39 40 41 42 43 44 45 46 47 48 |
# File 'lib/shnork.rb', line 39
#
# Crawls starting from +page+: the start page is queued with no predecessor,
# then the queue is drained, handing every not-yet-reached entry to #retrieve
# (which may append further entries to @found while the loop runs).
#
# page - the path to start from.
#
# Returns @results once the queue is empty.
def analyze(page)
  @found << [page, nil]

  until @found.empty?
    link = @found.shift
    # Entries are [page, from] pairs; skip pages that were already recorded.
    retrieve(link) unless @reached.include?(link.first)
  end

  @results
end
#less_than_max_depth(d) ⇒ Object
54 55 56 57 |
# File 'lib/shnork.rb', line 54
#
# Tells whether depth +d+ is within the configured limit.
#
# d - the depth to check.
#
# Returns true when @depth is :all; otherwise the result of d <= @depth
# (so a depth equal to the limit is still allowed).
def less_than_max_depth(d)
  if @depth == :all
    true
  else
    d <= @depth
  end
end
#links(current_page) ⇒ Object
72 73 74 75 |
# File 'lib/shnork.rb', line 72
#
# Extracts the anchors of the most recently fetched response (@page) whose
# href starts with "/" and pairs each one with +current_page+.
#
# The href is read with Node#[] so that it is a plain String (or nil when
# the anchor has no href). Node#attributes yields Attr nodes in current
# Nokogiri; those do not match the pattern test as Strings do and never
# compare equal to the String paths kept in @reached and @found.
#
# current_page - the page the links were found on; stored as the second
#                element of every returned pair.
#
# Returns an Array of [href, current_page] pairs, or [] when no response
# has been fetched yet.
def links(current_page)
  return [] unless @page

  hrefs = Nokogiri::HTML(@page.body).xpath('//a').map { |anchor| anchor['href'] }
  # to_s turns a missing href (nil) into "", which is filtered out.
  local = hrefs.select { |href| href.to_s.start_with?('/') }
  local.map { |href| [href, current_page] }
end
#record(page) ⇒ Object
50 51 52 |
# File 'lib/shnork.rb', line 50
#
# Adds +page+ to @reached unless it is already present (array union).
#
# page - the page to mark as reached.
#
# Returns the updated @reached array.
def record(page)
  @reached |= [page]
end
#retrieve(page_from) ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/shnork.rb', line 59
#
# Processes one queued [page, from] pair: stores the page's depth and
# predecessor, and - if the depth is within the limit - requests the page,
# follows a single redirect, adds the final status to @results, queues the
# links found and records the page as reached.
#
# page_from - a two-element Array: the page to fetch and the page it was
#             found on (nil for the start page).
#
# Returns nil when the depth limit is exceeded, otherwise the result of
# #record.
def retrieve(page_from)
  page = page_from.first
  from = page_from.last

  # The start page has no predecessor and sits at depth 0.
  @internal_depth[page] = from ? @internal_depth[from] + 1 : 0
  @internal_pred[page] = from
  return unless less_than_max_depth(@internal_depth[page])

  @page = @request.get(page)
  if (300...400).cover?(@page.status)
    # Only follow the redirect when a target is given: a 3xx response such
    # as 304 may carry no Location header, and requesting nil is an error.
    location = @page.headers['Location']
    @page = @request.get(location) if location
  end

  @results.add(@page.status, page)
  @found |= links(page)
  record(page)
end