Method: WWMD::Page#set_data
- Defined in:
- lib/wwmd/page/page.rb
#set_data ⇒ Object
set reporting data for the page
Scan for comments, anchors, links and javascript includes and set page flags. The heavy lifting for parsing is done in the scrape class.
returns: array [ code, body_data.size ]
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/wwmd/page/page.rb', line 96
#
# Set reporting data for the page.
#
# Gunzips +body_data+ in place when it holds gzip-compressed data, resets the
# scrape object with the resulting body, then stores the scraped comments,
# links, javascript links and forms on the page and passes the links to the
# spider. The actual parsing is delegated to the scrape object.
#
# @return [Array] two elements: [code, body_data.size]
def set_data
  # Transparently gunzip. A body that is not gzip data (or is a damaged gzip
  # stream) raises a Zlib::Error and is deliberately left untouched; any other
  # error is unexpected and is allowed to propagate instead of being hidden.
  # The block form of wrap closes the reader once the block finishes.
  begin
    Zlib::GzipReader.wrap(StringIO.new(body_data)) do |gz|
      body_data.replace(gz.read)
    end
  rescue Zlib::Error
    # not gzip-compressed: keep the body as received
  end

  # reset scrape and inputs object
  @scrape.reset(body_data)
  @inputs.set

  # remove comments that are css selectors for IE silliness
  @comments = @scrape.for_comments.reject do |c|
    c =~ /\[if IE\]/ || c =~ /\[if IE \d/ || c =~ /\[if lt IE \d/
  end

  # resolve every scraped link against the URL this page was fetched from
  @links = @scrape.for_links.map do |url|
    @urlparse.parse(last_effective_url, url).to_s
  end
  @jlinks = @scrape.for_javascript_links
  @forms = @scrape.for_forms
  @spider.add(last_effective_url, @links)

  [code, body_data.size]
end