Class: WebInspector::Page
- Inherits:
-
Object
- Object
- WebInspector::Page
- Defined in:
- lib/web_inspector/page.rb
Constant Summary collapse
- DEFAULT_TIMEOUT =
30
- DEFAULT_RETRIES =
3
- DEFAULT_USER_AGENT =
-> { "WebInspector/#{WebInspector::VERSION} (+https://github.com/davidesantangelo/webinspector)" }
Instance Attribute Summary collapse
-
#load_time ⇒ Float?
readonly
Get the load time of the page in seconds.
-
#status_code ⇒ Object
readonly
Returns the value of attribute status_code.
Instance Method Summary collapse
-
#content_type ⇒ String?
Get the content type of the page.
- #domain_images(u = domain) ⇒ Object
- #domain_links(u = domain) ⇒ Object
-
#error_message ⇒ String?
Get the error message if any.
-
#favicon ⇒ String?
Get the favicon URL if available.
-
#find(words) ⇒ Object
Special case for find method that takes arguments.
-
#initialize(url, options = {}) ⇒ Page
constructor
Initialize a new WebInspector Page.
-
#json_ld ⇒ Array<Hash>
Get all JSON-LD structured data as an array of hashes.
- #response ⇒ Object
-
#security_info ⇒ Hash
Get information about the page’s security.
-
#size ⇒ Integer?
Get the size of the page in bytes.
-
#success? ⇒ Boolean
Check if the page was successfully loaded.
-
#technologies ⇒ Hash
Get a hash of all technologies detected on the page.
-
#to_hash ⇒ Hash
Get the full Hash representation of the page (suitable for JSON serialization), including all new data.
Constructor Details
#initialize(url, options = {}) ⇒ Page
Initialize a new WebInspector Page
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/web_inspector/page.rb', line 37

# Initialize a new WebInspector Page.
#
# Builds the request and the inspector eagerly. Any StandardError raised while
# the inspector is being set up is captured in @error instead of propagating.
#
# @param url [String] URL handed to WebInspector::Request and to the inspector
# @param options [Hash] optional settings
# @option options [Integer] :retries stored in @retries (defaults to DEFAULT_RETRIES)
# @option options [Integer] :timeout stored in @timeout (defaults to DEFAULT_TIMEOUT)
# @option options [Hash] :headers request headers; when absent a single
#   'User-Agent' header is built from :user_agent or DEFAULT_USER_AGENT
# @option options [String] :user_agent used only when :headers is not given
# @option options [Boolean] :allow_redirections treated as true when nil/absent
def initialize(url, options = {})
  @url = url
  @options = options
  @retries = options[:retries] || DEFAULT_RETRIES
  @timeout = options[:timeout] || DEFAULT_TIMEOUT
  @headers = options[:headers] || { 'User-Agent' => options[:user_agent] || DEFAULT_USER_AGENT.call }
  # nil means "not specified" and enables redirections; an explicit false is kept.
  @allow_redirections = options[:allow_redirections].nil? || options[:allow_redirections]
  @request = WebInspector::Request.new(url)

  begin
    @inspector = WebInspector::Inspector.new(page)
    @inspector.set_url(url, host)
    # Reaching this point without an exception is recorded as a 200.
    @status_code = 200
  rescue StandardError => e
    @error = e
    # Use the status carried by the exception when it exposes one; otherwise 500.
    @status_code = e.respond_to?(:status_code) ? e.status_code : 500
  end
end
Instance Attribute Details
#load_time ⇒ Float? (readonly)
Get the load time of the page in seconds
175 176 177 |
# File 'lib/web_inspector/page.rb', line 175

# Get the load time of the page in seconds.
#
# @return [Float, nil] the value stored in @load_time (nil when it was never set)
def load_time
  @load_time
end
#status_code ⇒ Object (readonly)
Returns the value of attribute status_code.
22 23 24 |
# File 'lib/web_inspector/page.rb', line 22

# Returns the value of attribute status_code.
#
# @return [Object] whatever was stored in @status_code
def status_code
  @status_code
end
Instance Method Details
#content_type ⇒ String?
Get the content type of the page
157 158 159 |
# File 'lib/web_inspector/page.rb', line 157

# Get the content type of the page.
#
# @return [String, nil] the 'content-type' response header, or nil when there
#   is no response or the response has no headers
def content_type
  # Read the response once instead of invoking #response for every access.
  headers = response&.headers
  headers && headers['content-type']
end
#domain_images(u = domain) ⇒ Object
129 130 131 132 133 |
# File 'lib/web_inspector/page.rb', line 129

# Images for a given domain, as computed by the inspector.
#
# @param u [Object] domain to filter by; defaults to the page's own #domain
# @return [Object] an empty Array when the page did not load successfully,
#   otherwise the result of @inspector.domain_images(u, host)
def domain_images(u = domain)
  return [] unless success?

  @inspector.domain_images(u, host)
end
#domain_links(u = domain) ⇒ Object
123 124 125 126 127 |
# File 'lib/web_inspector/page.rb', line 123

# Links for a given domain, as computed by the inspector.
#
# @param u [Object] domain to filter by; defaults to the page's own #domain
# @return [Object] an empty Array when the page did not load successfully,
#   otherwise the result of @inspector.domain_links(u, host)
def domain_links(u = domain)
  return [] unless success?

  @inspector.domain_links(u, host)
end
#error_message ⇒ String?
Get the error message if any
67 68 69 |
# File 'lib/web_inspector/page.rb', line 67

# Get the error message if any.
#
# @return [String, nil] the message of the captured error, or nil when no
#   error has been recorded in @error
def error_message
  @error&.message
end
#favicon ⇒ String?
Get the favicon URL if available
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/web_inspector/page.rb', line 98

# Get the favicon URL if available.
#
# Resolution order:
# 1. the href of the first matching favicon <link> tag, resolved against #url;
# 2. the conventional "<scheme>://<host>/favicon.ico" location.
#
# The result is memoized in @favicon on every path that reaches the lookup
# (the unsuccessful-page early return is intentionally not memoized).
#
# @return [String, nil] the favicon URL, or nil when the page did not load or
#   an unexpected error occurred during the lookup
def favicon
  return @favicon if defined?(@favicon)
  return nil unless success?

  @favicon = begin
    link = @inspector.page.css("link[rel='shortcut icon'], link[rel='icon'], link[rel='apple-touch-icon']").first
    href = link && link['href']

    # No `return` here: returning from inside the begin block would skip the
    # @favicon assignment and defeat the memoization above.
    declared = begin
      URI.join(url, href).to_s if href
    rescue URI::InvalidURIError
      nil # unusable href: fall back to the default location
    end

    declared || "#{scheme}://#{host}/favicon.ico"
  rescue StandardError
    nil
  end
end
#find(words) ⇒ Object
Special case for find method that takes arguments
82 83 84 85 86 |
# File 'lib/web_inspector/page.rb', line 82

# Special case for the find method, which takes arguments.
#
# @param words [Object] passed through unchanged to @inspector.find
# @return [Object, nil] nil when the page did not load successfully, otherwise
#   the inspector's result
def find(words)
  return nil unless success?

  @inspector.find(words)
end
#json_ld ⇒ Array<Hash>
Get all JSON-LD structured data as an array of hashes
179 180 181 |
# File 'lib/web_inspector/page.rb', line 179

# Get all JSON-LD structured data.
#
# Alias-style wrapper: returns exactly what #structured_data returns.
#
# @return [Array<Hash>] the page's structured data
def json_ld
  structured_data
end
#response ⇒ Object
261 262 263 264 265 266 |
# File 'lib/web_inspector/page.rb', line 261

# The HTTP response for the page, fetched lazily.
#
# A successful result of #fetch is memoized in @response. When #fetch raises a
# StandardError the error is stored in @error and nil is returned; nothing is
# memoized in that case, so a later call invokes #fetch again.
#
# @return [Object, nil] the fetched response, or nil when fetching failed
def response
  @response ||= fetch
rescue StandardError => e
  @error = e
  nil
end
#security_info ⇒ Hash
Get information about the page’s security
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/web_inspector/page.rb', line 137

# Get information about the page's security.
#
# The result is memoized in @security_info.
#
# @return [Hash] with keys
#   - :secure [Boolean] whether #scheme is 'https'
#   - :hsts [Boolean] whether a 'strict-transport-security' header is present
#   - :content_security_policy [Boolean] whether a 'content-security-policy'
#     header is present
#   - :ssl_version, :cipher_suite — only added for https pages whose response
#     env exposes response_headers; values are read from response.env and may
#     be nil
def security_info
  return @security_info if defined?(@security_info)

  # Read the response once rather than invoking #response for every lookup.
  resp = response
  headers = resp&.headers
  https = scheme == 'https'

  @security_info = {
    secure: https,
    hsts: headers && headers['strict-transport-security'] ? true : false,
    content_security_policy: headers && headers['content-security-policy'] ? true : false
  }

  # Extract SSL/TLS info if available and using HTTPS
  if https && resp&.env&.response_headers
    @security_info[:ssl_version] = resp.env[:ssl_version]
    @security_info[:cipher_suite] = resp.env[:cipher_suite]
  end

  @security_info
end
#size ⇒ Integer?
Get the size of the page in bytes
163 164 165 166 167 168 169 170 171 |
# File 'lib/web_inspector/page.rb', line 163

# Get the size of the page in bytes.
#
# Prefers the 'content-length' response header (converted with #to_i); falls
# back to the byte size of the response body. The result, including nil, is
# memoized in @size.
#
# @return [Integer, nil] size in bytes, or nil when neither source is available
def size
  return @size if defined?(@size)

  # Read the response once rather than invoking #response for every lookup.
  resp = response
  headers = resp&.headers
  declared = headers && headers['content-length']

  @size = if declared
            declared.to_i
          elsif resp&.body
            resp.body.bytesize
          end
end
#success? ⇒ Boolean
Check if the page was successfully loaded
60 61 62 |
# File 'lib/web_inspector/page.rb', line 60

# Check if the page was successfully loaded.
#
# @return [Boolean] true only when an inspector exists and no error has been
#   recorded in @error
def success?
  !@inspector.nil? && !@error
end
#technologies ⇒ Hash
Get a hash of all technologies detected on the page
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
# File 'lib/web_inspector/page.rb', line 185

# Get a hash of all technologies detected on the page.
#
# Detection is heuristic: substring checks against script/stylesheet URLs, the
# page body, the 'generator' meta tag and selected response headers. Only
# detected technologies appear as keys (with value true); :server carries the
# raw 'server' header value when one is present.
#
# NOTE(review): the name of the meta accessor (`meta`) was reconstructed from
# the 'meta' key emitted by #to_hash — confirm against the class.
#
# @return [Hash{Symbol => Object}] detected technologies
def technologies
  techs = {}
  js_files = javascripts || []
  css_files = stylesheets || []
  page_body = body || ''
  meta_tags = meta || {}
  response_headers = response&.headers || {}
  powered_by = response_headers['x-powered-by']

  # Frameworks and Libraries
  techs[:jquery] = true if js_files.any? { |js| js.include?('jquery') } || page_body.include?('jQuery')
  techs[:react] = true if page_body.include?('data-reactroot') || js_files.any? { |js| js.include?('react') }
  techs[:vue] = true if page_body.include?('data-v-app') || js_files.any? { |js| js.include?('vue') }
  techs[:angular] = true if page_body.include?('ng-version') || js_files.any? { |js| js.include?('angular') }
  if css_files.any? { |css| css.include?('bootstrap') } || page_body.include?('class="container"')
    techs[:bootstrap] = true
  end
  techs[:rails] = true if powered_by&.include?('Rails') || response_headers.key?('x-rails-env')
  techs[:php] = true if powered_by&.include?('PHP')

  # CMS
  techs[:wordpress] = true if meta_tags['generator']&.include?('WordPress') || page_body.include?('/wp-content/')
  techs[:shopify] = true if page_body.include?('Shopify.shop')

  # Analytics
  techs[:google_analytics] = true if js_files.any? { |js| js.include?('google-analytics.com') }

  # Server
  server = response_headers['server']
  if server
    techs[:server] = server
    techs[:nginx] = true if server.include?('nginx')
    techs[:apache] = true if server.include?('Apache')
    techs[:iis] = true if server.include?('IIS')
  end

  # Express is identified by X-Powered-By alone, so it must not depend on a
  # 'server' header being present.
  techs[:express] = true if powered_by&.include?('Express')

  techs
end
#to_hash ⇒ Hash
Get the full Hash representation of the page (suitable for JSON serialization), including all new data
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 |
# File 'lib/web_inspector/page.rb', line 229

# Get the full Hash representation of the page with all new data.
#
# Keys are Strings. The 'response' entry nests the status code, the response
# headers (an empty Hash when there is no response) and the success flag;
# 'error' carries #error_message.
#
# NOTE(review): the values for 'meta' and 'error' were reconstructed as `meta`
# and `error_message` — confirm against the class.
#
# @return [Hash{String => Object}] page data
def to_hash
  {
    'url' => url,
    'scheme' => scheme,
    'host' => host,
    'port' => port,
    'title' => title,
    'description' => description,
    'meta' => meta,
    'links' => links,
    'images' => images,
    'javascripts' => javascripts,
    'stylesheets' => stylesheets,
    'favicon' => favicon,
    'language' => language,
    'structured_data' => structured_data,
    'microdata' => microdata,
    'security_info' => security_info,
    'content_type' => content_type,
    'size' => size,
    'load_time' => load_time,
    'technologies' => technologies,
    'tag_count' => tag_count,
    'response' => {
      'status' => status_code,
      'headers' => response&.headers || {},
      'success' => success?
    },
    'error' => error_message
  }
end