Class: WebInspector::Inspector
- Inherits:
-
Object
- Object
- WebInspector::Inspector
- Defined in:
- lib/web_inspector/inspector.rb
Instance Attribute Summary collapse
-
#host ⇒ Object
readonly
Returns the value of attribute host.
-
#meta ⇒ Object
readonly
Returns the value of attribute meta.
-
#page ⇒ Object
readonly
Returns the value of attribute page.
-
#url ⇒ Object
readonly
Returns the value of attribute url.
Instance Method Summary collapse
-
#accessibility_score ⇒ Hash
Calculate a basic accessibility score.
- #body ⇒ Object
-
#cms_info ⇒ Hash
Detect CMS and get detailed information.
- #description ⇒ Object
-
#domain_images(user_domain, host = nil) ⇒ Array<String>
Get images from a specific domain.
-
#domain_links(user_domain, host = nil) ⇒ Array<String>
Get links from a specific domain.
-
#feeds ⇒ Array<String>
Extract RSS/Atom feeds from the page.
-
#find(words) ⇒ Array<Hash>
Search for specific words in the page content.
-
#images ⇒ Array<String>
Get all images from the page.
-
#initialize(page) ⇒ Inspector
constructor
A new instance of Inspector.
-
#javascripts ⇒ Array<String>
Get all JavaScript files used by the page.
-
#language ⇒ String?
Detect the page language.
-
#links ⇒ Array<String>
Get all links from the page.
-
#microdata ⇒ Array<Hash>
Extract microdata from the page.
-
#mobile_friendly? ⇒ Boolean
Check if the page is mobile-friendly.
-
#robots_txt_url ⇒ String
Get robots.txt URL.
- #set_url(url, host) ⇒ Object
-
#sitemap_url ⇒ Array<String>
Get sitemap URLs (sitemap link tags declared on the page plus the default /sitemap.xml).
-
#social_links ⇒ Hash
Extract social media profile links.
-
#structured_data ⇒ Array<Hash>
Extract structured data (JSON-LD) from the page.
-
#stylesheets ⇒ Array<String>
Get stylesheets used by the page.
-
#tag_count ⇒ Hash
Count all tag types on the page.
- #title ⇒ Object
Constructor Details
#initialize(page) ⇒ Inspector
Returns a new instance of Inspector.
9 10 11 12 13 |
# File 'lib/web_inspector/inspector.rb', line 9
# Builds an inspector around an already-parsed page.
#
# @param page [Object] parsed document; the other methods call #css, #at and
#   #to_html on it (presumably a Nokogiri document — confirm against caller)
def initialize(page)
  @page = page
  # NOTE(review): the method name after `Meta.new(page).` was lost when this
  # listing was extracted. `meta` is assumed because @meta is read like a Hash
  # (e.g. @meta['generator']) and the `meta` reader name was dropped the same
  # way — confirm against the real source.
  @meta = WebInspector::Meta.new(page).meta
  @base_url = nil
end
Instance Attribute Details
#host ⇒ Object (readonly)
Returns the value of attribute host.
7 8 9 |
# File 'lib/web_inspector/inspector.rb', line 7
# Reader for the host.
#
# @return [Object] the value of @host, as assigned by #set_url or, when still
#   unset, by #domain_links / #domain_images
def host
  @host
end
#meta ⇒ Object (readonly)
Returns the value of attribute meta.
7 8 9 |
# File 'lib/web_inspector/inspector.rb', line 7
# Reader for the page metadata.
#
# @return [Object] the value of @meta (indexed by string keys such as
#   'description' or 'generator' elsewhere in this class)
def meta
  @meta
end
#page ⇒ Object (readonly)
Returns the value of attribute page.
7 8 9 |
# File 'lib/web_inspector/inspector.rb', line 7
# Reader for the parsed page.
#
# @return [Object] the document object passed to the constructor
def page
  @page
end
#url ⇒ Object (readonly)
Returns the value of attribute url.
7 8 9 |
# File 'lib/web_inspector/inspector.rb', line 7
# Reader for the page URL.
#
# @return [Object] the value of @url, as assigned by #set_url
def url
  @url
end
Instance Method Details
#accessibility_score ⇒ Hash
Calculate a basic accessibility score
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 |
# File 'lib/web_inspector/inspector.rb', line 352
# Calculate a basic accessibility score.
#
# Starts from 100 and subtracts a penalty per failed check (image alt text,
# H1 usage, unlabeled buttons, html lang attribute, form labels). The result
# is computed once and cached in @accessibility_score.
#
# @return [Hash] { score: Integer (never below 0), details: Array<String> }
def accessibility_score
  @accessibility_score ||= begin
    score = 100
    details = []

    # Images: up to 25 points, proportional to the share lacking an alt attribute.
    total_images = @page.css('img').count
    images_without_alt = @page.css('img:not([alt])').count
    if total_images.positive?
      alt_percentage = ((total_images - images_without_alt).to_f / total_images * 100).round
      if alt_percentage < 100
        score -= (100 - alt_percentage) / 4 # integer division, max 25
        details << "#{images_without_alt} images missing alt text"
      end
    end

    # Headings: exactly one H1 is expected.
    h1_count = @page.css('h1').count
    if h1_count.zero?
      score -= 15
      details << 'No H1 heading found'
    elsif h1_count > 1
      score -= 10
      details << 'Multiple H1 headings found'
    end

    # Buttons with neither an ARIA label nor visible text: 5 points each, max 20.
    unlabeled_buttons = @page.css('button:not([aria-label]):not([aria-labelledby])').count do |btn|
      btn.text.strip.empty?
    end
    if unlabeled_buttons.positive?
      score -= [unlabeled_buttons * 5, 20].min
      details << "#{unlabeled_buttons} buttons without accessible labels"
    end

    # Language attribute on the root element.
    html_tag = @page.at('html')
    if html_tag.nil? || html_tag['lang'].nil? || html_tag['lang'].empty?
      score -= 10
      details << 'No language attribute on HTML element'
    end

    # Form inputs must be referenced by a <label for="...">: 5 points each, max 15.
    # The label targets are collected once instead of running one CSS query per
    # input, which also avoids interpolating raw ids into a selector.
    labelled_ids = @page.css('label[for]').map { |label| label['for'] }
    inputs = @page.css('input[type="text"], input[type="email"], input[type="password"], textarea')
    inputs_without_labels = inputs.count do |input|
      id = input['id']
      id.nil? || !labelled_ids.include?(id)
    end
    if inputs_without_labels.positive?
      score -= [inputs_without_labels * 5, 15].min
      details << "#{inputs_without_labels} form inputs without labels"
    end

    { score: [score, 0].max, details: details }
  end
end
#body ⇒ Object
30 31 32 |
# File 'lib/web_inspector/inspector.rb', line 30
# Serialise the page body.
#
# @return [Object] whatever #to_html returns for the nodes matching 'body'
#   (presumably an HTML String — confirm)
def body
  @page.css('body').to_html
end
#cms_info ⇒ Hash
Detect CMS and get detailed information
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 |
# File 'lib/web_inspector/inspector.rb', line 305
# Detect CMS and get detailed information.
#
# Detection relies on markers in the serialised HTML and on the 'generator'
# meta entry; the first matching platform wins. The result is computed once
# and cached in @cms_info.
#
# @return [Hash] { name: String or nil, version: String or nil,
#   themes: Array<String>, plugins: Array<String> }
def cms_info
  @cms_info ||= begin
    # Serialise once: the original re-rendered the document for every check.
    html = @page.to_html
    generator = @meta['generator'].to_s
    info = { name: nil, version: nil, themes: [], plugins: [] }

    if html.include?('wp-content') || generator.include?('WordPress')
      info[:name] = 'WordPress'
      # Version comes from the generator meta tag, when it carries one.
      info[:version] = generator[/WordPress\s+([\d.]+)/, 1]

      @page.css('link[href*="wp-content/themes"]').each do |link|
        theme = link[:href].to_s[%r{themes/([^/]+)}, 1]
        info[:themes] << theme if theme
      end

      @page.css('link[href*="wp-content/plugins"], script[src*="wp-content/plugins"]').each do |elem|
        plugin = (elem[:href] || elem[:src]).to_s[%r{plugins/([^/]+)}, 1]
        info[:plugins] << plugin if plugin
      end
    elsif html.include?('Drupal') || generator.include?('Drupal')
      info[:name] = 'Drupal'
      info[:version] = generator[/Drupal\s+([\d.]+)/, 1]
    elsif generator.include?('Joomla')
      info[:name] = 'Joomla'
      info[:version] = generator[/Joomla!\s+([\d.]+)/, 1]
    elsif html.include?('cdn.shopify.com') || html.include?('Shopify')
      info[:name] = 'Shopify'
    elsif html.include?('wix.com') || html.include?('_wix')
      info[:name] = 'Wix'
    elsif html.include?('squarespace')
      info[:name] = 'Squarespace'
    end

    info[:themes].uniq!
    info[:plugins].uniq!
    info
  end
end
#description ⇒ Object
26 27 28 |
# File 'lib/web_inspector/inspector.rb', line 26
# Page description.
#
# @return [Object] the 'description' meta entry, else the 'og:description'
#   entry, else the result of #snippet (defined elsewhere in this class)
def description
  @meta['description'] || @meta['og:description'] || snippet
end
#domain_images(user_domain, host = nil) ⇒ Array<String>
Get images from a specific domain
104 105 106 107 |
# File 'lib/web_inspector/inspector.rb', line 104
# Get images from a specific domain.
#
# @param user_domain [String] domain handed to filter_by_domain
# @param host [String, nil] stored in @host only when @host is not set yet
# @return [Array<String>] result of filter_by_domain over #images
def domain_images(user_domain, host = nil)
  @host ||= host
  filter_by_domain(images, user_domain)
end
#domain_links(user_domain, host = nil) ⇒ Array<String>
Get links from a specific domain
72 73 74 75 |
# File 'lib/web_inspector/inspector.rb', line 72
# Get links from a specific domain.
#
# @param user_domain [String] domain handed to filter_by_domain
# @param host [String, nil] stored in @host only when @host is not set yet
# @return [Array<String>] result of filter_by_domain over #links
def domain_links(user_domain, host = nil)
  @host ||= host
  filter_by_domain(links, user_domain)
end
#feeds ⇒ Array<String>
Extract RSS/Atom feeds from the page
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
# File 'lib/web_inspector/inspector.rb', line 233
# Extract RSS/Atom feeds from the page.
#
# Combines <link> tags typed as RSS/Atom with page links whose path looks
# like a feed. The result is computed once and cached in @feeds.
#
# @return [Array<String>] unique feed URLs
def feeds
  @feeds ||= begin
    found = []

    # Declared feeds.
    @page.css('link[type="application/rss+xml"], link[type="application/atom+xml"]').each do |link|
      href = link[:href]
      next unless href

      begin
        found << make_absolute_url(href)
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip malformed hrefs instead of aborting the whole extraction; the
        # same errors are rescued around make_absolute_url in #images.
      end
    end

    # Links that merely look like feeds (/feed, /rss, /atom, optionally .xml).
    links.each do |link|
      found << link if link.match?(%r{/(feed|rss|atom)(/|\.xml|$)}i)
    end

    found.uniq.compact
  end
end
#find(words) ⇒ Array<Hash>
Search for specific words in the page content
37 38 39 40 |
# File 'lib/web_inspector/inspector.rb', line 37
# Search for specific words in the page content.
#
# The text of the <html> element is downcased and handed to #counter
# (defined elsewhere in this class) together with the words.
#
# @param words [Object] passed through to #counter unchanged
# @return [Array<Hash>] whatever #counter returns
def find(words)
  # A document without an <html> element is searched as empty text instead
  # of raising NoMethodError on nil.
  text = (@page.at('html')&.inner_text).to_s
  counter(text.downcase, words)
end
#images ⇒ Array<String>
Get all images from the page
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/web_inspector/inspector.rb', line 79
# Get all images from the page.
#
# The result is computed once and cached in @images.
#
# @return [Array<String>] unique image URLs as returned by make_absolute_url
def images
  @images ||= begin
    urls = @page.css('img').each_with_object([]) do |img, found|
      src = img[:src]&.strip
      # A missing or blank src does not point at an image.
      next if src.nil? || src.empty?

      begin
        absolute_url = make_absolute_url(src)
        found << absolute_url if absolute_url
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip invalid URLs.
      end
    end
    urls.uniq
  end
end
#javascripts ⇒ Array<String>
Get all JavaScript files used by the page
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/web_inspector/inspector.rb', line 111
# Get all JavaScript files used by the page.
#
# Only <script> elements carrying a src attribute are considered. The result
# is computed once and cached in @javascripts.
#
# @return [Array<String>] unique script URLs as returned by make_absolute_url
def javascripts
  @javascripts ||= begin
    urls = @page.css('script[src]').each_with_object([]) do |script, found|
      src = script[:src]&.strip
      # A blank src does not reference a file.
      next if src.nil? || src.empty?

      begin
        absolute_url = make_absolute_url(src)
        found << absolute_url if absolute_url
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip invalid URLs.
      end
    end
    urls.uniq
  end
end
#language ⇒ String?
Detect the page language
157 158 159 160 161 162 163 164 165 166 167 168 |
# File 'lib/web_inspector/inspector.rb', line 157
# Detect the page language.
#
# @return [String, nil] the non-empty lang attribute of the <html> element,
#   else the non-empty 'content-language' meta entry, else nil
def language
  # The html lang attribute takes precedence.
  html_tag = @page.at('html')
  html_lang = html_tag && html_tag['lang']
  return html_lang if html_lang && !html_lang.empty?

  # Then the content-language meta entry.
  meta_lang = @meta['content-language']
  return meta_lang if meta_lang && !meta_lang.empty?

  nil
end
#links ⇒ Array<String>
Get all links from the page
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/web_inspector/inspector.rb', line 44
# Get all links from the page.
#
# javascript:, mailto: and tel: hrefs are ignored. The result is computed
# once and cached in @links.
#
# @return [Array<String>] unique link URLs as returned by make_absolute_url
def links
  @links ||= begin
    urls = @page.css('a').each_with_object([]) do |anchor, found|
      # Strip before looking at the scheme so that ' javascript:...' is caught.
      href = anchor[:href]&.strip
      next unless href
      # URI schemes are case-insensitive.
      next if href.downcase.start_with?('javascript:', 'mailto:', 'tel:')

      begin
        absolute_url = make_absolute_url(href)
        found << absolute_url if absolute_url
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip invalid URLs (same error classes as #images rescues).
      end
    end
    urls.uniq
  end
end
#microdata ⇒ Array<Hash>
Extract microdata from the page
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
# File 'lib/web_inspector/inspector.rb', line 187
# Extract microdata from the page.
#
# One item is produced per [itemscope] element; its properties are read from
# every [itemprop] element found beneath it. The result is computed once and
# cached in @microdata.
#
# @return [Array<Hash>] items shaped as { type: itemtype, properties: Hash }
def microdata
  @microdata ||= begin
    items = []
    @page.css('[itemscope]').each do |scope|
      properties = {}
      scope.css('[itemprop]').each do |prop|
        # The value source depends on the element type.
        value = case prop.name.downcase
                when 'meta'
                  prop['content']
                when 'img', 'audio', 'embed', 'iframe', 'source', 'track', 'video'
                  # A missing src yields nil rather than being resolved as a URL.
                  src = prop['src']
                  src && make_absolute_url(src)
                when 'a', 'area', 'link'
                  href = prop['href']
                  href && make_absolute_url(href)
                when 'time'
                  prop['datetime'] || prop.text.strip
                else
                  prop.text.strip
                end
        properties[prop['itemprop']] = value
      end
      items << { type: scope['itemtype'], properties: properties }
    end
    items
  end
end
#mobile_friendly? ⇒ Boolean
Check if the page is mobile-friendly
415 416 417 418 419 420 421 422 423 424 425 426 |
# File 'lib/web_inspector/inspector.rb', line 415
# Check if the page is mobile-friendly.
#
# True when the viewport meta entry contains 'width=device-width' and the
# page either links at least one stylesheet or contains '@media' in its HTML.
#
# @return [Boolean]
def mobile_friendly?
  # `||=` would recompute on every call when the answer is false, so the
  # cache is keyed on whether the variable has been assigned at all.
  return @mobile_friendly if defined?(@mobile_friendly)

  viewport = @meta['viewport']
  has_viewport = !viewport.nil? && viewport.include?('width=device-width')

  # NOTE(review): any linked stylesheet counts as "has media queries" here.
  has_media_queries = stylesheets.any? || @page.to_html.include?('@media')

  @mobile_friendly = has_viewport && has_media_queries
end
#robots_txt_url ⇒ String
Get robots.txt URL
280 281 282 |
# File 'lib/web_inspector/inspector.rb', line 280
# Get robots.txt URL.
#
# Keeps the first three '/'-separated segments of @url (scheme, empty
# segment, authority) and appends '/robots.txt'.
#
# @return [String, nil] nil when @url has not been set
def robots_txt_url
  return unless @url

  "#{@url.split('/')[0..2].join('/')}/robots.txt"
end
#set_url(url, host) ⇒ Object
15 16 17 18 |
# File 'lib/web_inspector/inspector.rb', line 15
# Record the URL and host the page was loaded from.
#
# @param url [Object] stored in @url
# @param host [Object] stored in @host
# @return [Object] the host (value of the last assignment)
def set_url(url, host)
  @url = url
  @host = host
end
#sitemap_url ⇒ Array<String>
Get sitemap URL
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 |
# File 'lib/web_inspector/inspector.rb', line 286
# Get sitemap URLs.
#
# Collects <link rel="sitemap"> hrefs and, when @url is set, appends the
# default /sitemap.xml at the site root. The result is computed once and
# cached in @sitemap_url.
#
# @return [Array<String>] unique sitemap URLs
def sitemap_url
  @sitemap_url ||= begin
    sitemaps = []

    @page.css('link[rel="sitemap"]').each do |link|
      href = link[:href]
      next unless href

      begin
        sitemaps << make_absolute_url(href)
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip malformed hrefs instead of aborting; the same errors are
        # rescued around make_absolute_url in #images.
      end
    end

    # Default location: first three '/'-separated segments of @url + /sitemap.xml.
    sitemaps << "#{@url.split('/')[0..2].join('/')}/sitemap.xml" if @url

    sitemaps.uniq.compact
  end
end
#social_links ⇒ Hash
Extract social media profile links
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 |
# File 'lib/web_inspector/inspector.rb', line 254
# Extract social media profile links.
#
# For each platform the first page link whose host is one of the platform's
# domains (or a subdomain of it) is kept. The result is computed once and
# cached in @social_links.
#
# @return [Hash] platform Symbol => first matching link
def social_links
  @social_links ||= begin
    platforms = {
      facebook: %w[facebook.com],
      twitter: %w[twitter.com x.com],
      linkedin: %w[linkedin.com],
      instagram: %w[instagram.com],
      youtube: %w[youtube.com],
      github: %w[github.com],
      tiktok: %w[tiktok.com]
    }

    links.each_with_object({}) do |link, found|
      # Match on the host only: an unanchored pattern such as /x\.com/ would
      # also accept dropbox.com or a platform name inside a query string.
      host = begin
        URI.parse(link).host&.downcase
      rescue URI::InvalidURIError
        nil
      end
      next unless host

      platforms.each do |platform, domains|
        next unless domains.any? { |domain| host == domain || host.end_with?(".#{domain}") }

        found[platform] ||= link
      end
    end
  end
end
#structured_data ⇒ Array<Hash>
Extract structured data (JSON-LD) from the page
172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/web_inspector/inspector.rb', line 172
# Extract structured data (JSON-LD) from the page.
#
# Every <script type="application/ld+json"> body is parsed as JSON; bodies
# that fail to parse, or parse to a falsy value, are skipped. The result is
# computed once and cached in @structured_data.
#
# @return [Array<Hash>] parsed JSON-LD documents, in page order
def structured_data
  @structured_data ||= begin
    data = []
    @page.css('script[type="application/ld+json"]').each do |script|
      begin
        parsed = JSON.parse(script.text)
        data << parsed if parsed
      rescue JSON::ParserError
        # Skip invalid JSON.
      end
    end
    data
  end
end
#stylesheets ⇒ Array<String>
Get stylesheets used by the page
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/web_inspector/inspector.rb', line 134
# Get stylesheets used by the page.
#
# The result is computed once and cached in @stylesheets.
#
# @return [Array<String>] unique stylesheet URLs as returned by make_absolute_url
def stylesheets
  @stylesheets ||= begin
    urls = @page.css('link[rel="stylesheet"]').each_with_object([]) do |style, found|
      href = style[:href]&.strip
      # A missing or blank href does not reference a stylesheet.
      next if href.nil? || href.empty?

      begin
        absolute_url = make_absolute_url(href)
        found << absolute_url if absolute_url
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip invalid URLs.
      end
    end
    urls.uniq
  end
end
#tag_count ⇒ Hash
Count all tag types on the page
221 222 223 224 225 226 227 228 229 |
# File 'lib/web_inspector/inspector.rb', line 221
# Count all tag types on the page.
#
# @return [Hash] downcased tag name => number of elements with that name
def tag_count
  @page.css('*').each_with_object({}) do |element, counts|
    tag_name = element.name.downcase
    counts[tag_name] = counts.fetch(tag_name, 0) + 1
  end
end
#title ⇒ Object
20 21 22 23 24 |
# File 'lib/web_inspector/inspector.rb', line 20
# Page title.
#
# @return [String, nil] stripped text of the 'title' nodes, or nil when
#   reading it raises any StandardError
def title
  @page.css('title').inner_text.strip
rescue StandardError
  nil
end