Class: WebInspector::Inspector

Inherits:
Object
  • Object
show all
Defined in:
lib/web_inspector/inspector.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(page) ⇒ Inspector

Returns a new instance of Inspector.



9
10
11
12
13
# File 'lib/web_inspector/inspector.rb', line 9

# Builds an inspector around an already-parsed page.
#
# @param page the parsed document; other methods of this class query it with
#   `css`, `at` and `to_html` (presumably a Nokogiri document — TODO confirm)
def initialize(page)
  @page = page
  # Meta information is extracted once, up front, and exposed through #meta.
  @meta = WebInspector::Meta.new(page).meta
  # Starts out as nil; presumably filled in later by URL helpers — TODO confirm.
  @base_url = nil
end

Instance Attribute Details

#host ⇒ Object (readonly)

Returns the value of attribute host.



7
8
9
# File 'lib/web_inspector/inspector.rb', line 7

# Reader for @host. The value is assigned by #set_url, and by #domain_links /
# #domain_images when it is still unset.
def host
  @host
end

#meta ⇒ Object (readonly)

Returns the value of attribute meta.



7
8
9
# File 'lib/web_inspector/inspector.rb', line 7

# Reader for @meta, which #initialize builds via WebInspector::Meta.
# Other methods index it with string keys such as 'description' or 'viewport'.
def meta
  @meta
end

#page ⇒ Object (readonly)

Returns the value of attribute page.



7
8
9
# File 'lib/web_inspector/inspector.rb', line 7

# Reader for @page, the parsed document handed to #initialize.
def page
  @page
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



7
8
9
# File 'lib/web_inspector/inspector.rb', line 7

# Reader for @url, which is assigned by #set_url (nil until then).
def url
  @url
end

Instance Method Details

#accessibility_score ⇒ Hash

Calculate a basic accessibility score

Returns:

  • (Hash)

    Accessibility score and details



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
# File 'lib/web_inspector/inspector.rb', line 352

# Calculate a basic accessibility score.
#
# Starts at 100 and subtracts penalties for:
# * images without an `alt` attribute (proportional, at most 25 points),
# * a missing H1 (15 points) or more than one H1 (10 points),
# * text-less buttons lacking `aria-label`/`aria-labelledby` (5 each, max 20),
# * a missing or empty `lang` attribute on <html> (10 points),
# * text/email/password inputs and textareas with no `<label for>` pointing
#   at their id (5 each, max 15).
#
# The result is memoized after the first call.
#
# @return [Hash] `{ score: Integer, details: Array<String> }`; score never drops below 0
def accessibility_score
  @accessibility_score ||= begin
    score = 100
    details = []

    # Check images for alt text
    images_without_alt = @page.css('img:not([alt])').count
    total_images = @page.css('img').count

    if total_images.positive?
      alt_percentage = ((total_images - images_without_alt).to_f / total_images * 100).round
      if alt_percentage < 100
        # Integer division: a page where no image has alt text loses 25 points.
        score -= (100 - alt_percentage) / 4
        details << "#{images_without_alt} images missing alt text"
      end
    end

    # Check heading hierarchy
    h1_count = @page.css('h1').count
    if h1_count.zero?
      score -= 15
      details << 'No H1 heading found'
    elsif h1_count > 1
      score -= 10
      details << 'Multiple H1 headings found'
    end

    # Buttons with neither visible text nor an ARIA label have no accessible name.
    buttons_without_aria = @page.css('button:not([aria-label]):not([aria-labelledby])').count do |btn|
      btn.text.strip.empty?
    end

    if buttons_without_aria.positive?
      score -= [buttons_without_aria * 5, 20].min
      details << "#{buttons_without_aria} buttons without accessible labels"
    end

    # Check for language attribute
    html_tag = @page.at('html')
    lang = html_tag && html_tag['lang']
    if lang.nil? || lang.empty?
      score -= 10
      details << 'No language attribute on HTML element'
    end

    # Check for form labels. The label targets are collected once and compared
    # as plain strings: interpolating an input id into a CSS selector breaks
    # when the id contains quotes, and costs one DOM query per input.
    labelled_ids = @page.css('label[for]').map { |label| label['for'] }
    inputs = @page.css('input[type="text"], input[type="email"], input[type="password"], textarea')
    inputs_without_labels = inputs.count do |input|
      id = input['id']
      !id || !labelled_ids.include?(id)
    end

    if inputs_without_labels.positive?
      score -= [inputs_without_labels * 5, 15].min
      details << "#{inputs_without_labels} form inputs without labels"
    end

    { score: [score, 0].max, details: details }
  end
end

#body ⇒ Object



30
31
32
# File 'lib/web_inspector/inspector.rb', line 30

# Serialized HTML of whatever the page's `body` selector matches.
def body
  body_nodes = @page.css('body')
  body_nodes.to_html
end

#cms_info ⇒ Hash

Detect CMS and get detailed information

Returns:

  • (Hash)

    CMS information



305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/web_inspector/inspector.rb', line 305

# Detect the CMS behind the page and collect what can be learned about it.
#
# Detection relies on marker strings in the serialized HTML and on the
# `generator` meta value. Platforms are tried in a fixed order (WordPress,
# Drupal, Joomla, Shopify, Wix, Squarespace) and the first match wins.
# Themes and plugins are only collected for WordPress. The result is memoized.
#
# @return [Hash] `{ name:, version:, themes:, plugins: }`; name and version
#   are nil when nothing was recognised
def cms_info
  @cms_info ||= begin
    info = { name: nil, version: nil, themes: [], plugins: [] }

    # Serialize the document once; every check below reuses the same string
    # instead of re-rendering the whole page per branch.
    html = @page.to_html
    generator = @meta['generator']

    # WordPress detection
    if html.include?('wp-content') || generator&.include?('WordPress')
      info[:name] = 'WordPress'
      # Try to extract version from generator meta tag
      info[:version] = Regexp.last_match(1) if generator =~ /WordPress\s+([\d.]+)/

      # Detect themes
      @page.css('link[href*="wp-content/themes"]').each do |link|
        info[:themes] << Regexp.last_match(1) if link[:href] =~ %r{themes/([^/]+)}
      end

      # Detect plugins (stylesheets use href, scripts use src)
      @page.css('link[href*="wp-content/plugins"], script[src*="wp-content/plugins"]').each do |elem|
        src = elem[:href] || elem[:src]
        info[:plugins] << Regexp.last_match(1) if src =~ %r{plugins/([^/]+)}
      end
    # Drupal detection
    elsif html.include?('Drupal') || generator&.include?('Drupal')
      info[:name] = 'Drupal'
      info[:version] = Regexp.last_match(1) if generator =~ /Drupal\s+([\d.]+)/
    # Joomla detection
    elsif generator&.include?('Joomla')
      info[:name] = 'Joomla'
      info[:version] = Regexp.last_match(1) if generator =~ /Joomla!\s+([\d.]+)/
    # Shopify detection
    elsif html.include?('cdn.shopify.com') || html.include?('Shopify')
      info[:name] = 'Shopify'
    # Wix detection
    elsif html.include?('wix.com') || html.include?('_wix')
      info[:name] = 'Wix'
    # Squarespace detection
    elsif html.include?('squarespace')
      info[:name] = 'Squarespace'
    end

    info[:themes].uniq!
    info[:plugins].uniq!
    info
  end
end

#description ⇒ Object



26
27
28
# File 'lib/web_inspector/inspector.rb', line 26

# Page description: the `description` meta value when present, otherwise
# `og:description`, otherwise whatever #snippet returns.
def description
  %w[description og:description].each do |key|
    value = @meta[key]
    return value if value
  end

  snippet
end

#domain_images(user_domain, host = nil) ⇒ Array<String>

Get images from a specific domain

Parameters:

  • user_domain (String)

    Domain to filter images by

  • host (String) (defaults to: nil)

    Current host

Returns:

  • (Array<String>)

    Filtered images



104
105
106
107
# File 'lib/web_inspector/inspector.rb', line 104

# Images restricted to one domain.
#
# @param user_domain [String] domain to filter images by
# @param host [String, nil] stored as @host only when no host is set yet
# @return whatever `filter_by_domain` yields for #images and `user_domain`
def domain_images(user_domain, host = nil)
  @host = host unless @host
  candidates = images
  filter_by_domain(candidates, user_domain)
end

#domain_links(user_domain, host = nil) ⇒ Array<String>

Get links from a specific domain

Parameters:

  • user_domain (String)

    Domain to filter links by

  • host (String) (defaults to: nil)

    Current host

Returns:

  • (Array<String>)

    Filtered links



72
73
74
75
# File 'lib/web_inspector/inspector.rb', line 72

# Links restricted to one domain.
#
# @param user_domain [String] domain to filter links by
# @param host [String, nil] stored as @host only when no host is set yet
# @return whatever `filter_by_domain` yields for #links and `user_domain`
def domain_links(user_domain, host = nil)
  @host = host unless @host
  candidates = links
  filter_by_domain(candidates, user_domain)
end

#feeds ⇒ Array<String>

Extract RSS/Atom feeds from the page

Returns:

  • (Array<String>)

    Array of feed URLs



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/web_inspector/inspector.rb', line 233

# RSS/Atom feed URLs found on the page (memoized).
#
# Two sources are combined, in this order: `<link>` tags typed as RSS or Atom
# (made absolute), then any page link whose path contains a /feed, /rss or
# /atom segment, optionally followed by `.xml`.
#
# @return [Array<String>] unique feed URLs
def feeds
  @feeds ||= begin
    declared = @page.css('link[type="application/rss+xml"], link[type="application/atom+xml"]')
                    .each_with_object([]) do |node, found|
      target = node[:href]
      found << make_absolute_url(target) if target
    end

    # Heuristic match on ordinary anchors.
    guessed = links.select { |candidate| candidate =~ %r{/(feed|rss|atom)(/|\.xml|$)}i }

    (declared + guessed).uniq.compact
  end
end

#find(words) ⇒ Array<Hash>

Search for specific words in the page content

Parameters:

  • words (Array<String>)

    List of words to search for

Returns:

  • (Array<Hash>)

    Counts of word occurrences



37
38
39
40
# File 'lib/web_inspector/inspector.rb', line 37

# Search the page text for the given words.
#
# The text of the `html` node is lower-cased before it is handed, together
# with `words`, to `counter`, which produces the result.
#
# @param words [Array<String>] words to look for
# @return [Array<Hash>] counts of word occurrences, as produced by `counter`
def find(words)
  root = @page.at('html')
  haystack = root.inner_text.downcase
  counter(haystack, words)
end

#images ⇒ Array<String>

Get all images from the page

Returns:

  • (Array<String>)

    Array of image URLs



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/web_inspector/inspector.rb', line 79

# Absolute URLs of every `<img>` on the page (memoized).
#
# Images without a `src`, sources that `make_absolute_url` rejects as invalid
# URIs, and sources it resolves to nothing are left out.
#
# @return [Array<String>] unique image URLs in document order
def images
  @images ||= begin
    resolved = @page.css('img').each_with_object([]) do |node, urls|
      raw = node[:src]
      next unless raw

      begin
        url = make_absolute_url(raw.strip)
        urls << url if url
      rescue URI::InvalidURIError, URI::BadURIError
        # Unparseable source: leave it out rather than fail the whole scan.
        next
      end
    end

    resolved.uniq.compact
  end
end

#javascripts ⇒ Array<String>

Get all JavaScript files used by the page

Returns:

  • (Array<String>)

    Array of JavaScript file URLs



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/web_inspector/inspector.rb', line 111

# Absolute URLs of every external script (`<script src>`) on the page (memoized).
#
# Sources that `make_absolute_url` rejects as invalid URIs, or resolves to
# nothing, are left out.
#
# @return [Array<String>] unique script URLs in document order
def javascripts
  @javascripts ||= begin
    resolved = @page.css('script[src]').each_with_object([]) do |node, urls|
      raw = node[:src]
      next unless raw

      begin
        url = make_absolute_url(raw.strip)
        urls << url if url
      rescue URI::InvalidURIError, URI::BadURIError
        # Unparseable source: leave it out rather than fail the whole scan.
        next
      end
    end

    resolved.uniq.compact
  end
end

#language ⇒ String?

Detect the page language

Returns:

  • (String, nil)

    Language code if detected, nil otherwise



157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/web_inspector/inspector.rb', line 157

# Detect the page language.
#
# A non-empty `lang` attribute on the `html` node takes precedence; otherwise
# a non-empty `content-language` meta value is used.
#
# @return [String, nil] language code if detected, nil otherwise
def language
  root = @page.at('html')
  declared = root && root['lang']
  return declared if declared && !declared.empty?

  from_meta = @meta['content-language']
  from_meta if from_meta && !from_meta.empty?
end

#links ⇒ Array<String>

Get all links from the page

Returns:

  • (Array<String>)

    Array of URLs



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/web_inspector/inspector.rb', line 44

# Get all links from the page (memoized).
#
# Anchors without an `href`, and `javascript:`, `mailto:` and `tel:` links,
# are skipped. The scheme test runs on the stripped value and ignores case,
# so ` javascript:void(0)` and `MAILTO:x@y.z` are filtered as well. Hrefs that
# `make_absolute_url` rejects as invalid URIs, or resolves to nothing, are
# dropped.
#
# @return [Array<String>] unique absolute URLs in document order
def links
  @links ||= begin
    found = []
    @page.css('a').each do |a|
      href = a[:href]
      next unless href

      # Normalize first, so surrounding whitespace cannot hide the scheme.
      href = href.strip

      # Skip non-navigational schemes (URI schemes are case-insensitive).
      next if href.downcase.start_with?('javascript:', 'mailto:', 'tel:')

      begin
        absolute_url = make_absolute_url(href)
        found << absolute_url if absolute_url
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip invalid URLs
      end
    end
    found.uniq
  end
end

#microdata ⇒ Array<Hash>

Extract microdata from the page

Returns:

  • (Array<Hash>)

    Array of microdata items



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/web_inspector/inspector.rb', line 187

# Extract microdata items from the page (memoized).
#
# Every `[itemscope]` node becomes one item. Its properties are read from all
# `[itemprop]` nodes found beneath it; when a property name repeats, the last
# value wins.
#
# @return [Array<Hash>] items shaped as `{ type: itemtype, properties: { name => value } }`
def microdata
  @microdata ||= @page.css('[itemscope]').map do |scope|
    properties = scope.css('[itemprop]').each_with_object({}) do |prop, collected|
      name = prop['itemprop']
      # Where the value lives depends on the element type.
      collected[name] =
        case prop.name.downcase
        when 'meta'
          prop['content']
        when 'img', 'audio', 'embed', 'iframe', 'source', 'track', 'video'
          make_absolute_url(prop['src'])
        when 'a', 'area', 'link'
          make_absolute_url(prop['href'])
        when 'time'
          prop['datetime'] || prop.text.strip
        else
          prop.text.strip
        end
    end

    { type: scope['itemtype'], properties: properties }
  end
end

#mobile_friendly? ⇒ Boolean

Check if the page is mobile-friendly

Returns:

  • (Boolean)

    true if mobile-friendly



415
416
417
418
419
420
421
422
423
424
425
426
# File 'lib/web_inspector/inspector.rb', line 415

# Check if the page is mobile-friendly.
#
# A page counts as mobile-friendly when its viewport meta value contains
# `width=device-width` AND it either links at least one stylesheet or its
# markup contains `@media`.
#
# The answer is cached in @mobile_friendly, including a `false` answer, which
# `||=` would recompute on every call. The stylesheet and markup checks only
# run when the viewport check has already passed.
#
# @return [Boolean] true if mobile-friendly
def mobile_friendly?
  # The computed value is always true or false, so nil means "not computed yet".
  return @mobile_friendly unless @mobile_friendly.nil?

  # Check for viewport meta tag
  viewport = @meta['viewport']
  has_viewport = !viewport.nil? && viewport.include?('width=device-width')

  # Check for responsive CSS (linked stylesheets or inline media queries)
  @mobile_friendly = has_viewport && (stylesheets.any? || @page.to_html.include?('@media'))
end

#robots_txt_url ⇒ String

Get robots.txt URL

Returns:

  • (String)

    robots.txt URL



280
281
282
# File 'lib/web_inspector/inspector.rb', line 280

# Get the robots.txt URL.
#
# Keeps the first three `/`-separated pieces of @url (scheme, empty piece,
# host for a URL like `https://host/path`) and appends `/robots.txt`.
#
# @return [String, nil] robots.txt URL, or nil when @url is not set
def robots_txt_url
  return unless @url

  origin = @url.split('/').first(3).join('/')
  "#{origin}/robots.txt"
end

#set_url(url, host) ⇒ Object



15
16
17
18
# File 'lib/web_inspector/inspector.rb', line 15

# Records the page's URL and host; these back the #url and #host readers and
# are read by #robots_txt_url and #sitemap_url.
#
# @param url the page URL (other methods split it on "/", so presumably a String — confirm)
# @param host the page host
# @return the `host` argument (value of the last assignment)
def set_url(url, host)
  @url = url
  @host = host
end

#sitemap_url ⇒ Array<String>

Get sitemap URLs (declared sitemap link tags plus the default /sitemap.xml)

Returns:

  • (Array<String>)

    Array of sitemap URLs



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/web_inspector/inspector.rb', line 286

# Candidate sitemap URLs for the page (memoized).
#
# Lists every `<link rel="sitemap">` target (made absolute) and then, when
# @url is set, the conventional `/sitemap.xml` built from the first three
# `/`-separated pieces of @url.
#
# @return [Array<String>] unique sitemap URLs
def sitemap_url
  @sitemap_url ||= begin
    declared = @page.css('link[rel="sitemap"]').each_with_object([]) do |node, found|
      target = node[:href]
      found << make_absolute_url(target) if target
    end

    # Add default sitemap.xml
    declared << "#{@url.split('/').first(3).join('/')}/sitemap.xml" if @url

    declared.uniq.compact
  end
end

#social_links ⇒ Hash

Extract social media profile links

Returns:

  • (Hash)

    Hash of social platform => URL



254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/web_inspector/inspector.rb', line 254

# Extract social media profile links (memoized).
#
# Each page link is matched by its HOST against the known platform domains:
# the host must equal the domain or be a subdomain of it. Matching the raw
# URL with a substring pattern would misclassify unrelated sites, e.g.
# `netflix.com` contains `x.com`, `notfacebook.com` contains `facebook.com`,
# and any URL can carry a platform name in its query string.
# Links that cannot be parsed, or that have no host, are ignored. The first
# matching link per platform wins.
#
# @return [Hash{Symbol => String}] social platform => URL
def social_links
  @social_links ||= begin
    platforms = {
      facebook: %w[facebook.com],
      twitter: %w[twitter.com x.com],
      linkedin: %w[linkedin.com],
      instagram: %w[instagram.com],
      youtube: %w[youtube.com],
      github: %w[github.com],
      tiktok: %w[tiktok.com]
    }

    links.each_with_object({}) do |link, socials|
      host = begin
        URI.parse(link).host
      rescue URI::InvalidURIError
        nil
      end
      next unless host

      host = host.downcase
      platforms.each do |platform, domains|
        next if socials.key?(platform)
        next unless domains.any? { |domain| host == domain || host.end_with?(".#{domain}") }

        socials[platform] = link
      end
    end
  end
end

#structured_data ⇒ Array<Hash>

Extract structured data (JSON-LD) from the page

Returns:

  • (Array<Hash>)

    Array of structured data objects



172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/web_inspector/inspector.rb', line 172

# Extract structured data (JSON-LD) from the page (memoized).
#
# Every `<script type="application/ld+json">` body is parsed as JSON. Scripts
# whose content is not valid JSON, or parses to nil/false, are skipped.
#
# @return [Array<Hash>] parsed structured data objects in document order
def structured_data
  @structured_data ||= @page.css('script[type="application/ld+json"]').each_with_object([]) do |node, collected|
    begin
      document = JSON.parse(node.text)
    rescue JSON::ParserError
      # Skip invalid JSON
      next
    end

    collected << document if document
  end
end

#stylesheets ⇒ Array<String>

Get stylesheets used by the page

Returns:

  • (Array<String>)

    Array of CSS file URLs



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/web_inspector/inspector.rb', line 134

# Absolute URLs of every linked stylesheet (`<link rel="stylesheet">`), memoized.
#
# Links without an `href`, hrefs that `make_absolute_url` rejects as invalid
# URIs, and hrefs it resolves to nothing are left out.
#
# @return [Array<String>] unique CSS file URLs in document order
def stylesheets
  @stylesheets ||= begin
    resolved = @page.css('link[rel="stylesheet"]').each_with_object([]) do |node, urls|
      raw = node[:href]
      next unless raw

      begin
        url = make_absolute_url(raw.strip)
        urls << url if url
      rescue URI::InvalidURIError, URI::BadURIError
        # Unparseable href: leave it out rather than fail the whole scan.
        next
      end
    end

    resolved.uniq.compact
  end
end

#tag_count ⇒ Hash

Count all tag types on the page

Returns:

  • (Hash)

    Counts of different HTML elements



221
222
223
224
225
226
227
228
229
# File 'lib/web_inspector/inspector.rb', line 221

# Count all tag types on the page.
#
# Element names are lower-cased before counting. The result is rebuilt on
# every call (this method is not memoized).
#
# @return [Hash{String => Integer}] tag name => number of occurrences
def tag_count
  @page.css('*').each_with_object({}) do |node, totals|
    key = node.name.downcase
    totals[key] = totals.fetch(key, 0) + 1
  end
end

#title ⇒ Object



20
21
22
23
24
# File 'lib/web_inspector/inspector.rb', line 20

# Text of the page's `<title>`, with surrounding whitespace removed.
#
# @return [String, nil] the title, or nil when reading it raises any StandardError
def title
  title_nodes = @page.css('title')
  raw_text = title_nodes.inner_text
  raw_text.strip
rescue StandardError
  nil
end