Class: WebInspector::Inspector

Inherits:

Object

Object
WebInspector::Inspector

show all

Defined in: lib/web_inspector/inspector.rb

Instance Attribute Summary collapse

#host ⇒ Object readonly

Returns the value of attribute host.
#meta ⇒ Object readonly

Returns the value of attribute meta.
#page ⇒ Object readonly

Returns the value of attribute page.
#url ⇒ Object readonly

Returns the value of attribute url.

Instance Method Summary collapse

#accessibility_score ⇒ Hash

Calculate a basic accessibility score.
#body ⇒ Object
#cms_info ⇒ Hash

Detect CMS and get detailed information.
#description ⇒ Object
#domain_images(user_domain, host = nil) ⇒ Array<String>

Get images from a specific domain.
#domain_links(user_domain, host = nil) ⇒ Array<String>

Get links from a specific domain.
#feeds ⇒ Array<String>

Extract RSS/Atom feeds from the page.
#find(words) ⇒ Array<Hash>

Search for specific words in the page content.
#images ⇒ Array<String>

Get all images from the page.
#initialize(page) ⇒ Inspector constructor

A new instance of Inspector.
#javascripts ⇒ Array<String>

Get all JavaScript files used by the page.
#language ⇒ String?

Detect the page language.
#links ⇒ Array<String>

Get all links from the page.
#microdata ⇒ Array<Hash>

Extract microdata from the page.
#mobile_friendly? ⇒ Boolean

Check if the page is mobile-friendly.
#robots_txt_url ⇒ String

Get robots.txt URL.
#set_url(url, host) ⇒ Object
#sitemap_url ⇒ Array<String>

Get sitemap URL.
#social_links ⇒ Hash

Extract social media profile links.
#structured_data ⇒ Array<Hash>

Extract structured data (JSON-LD) from the page.
#stylesheets ⇒ Array<String>

Get stylesheets used by the page.
#tag_count ⇒ Hash

Count all tag types on the page.
#title ⇒ Object

Constructor Details

#initialize(page) ⇒ `Inspector`

Returns a new instance of Inspector.

# File 'lib/web_inspector/inspector.rb', line 9

# Builds an inspector around an already-parsed page.
#
# Stores the page, derives the meta information from it through
# WebInspector::Meta and initialises @base_url to nil (its use lies outside
# this excerpt). The URL and host are not set here; they are supplied later
# through #set_url.
#
# @param page [Object] parsed document; the methods of this class call +css+,
#   +at+ and +to_html+ on it (presumably a Nokogiri document — TODO confirm)
def initialize(page)
  @page = page
  @meta = WebInspector::Meta.new(page).meta
  @base_url = nil
end

Instance Attribute Details

#host ⇒ `Object` (readonly)

Returns the value of attribute host.



7
8
9

# File 'lib/web_inspector/inspector.rb', line 7

# @return [Object, nil] the host stored by #set_url, or by the first
#   #domain_links / #domain_images call that received one; nil until then
def host
  @host
end

#meta ⇒ `Object` (readonly)

Returns the value of attribute meta.



7
8
9

# File 'lib/web_inspector/inspector.rb', line 7

# @return [Object] the value WebInspector::Meta#meta produced in the
#   constructor; this class reads it with string keys such as 'description',
#   'generator' and 'viewport'
def meta
  @meta
end

#page ⇒ `Object` (readonly)

Returns the value of attribute page.



7
8
9

# File 'lib/web_inspector/inspector.rb', line 7

# @return [Object] the parsed page object handed to the constructor
def page
  @page
end

#url ⇒ `Object` (readonly)

Returns the value of attribute url.



7
8
9

# File 'lib/web_inspector/inspector.rb', line 7

# @return [Object, nil] the URL stored by #set_url; nil until it has been called
def url
  @url
end

Instance Method Details

#accessibility_score ⇒ `Hash`

Calculate a basic accessibility score

Returns:

(Hash) —

Accessibility score and details

# File 'lib/web_inspector/inspector.rb', line 352

# Computes a heuristic accessibility score for the page (memoized).
#
# The score starts at 100 and loses points for:
# * images without an +alt+ attribute — a quarter of the missing percentage,
#   i.e. at most 25 points
# * no H1 heading (15 points) or more than one (10 points)
# * buttons that have neither text nor +aria-label+ / +aria-labelledby+
#   (5 points each, at most 20)
# * a missing or empty +lang+ attribute on the +html+ element (10 points)
# * text, email and password inputs and textareas whose +id+ is not the
#   target of any +label[for]+ (5 points each, at most 15)
#
# Labels are collected with a single +label[for]+ query and compared as plain
# strings, so an input id containing quotes or other selector syntax cannot
# break the lookup, and the page is not queried once per input.
#
# @return [Hash] +:score+ (Integer, never below 0) and +:details+
#   (Array<String> describing each deduction)
def accessibility_score
  @accessibility_score ||= begin
    score = 100
    details = []

    # Images: penalise the share of <img> tags that have no alt attribute.
    images_without_alt = @page.css('img:not([alt])').count
    total_images = @page.css('img').count
    if total_images.positive?
      alt_percentage = ((total_images - images_without_alt).to_f / total_images * 100).round
      if alt_percentage < 100
        score -= (100 - alt_percentage) / 4 # integer division, max 25 points
        details << "#{images_without_alt} images missing alt text"
      end
    end

    # Headings: exactly one H1 is expected.
    h1_count = @page.css('h1').count
    if h1_count.zero?
      score -= 15
      details << 'No H1 heading found'
    elsif h1_count > 1
      score -= 10
      details << 'Multiple H1 headings found'
    end

    # Buttons: an ARIA-less button is only a problem when it has no text either.
    unlabelled_buttons = @page.css('button:not([aria-label]):not([aria-labelledby])').count do |btn|
      btn.text.strip.empty?
    end
    if unlabelled_buttons.positive?
      score -= [unlabelled_buttons * 5, 20].min
      details << "#{unlabelled_buttons} buttons without accessible labels"
    end

    # Document language.
    html_tag = @page.at('html')
    if html_tag.nil? || html_tag['lang'].nil? || html_tag['lang'].empty?
      score -= 10
      details << 'No language attribute on HTML element'
    end

    # Form controls: an input counts as labelled when some label's `for`
    # attribute equals its id.
    labelled_ids = @page.css('label[for]').map { |label| label['for'] }
    inputs = @page.css('input[type="text"], input[type="email"], input[type="password"], textarea')
    inputs_without_labels = inputs.count do |input|
      id = input['id']
      !id || !labelled_ids.include?(id)
    end
    if inputs_without_labels.positive?
      score -= [inputs_without_labels * 5, 15].min
      details << "#{inputs_without_labels} form inputs without labels"
    end

    { score: [score, 0].max, details: details }
  end
end

#body ⇒ `Object`



30
31
32

# File 'lib/web_inspector/inspector.rb', line 30

# Serialises the page's +body+ element(s) back to HTML.
#
# @return [Object] whatever +to_html+ returns for the +body+ selection
#   (presumably an HTML String — TODO confirm against the page type)
def body
  body_nodes = @page.css('body')
  body_nodes.to_html
end

#cms_info ⇒ `Hash`

Detect CMS and get detailed information

Returns:

(Hash) —

CMS information

# File 'lib/web_inspector/inspector.rb', line 305

# Guesses which CMS produced the page (memoized).
#
# Detection is purely textual and checked in this order, first hit wins:
# WordPress, Drupal, Joomla, Shopify, Wix, Squarespace. It relies on marker
# substrings in the serialised HTML and on the +generator+ meta tag. For
# WordPress the theme and plugin directory names are also collected from
# stylesheet/script URLs; versions come from the generator tag when present.
#
# The page is serialised once and the generator tag read once, instead of
# being re-evaluated in every branch.
#
# @return [Hash] +:name+ (String, nil), +:version+ (String, nil), +:themes+
#   and +:plugins+ (Array<String>, de-duplicated)
def cms_info
  @cms_info ||= begin
    info = { name: nil, version: nil, themes: [], plugins: [] }
    html = @page.to_html
    generator = @meta['generator']

    if html.include?('wp-content') || generator&.include?('WordPress')
      info[:name] = 'WordPress'
      info[:version] = generator[/WordPress\s+([\d.]+)/, 1] if generator

      # Theme name is the path segment right after "themes/".
      @page.css('link[href*="wp-content/themes"]').each do |link|
        theme = link[:href].to_s[%r{themes/([^/]+)}, 1]
        info[:themes] << theme if theme
      end

      # Plugins can be referenced from stylesheets (href) or scripts (src).
      @page.css('link[href*="wp-content/plugins"], script[src*="wp-content/plugins"]').each do |elem|
        plugin = (elem[:href] || elem[:src]).to_s[%r{plugins/([^/]+)}, 1]
        info[:plugins] << plugin if plugin
      end
    elsif html.include?('Drupal') || generator&.include?('Drupal')
      info[:name] = 'Drupal'
      info[:version] = generator[/Drupal\s+([\d.]+)/, 1] if generator
    elsif generator&.include?('Joomla')
      info[:name] = 'Joomla'
      info[:version] = generator[/Joomla!\s+([\d.]+)/, 1]
    elsif html.include?('cdn.shopify.com') || html.include?('Shopify')
      info[:name] = 'Shopify'
    elsif html.include?('wix.com') || html.include?('_wix')
      info[:name] = 'Wix'
    elsif html.include?('squarespace')
      info[:name] = 'Squarespace'
    end

    info[:themes].uniq!
    info[:plugins].uniq!
    info
  end
end

#description ⇒ `Object`



26
27
28

# File 'lib/web_inspector/inspector.rb', line 26

# Picks the best available page description.
#
# Tries the +description+ meta entry, then +og:description+, and finally
# falls back to #snippet (defined elsewhere in the class). An entry counts as
# present whenever it is truthy, so an empty string is returned as-is.
#
# @return [Object] the first truthy candidate, or the result of #snippet
def description
  %w[description og:description].each do |key|
    candidate = @meta[key]
    return candidate if candidate
  end

  snippet
end

#domain_images(user_domain, host = nil) ⇒ `Array<String>`

Get images from a specific domain

Parameters:

user_domain (String) —

Domain to filter images by
host (String) (defaults to: nil) —

Current host

Returns:

(Array<String>) —

Filtered images

# File 'lib/web_inspector/inspector.rb', line 104

# Returns the page's images restricted to one domain.
#
# The +host+ argument is only remembered when no host has been stored yet; a
# previously stored host is left untouched. The filtering itself is delegated
# to +filter_by_domain+ (defined elsewhere in the class).
#
# @param user_domain [String] domain to filter images by
# @param host [String, nil] current host, used only if none is set yet
# @return [Array<String>] whatever +filter_by_domain+ returns for #images
def domain_images(user_domain, host = nil)
  @host = host unless @host
  candidates = images
  filter_by_domain(candidates, user_domain)
end

#domain_links(user_domain, host = nil) ⇒ `Array<String>`

Get links from a specific domain

Parameters:

user_domain (String) —

Domain to filter links by
host (String) (defaults to: nil) —

Current host

Returns:

(Array<String>) —

Filtered links

# File 'lib/web_inspector/inspector.rb', line 72

# Returns the page's links restricted to one domain.
#
# The +host+ argument is only remembered when no host has been stored yet; a
# previously stored host is left untouched. The filtering itself is delegated
# to +filter_by_domain+ (defined elsewhere in the class).
#
# @param user_domain [String] domain to filter links by
# @param host [String, nil] current host, used only if none is set yet
# @return [Array<String>] whatever +filter_by_domain+ returns for #links
def domain_links(user_domain, host = nil)
  @host = host unless @host
  candidates = links
  filter_by_domain(candidates, user_domain)
end

#feeds ⇒ `Array<String>`

Extract RSS/Atom feeds from the page

Returns:

(Array<String>) —

Array of feed URLs

# File 'lib/web_inspector/inspector.rb', line 233

# Collects RSS/Atom feed URLs advertised by the page (memoized).
#
# Two sources are combined, in this order:
# 1. +<link>+ tags typed +application/rss+xml+ or +application/atom+xml+,
#    resolved through +make_absolute_url+ (defined elsewhere in the class)
# 2. ordinary page links whose path looks like a feed (/feed, /rss, /atom,
#    optionally followed by "/" or ".xml")
#
# A malformed feed href is skipped rather than aborting the whole
# extraction, matching how #links, #images, #javascripts and #stylesheets
# treat URI errors.
#
# @return [Array<String>] unique feed URLs
def feeds
  @feeds ||= begin
    found = []

    @page.css('link[type="application/rss+xml"], link[type="application/atom+xml"]').each do |link|
      href = link[:href]
      next unless href

      begin
        found << make_absolute_url(href)
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip invalid URLs
      end
    end

    # Heuristic: links that merely look like feed endpoints.
    links.each do |link|
      found << link if link.match?(%r{/(feed|rss|atom)(/|\.xml|$)}i)
    end

    found.compact.uniq
  end
end

#find(words) ⇒ `Array<Hash>`

Search for specific words in the page content

Parameters:

words (Array<String>) —

List of words to search for

Returns:

(Array<Hash>) —

Counts of word occurrences

# File 'lib/web_inspector/inspector.rb', line 37

# Counts occurrences of the given words in the page text.
#
# The text of the +html+ element is lower-cased and handed, together with
# +words+, to +counter+ (defined elsewhere in the class), which produces the
# result. A page without an +html+ element is treated as empty text instead
# of raising NoMethodError.
#
# @param words [Array<String>] list of words to search for
# @return [Array<Hash>] whatever +counter+ returns for the page text
def find(words)
  root = @page.at('html')
  text = root ? root.inner_text : ''
  counter(text.downcase, words)
end

#images ⇒ `Array<String>`

Get all images from the page

Returns:

(Array<String>) —

Array of image URLs

# File 'lib/web_inspector/inspector.rb', line 79

# Lists the absolute URLs of every +<img>+ on the page (memoized).
#
# Each +src+ is stripped of surrounding whitespace and resolved through
# +make_absolute_url+ (defined elsewhere in the class). Images without a
# +src+, sources that resolve to nothing, and sources that raise a URI error
# are left out.
#
# @return [Array<String>] unique image URLs in document order
def images
  @images ||= begin
    resolved = @page.css('img').each_with_object([]) do |node, urls|
      raw_src = node[:src]
      next unless raw_src

      absolute = make_absolute_url(raw_src.strip)
      urls << absolute if absolute
    rescue URI::InvalidURIError, URI::BadURIError
      # Malformed source: ignore this image and carry on.
    end
    resolved.uniq.compact
  end
end

#javascripts ⇒ `Array<String>`

Get all JavaScript files used by the page

Returns:

(Array<String>) —

Array of JavaScript file URLs

# File 'lib/web_inspector/inspector.rb', line 111

# Lists the absolute URLs of every external script on the page (memoized).
#
# Only +<script>+ tags carrying a +src+ are considered. Each source is
# stripped of surrounding whitespace and resolved through
# +make_absolute_url+ (defined elsewhere in the class); sources that resolve
# to nothing or raise a URI error are left out.
#
# @return [Array<String>] unique script URLs in document order
def javascripts
  @javascripts ||= begin
    resolved = @page.css('script[src]').each_with_object([]) do |node, urls|
      raw_src = node[:src]
      next unless raw_src

      absolute = make_absolute_url(raw_src.strip)
      urls << absolute if absolute
    rescue URI::InvalidURIError, URI::BadURIError
      # Malformed source: ignore this script and carry on.
    end
    resolved.uniq.compact
  end
end

#language ⇒ `String`?

Detect the page language

Returns:

(String, nil) —

Language code if detected, nil otherwise

# File 'lib/web_inspector/inspector.rb', line 157

# Detects the language declared by the page.
#
# The +lang+ attribute of the +html+ element takes precedence; if it is
# missing or empty the +content-language+ meta entry is used. No other source
# is consulted.
#
# @return [String, nil] language code if declared, nil otherwise
def language
  root = @page.at('html')
  attribute_lang = root && root['lang']
  return attribute_lang if attribute_lang && !attribute_lang.empty?

  declared = @meta['content-language']
  declared if declared && !declared.empty?
end

#links ⇒ `Array<String>`

Get all links from the page

Returns:

(Array<String>) —

Array of URLs

# File 'lib/web_inspector/inspector.rb', line 44

# Lists the absolute URLs of every hyperlink on the page (memoized).
#
# Each +href+ is stripped of surrounding whitespace first, then dropped if it
# uses the +javascript:+, +mailto:+ or +tel:+ scheme (compared
# case-insensitively, as URI schemes are). Stripping before the scheme check
# keeps values such as " mailto:a@b.c" from slipping through. Remaining hrefs
# are resolved through +make_absolute_url+ (defined elsewhere in the class);
# hrefs that resolve to nothing or raise a URI error are left out.
#
# @return [Array<String>] unique URLs in document order
def links
  @links ||= begin
    collected = []
    @page.css('a').each do |anchor|
      href = anchor[:href]
      next unless href

      href = href.strip
      # Not navigable page links: script pseudo-URLs, e-mail and phone links.
      next if href.match?(/\A(?:javascript|mailto|tel):/i)

      begin
        absolute_url = make_absolute_url(href)
        collected << absolute_url if absolute_url
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip invalid URLs
      end
    end
    collected.uniq
  end
end

#microdata ⇒ `Array<Hash>`

Extract microdata from the page

Returns:

(Array<Hash>) —

Array of microdata items

# File 'lib/web_inspector/inspector.rb', line 187

# Extracts HTML microdata items from the page (memoized).
#
# Every +[itemscope]+ element becomes one item. Its properties are read from
# the +[itemprop]+ elements found beneath it; the value depends on the tag:
# * +meta+ — the +content+ attribute
# * +img+, +audio+, +embed+, +iframe+, +source+, +track+, +video+ — the
#   +src+ attribute made absolute
# * +a+, +area+, +link+ — the +href+ attribute made absolute
# * +time+ — the +datetime+ attribute, else the stripped text
# * anything else — the stripped text
# When a property name repeats, the last occurrence wins.
#
# NOTE(review): +scope.css('[itemprop]')+ presumably also returns properties
# that belong to nested itemscope elements — confirm whether that is wanted.
# NOTE(review): unlike the URL list extractors, URI errors raised by
# +make_absolute_url+ are not rescued here.
#
# @return [Array<Hash>] one +{ type:, properties: }+ hash per item
def microdata
  @microdata ||= @page.css('[itemscope]').map do |scope|
    properties = scope.css('[itemprop]').each_with_object({}) do |node, collected|
      collected[node['itemprop']] =
        case node.name.downcase
        when 'meta' then node['content']
        when 'img', 'audio', 'embed', 'iframe', 'source', 'track', 'video'
          make_absolute_url(node['src'])
        when 'a', 'area', 'link' then make_absolute_url(node['href'])
        when 'time' then node['datetime'] || node.text.strip
        else node.text.strip
        end
    end

    { type: scope['itemtype'], properties: properties }
  end
end

#mobile_friendly? ⇒ `Boolean`

Check if the page is mobile-friendly

Returns:

(Boolean) —

true if mobile-friendly

# File 'lib/web_inspector/inspector.rb', line 415

# Reports whether the page looks mobile-friendly (memoized).
#
# This is a heuristic: the page must declare a viewport meta tag containing
# "width=device-width" AND show some sign of responsive CSS, where "some
# sign" means either at least one linked stylesheet or the literal text
# "@media" anywhere in the serialised HTML.
#
# The result is cached with an explicit nil check rather than +||=+, so a
# +false+ answer is remembered too instead of being recomputed on each call.
#
# @return [Boolean] true if both conditions hold
def mobile_friendly?
  return @mobile_friendly unless @mobile_friendly.nil?

  viewport = @meta['viewport']
  has_viewport = !viewport.nil? && viewport.include?('width=device-width')

  # Only inspect the stylesheets/HTML when the viewport check has passed.
  @mobile_friendly = has_viewport && (stylesheets.any? || @page.to_html.include?('@media'))
end

#robots_txt_url ⇒ `String`

Get robots.txt URL

Returns:

(String) —

robots.txt URL



280
281
282

# File 'lib/web_inspector/inspector.rb', line 280

# Builds the robots.txt URL for the inspected site.
#
# The origin is taken as the first three "/"-separated pieces of the stored
# URL (scheme, empty piece, authority), so the URL is expected to include its
# scheme.
#
# @return [String, nil] robots.txt URL, or nil when no URL has been set
def robots_txt_url
  return unless @url

  origin = @url.split('/').first(3).join('/')
  "#{origin}/robots.txt"
end

#set_url(url, host) ⇒ `Object`

# File 'lib/web_inspector/inspector.rb', line 15

# Records the URL and host the page was fetched from.
#
# Both values are stored unconditionally, replacing any earlier ones. #url
# and #host expose them, and #robots_txt_url and #sitemap_url derive their
# results from the stored URL.
#
# @param url [String] URL of the inspected page
# @param host [String] host of the inspected page
# @return [Object] +host+ (the value of the last assignment)
def set_url(url, host)
  @url = url
  @host = host
end

#sitemap_url ⇒ `Array<String>`

Get sitemap URL

Returns:

(Array<String>) —

Array of sitemap URLs

# File 'lib/web_inspector/inspector.rb', line 286

# Lists candidate sitemap URLs for the page (memoized).
#
# Contains every +<link rel="sitemap">+ href, resolved through
# +make_absolute_url+ (defined elsewhere in the class), followed by the
# conventional +/sitemap.xml+ at the origin of the stored URL when one has
# been set. A malformed href is skipped rather than aborting the call,
# matching how #links, #images, #javascripts and #stylesheets treat URI
# errors.
#
# @return [Array<String>] unique sitemap URLs
def sitemap_url
  @sitemap_url ||= begin
    sitemaps = []

    @page.css('link[rel="sitemap"]').each do |link|
      href = link[:href]
      next unless href

      begin
        sitemaps << make_absolute_url(href)
      rescue URI::InvalidURIError, URI::BadURIError
        # Skip invalid URLs
      end
    end

    # Default location: scheme + authority of the page URL.
    sitemaps << "#{@url.split('/')[0..2].join('/')}/sitemap.xml" if @url

    sitemaps.compact.uniq
  end
end

#social_links ⇒ `Hash`

Extract social media profile links

Returns:

(Hash) —

Hash of social platform => URL

# File 'lib/web_inspector/inspector.rb', line 254

# Finds the first link to each known social platform (memoized).
#
# A link belongs to a platform when the host part of its URL is one of the
# platform's domains or a subdomain of it (compared case-insensitively).
# Matching on the host rather than anywhere in the URL keeps look-alikes out:
# "dropbox.com" or "netflix.com" are not "x.com", and a platform name that
# only appears in a path or query string does not count.
#
# @return [Hash{Symbol => String}] platform => first matching URL; platforms
#   without a match are absent
def social_links
  @social_links ||= begin
    platforms = {
      facebook: %w[facebook.com],
      twitter: %w[twitter.com x.com],
      linkedin: %w[linkedin.com],
      instagram: %w[instagram.com],
      youtube: %w[youtube.com],
      github: %w[github.com],
      tiktok: %w[tiktok.com]
    }

    links.each_with_object({}) do |link, socials|
      # Host = authority without userinfo and port; the scheme is optional so
      # protocol-relative URLs ("//x.com/...") are understood as well.
      host = link[%r{\A(?:[a-z][a-z0-9+.-]*:)?//(?:[^/?#@]*@)?([^/?#:]+)}i, 1]
      next unless host

      host = host.downcase
      platforms.each do |platform, domains|
        next if socials.key?(platform) # first link per platform wins

        socials[platform] = link if domains.any? { |d| host == d || host.end_with?(".#{d}") }
      end
    end
  end
end

#structured_data ⇒ `Array<Hash>`

Extract structured data (JSON-LD) from the page

Returns:

(Array<Hash>) —

Array of structured data objects

# File 'lib/web_inspector/inspector.rb', line 172

# Extracts JSON-LD structured data from the page (memoized).
#
# The text of every +<script type="application/ld+json">+ is parsed as JSON.
# Scripts whose content is not valid JSON are ignored, as are documents that
# parse to nil or false.
#
# @return [Array] parsed JSON documents in document order
def structured_data
  @structured_data ||= @page.css('script[type="application/ld+json"]').each_with_object([]) do |node, documents|
    payload = JSON.parse(node.text)
    documents << payload if payload
  rescue JSON::ParserError
    # Invalid JSON: ignore this script and carry on.
  end
end

#stylesheets ⇒ `Array<String>`

Get stylesheets used by the page

Returns:

(Array<String>) —

Array of CSS file URLs

# File 'lib/web_inspector/inspector.rb', line 134

# Lists the absolute URLs of every linked stylesheet (memoized).
#
# Only +<link rel="stylesheet">+ tags are considered. Each +href+ is stripped
# of surrounding whitespace and resolved through +make_absolute_url+ (defined
# elsewhere in the class); links without an +href+, hrefs that resolve to
# nothing, and hrefs that raise a URI error are left out.
#
# @return [Array<String>] unique CSS file URLs in document order
def stylesheets
  @stylesheets ||= begin
    resolved = @page.css('link[rel="stylesheet"]').each_with_object([]) do |node, urls|
      raw_href = node[:href]
      next unless raw_href

      absolute = make_absolute_url(raw_href.strip)
      urls << absolute if absolute
    rescue URI::InvalidURIError, URI::BadURIError
      # Malformed href: ignore this stylesheet and carry on.
    end
    resolved.uniq.compact
  end
end

#tag_count ⇒ `Hash`

Count all tag types on the page

Returns:

(Hash) —

Counts of different HTML elements

# File 'lib/web_inspector/inspector.rb', line 221

# Counts how often each element type occurs on the page.
#
# Tag names are lower-cased before counting. The result is rebuilt on every
# call (no memoization) and is a plain Hash, so looking up a tag that does
# not occur yields nil rather than 0.
#
# @return [Hash{String => Integer}] tag name => number of occurrences, keyed
#   in order of first appearance
def tag_count
  @page.css('*').each_with_object({}) do |element, counts|
    key = element.name.downcase
    counts[key] = counts.fetch(key, 0) + 1
  end
end

#title ⇒ `Object`

# File 'lib/web_inspector/inspector.rb', line 20

# Returns the page title.
#
# Only the first +<title>+ element is used. Selecting every +title+ and
# asking the node set for its text would glue the text of all of them
# together, which garbles the result on pages that embed SVG graphics with
# their own +<title>+ children.
#
# @return [String, nil] stripped title text, "" when the page has no title,
#   nil if reading the page raised an error
def title
  node = @page.at('title')
  node ? node.inner_text.strip : ''
rescue StandardError
  # Best effort: an unreadable page simply has no title.
  nil
end

Class: WebInspector::Inspector

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(page) ⇒ Inspector

Instance Attribute Details

#host ⇒ Object (readonly)

#meta ⇒ Object (readonly)

#page ⇒ Object (readonly)

#url ⇒ Object (readonly)

Instance Method Details

#accessibility_score ⇒ Hash

#body ⇒ Object

#cms_info ⇒ Hash

#description ⇒ Object

#domain_images(user_domain, host = nil) ⇒ Array<String>

#domain_links(user_domain, host = nil) ⇒ Array<String>

#feeds ⇒ Array<String>

#find(words) ⇒ Array<Hash>

#images ⇒ Array<String>

#javascripts ⇒ Array<String>

#language ⇒ String?

#links ⇒ Array<String>

#microdata ⇒ Array<Hash>

#mobile_friendly? ⇒ Boolean

#robots_txt_url ⇒ String

#set_url(url, host) ⇒ Object

#sitemap_url ⇒ Array<String>

#social_links ⇒ Hash

#structured_data ⇒ Array<Hash>

#stylesheets ⇒ Array<String>

#tag_count ⇒ Hash

#title ⇒ Object

#initialize(page) ⇒ `Inspector`

#host ⇒ `Object` (readonly)

#meta ⇒ `Object` (readonly)

#page ⇒ `Object` (readonly)

#url ⇒ `Object` (readonly)

#accessibility_score ⇒ `Hash`

#body ⇒ `Object`

#cms_info ⇒ `Hash`

#description ⇒ `Object`

#domain_images(user_domain, host = nil) ⇒ `Array<String>`

#domain_links(user_domain, host = nil) ⇒ `Array<String>`

#feeds ⇒ `Array<String>`

#find(words) ⇒ `Array<Hash>`

#images ⇒ `Array<String>`

#javascripts ⇒ `Array<String>`

#language ⇒ `String`?

#links ⇒ `Array<String>`

#microdata ⇒ `Array<Hash>`

#mobile_friendly? ⇒ `Boolean`

#robots_txt_url ⇒ `String`

#set_url(url, host) ⇒ `Object`

#sitemap_url ⇒ `Array<String>`

#social_links ⇒ `Hash`

#structured_data ⇒ `Array<Hash>`

#stylesheets ⇒ `Array<String>`

#tag_count ⇒ `Hash`

#title ⇒ `Object`