Class: StatusMcp::Server::FetchStatusTool

Inherits:

BaseTool

Object
FastMcp::Tool
BaseTool
StatusMcp::Server::FetchStatusTool

show all

Defined in: lib/status_mcp/server.rb

Constant Summary collapse

MAX_RESPONSE_SIZE = 1 * 1024 * 1024

Maximum response size (1MB) to protect against zip bombs and crawler protection pages

Instance Method Summary collapse

Instance Method Details

#build_feed_urls(status_url) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 643

# Builds the list of candidate RSS/Atom feed URLs for a status page.
#
# Each well-known feed suffix is appended to the page's path (with any
# trailing slash removed); scheme, host, port and query are carried over
# from +status_url+ unchanged.
#
# @param status_url [String] URL of the status page
# @return [Array<String>] candidate feed URLs, in the order they should be tried
def build_feed_urls(status_url)
  page_uri = URI(status_url)
  prefix = page_uri.path.chomp("/")

  # Common RSS/Atom feed locations, most specific first
  %w[/feed.rss /feed.atom /rss /atom /feed /status.rss /status.atom].map do |suffix|
    candidate = page_uri.dup
    candidate.path = "#{prefix}#{suffix}"
    candidate.to_s
  end
end

#build_history_url(status_url) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 458

# Derives the conventional "/history" page URL for a status page.
#
# The trailing slash is stripped *before* checking whether the URL already
# points at a history page, so both ".../history" and ".../history/" are
# recognised and never turned into ".../history/history".
#
# @param status_url [String] URL of the status page
# @return [String, nil] the history page URL, or nil when +status_url+ is
#   already a history page
def build_history_url(status_url)
  uri = URI(status_url)
  base_path = uri.path.chomp("/")

  # Don't build a history URL if we're already on a history page
  return nil if base_path.end_with?("/history")

  history_uri = uri.dup
  history_uri.path = "#{base_path}/history"
  history_uri.to_s
end

#build_incident_io_api_url(status_url) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 491

# Builds the incident.io proxy API URL for a status page.
#
# incident.io status pages expose their data at
# https://status.example.com/proxy/status.example.com, i.e. the page's own
# host name repeated under "/proxy/". Scheme, port and query of +status_url+
# are carried over unchanged.
#
# @param status_url [String] URL of the status page
# @return [String, nil] the API URL, or nil when +status_url+ has no host
#   (e.g. a relative URL), since no meaningful API URL can be built from it
def build_incident_io_api_url(status_url)
  uri = URI(status_url)
  host = uri.host

  # Without a host the result would be the meaningless "/proxy/"
  return nil if host.nil? || host.empty?

  api_uri = uri.dup
  api_uri.path = "/proxy/#{host}"
  api_uri.to_s
end

#call(status_url:, max_length: 10000) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 230

# Collects status information for +status_url+ from every available source
# and merges it into a single result hash.
#
# Sources, in the order they are consulted:
# 1. the incident.io proxy API (only when might_be_incident_io? says so),
# 2. RSS/Atom feeds (skipped when the API already produced data),
# 3. the HTML status page itself,
# 4. the HTML "/history" page, when one can be derived.
#
# Failures of individual sources are swallowed on purpose (best effort); any
# other exception is turned into an error result rather than raised.
#
# @param status_url [String] URL of the status page
# @param max_length [Integer] size budget handed to the fetch/parse helpers
# @return [Hash] on success: :status_url, :api_url, :feed_url, :history_url,
#   :latest_status, :history (at most 20 entries), :messages, :extracted_at,
#   :error and :http_status_code; on unexpected failure: :status_url, :error,
#   :latest_status, :history and :messages only
def call(status_url:, max_length: 10000)
  # Truthy when a source hash carries history entries or a latest status
  has_data = ->(info) { info && (info[:history]&.any? || info[:latest_status]) }

  # Try incident.io API first (only if we detect it's an incident.io page)
  api_info = nil
  api_url = nil
  if might_be_incident_io?(status_url)
    begin
      api_url = build_incident_io_api_url(status_url)
      if api_url
        api_info = fetch_and_parse_incident_io_api(api_url, max_length)
        # A 404 means there is no such API endpoint; ignore the result entirely
        api_info = nil if api_info&.dig(:error)&.include?("404")
      end
    rescue
      # Not an incident.io page or API failed, continue with other methods
    end
  end

  # Try RSS/Atom feeds (they're more reliable for JS-rendered pages)
  feed_urls = build_feed_urls(status_url)
  feed_info = nil
  successful_feed_url = nil

  unless has_data.call(api_info)
    feed_urls.each do |feed_url|
      feed_info = fetch_and_parse_feed(feed_url, max_length)
      if has_data.call(feed_info)
        successful_feed_url = feed_url
        break
      end
    rescue
      # Try next feed URL
      next
    end
  end

  # Fetch main status page (as fallback or supplement)
  main_info = nil
  begin
    main_info = fetch_and_extract(status_url, max_length)
  rescue
    # If we have feed info, that's okay; otherwise fall back to an empty result
    main_info = {latest_status: nil, history: [], messages: [], error: nil} unless feed_info
  end

  # Try to fetch history page if it exists
  history_url = build_history_url(status_url)
  history_info = nil

  if history_url && history_url != status_url
    begin
      history_info = fetch_and_extract(history_url, max_length, history_only: true)
    rescue
      # Silently fail if history page doesn't exist or has errors
      # This is expected for many status pages
    end
  end

  # Merge results (prioritize API data, then feed data, then HTML)
  combined_history = [api_info, feed_info, main_info, history_info].flat_map do |info|
    (info && info[:history]) || []
  end

  # Remove duplicates (simple text-based deduplication on the first 101 characters)
  combined_history = combined_history.uniq { |item| item[0..100] }

  # Determine latest status (prioritize API, then HTML page, then feed)
  # HTML page is more reliable than feed fallback statuses
  latest_status = api_info[:latest_status] if api_info && api_info[:latest_status]
  latest_status ||= main_info[:latest_status] if main_info && main_info[:latest_status]
  # Only use feed status if HTML didn't find one (feed statuses are often fallbacks)
  latest_status ||= feed_info[:latest_status] if feed_info && feed_info[:latest_status]

  # Get HTTP status code from main page (most reliable)
  http_status_code = main_info[:http_status_code] if main_info && main_info[:http_status_code]

  # Only include errors if they're meaningful
  final_error = nil
  if main_info && main_info[:error] && !main_info[:error].empty?
    # Don't show JS-rendered page errors if we got data from feeds/API
    other_sources_have_data = has_data.call(feed_info) || has_data.call(api_info)
    suppress = main_info[:error].include?("JavaScript-rendered") && other_sources_have_data
    final_error = suppress ? nil : main_info[:error]
  # Only show feed/API errors if we didn't get any useful data from HTML
  elsif !main_info || !main_info[:latest_status]
    if feed_info && feed_info[:error] && !feed_info[:error].empty? && !has_data.call(feed_info)
      # Don't show "Not a valid RSS or Atom feed" - it's not really an error, just no feed available
      final_error = feed_info[:error] unless feed_info[:error].include?("Not a valid RSS or Atom feed")
    elsif api_info && api_info[:error] && !api_info[:error].empty? && !has_data.call(api_info)
      final_error = api_info[:error]
    end
  end

  # Take messages from the first source (API, feed, HTML) that actually has any.
  # An empty array is truthy in Ruby, so a plain `a || b || c` chain would stop
  # at the first source that merely exists and hide messages from later ones.
  messages = [api_info, feed_info, main_info]
    .map { |info| info && info[:messages] }
    .find { |source_messages| source_messages && !source_messages.empty? } || []

  {
    status_url: status_url,
    api_url: api_url,
    feed_url: successful_feed_url,
    history_url: history_url,
    latest_status: latest_status,
    history: combined_history.first(20), # Limit to the first 20 merged entries
    messages: messages,
    extracted_at: Time.now.iso8601,
    error: final_error,
    http_status_code: http_status_code
  }
rescue => e
  error_message = if e.is_a?(StatusMcp::ResponseSizeExceededError)
    "Response size limit exceeded: #{e.message}"
  else
    "Error fetching status: #{e.message}"
  end

  {
    status_url: status_url,
    error: error_message,
    latest_status: nil,
    history: [],
    messages: []
  }
end

#fetch_and_extract(url, max_length, history_only: false) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 420

# Downloads an HTML status page and extracts status information from it.
#
# @param url [String] page to download (redirects are followed by
#   fetch_with_redirects)
# @param max_length [Integer] size budget forwarded to extract_status_info
# @param history_only [Boolean] when true, only the history entries are
#   extracted and :latest_status / :messages are left empty
# @return [Hash] extraction result including :http_status_code; for a
#   non-success HTTP response, a hash with :error set and empty data
# @raise [StatusMcp::ResponseSizeExceededError] when the body is larger than
#   MAX_RESPONSE_SIZE
def fetch_and_extract(url, max_length, history_only: false)
  page = fetch_with_redirects(url)
  status_code = page.code.to_i

  if page.is_a?(Net::HTTPSuccess)
    markup = page.body || ""
    # Additional size check (already checked in fetch_with_redirects, but double-check for safety)
    if markup.bytesize > MAX_RESPONSE_SIZE
      raise StatusMcp::ResponseSizeExceededError.new(markup.bytesize, MAX_RESPONSE_SIZE, uri: url)
    end

    # Validate and parse HTML
    document = validate_and_parse_html(markup, URI(url))

    # Extract status information and include HTTP status code
    if history_only
      {
        latest_status: nil,
        history: extract_history(document),
        messages: [],
        http_status_code: status_code
      }
    else
      extract_status_info(document, max_length).merge(http_status_code: status_code)
    end
  else
    {
      error: "Failed to fetch: #{page.code} #{page.message}",
      http_status_code: status_code,
      latest_status: nil,
      history: [],
      messages: []
    }
  end
end

#fetch_and_parse_feed(feed_url, max_length) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 669

# Downloads an RSS/Atom feed and parses it with parse_feed.
#
# @param feed_url [String] feed to download
# @param max_length [Integer] size budget forwarded to parse_feed
# @return [Hash] the parse_feed result, or a hash with :error set and empty
#   data when the HTTP response is not a success or any other error occurs
# @raise [StatusMcp::ResponseSizeExceededError] when the body is larger than
#   MAX_RESPONSE_SIZE; this is the only error that is propagated
def fetch_and_parse_feed(feed_url, max_length)
  # Shape shared by every failure result
  failure = ->(reason) { {error: reason, latest_status: nil, history: [], messages: []} }

  reply = fetch_with_redirects(feed_url, accept: "application/rss+xml,application/atom+xml,application/xml,text/xml,*/*;q=0.9")
  return failure.call("Failed to fetch feed: #{reply.code} #{reply.message}") unless reply.is_a?(Net::HTTPSuccess)

  xml = reply.body || ""
  # Additional size check (already checked in fetch_with_redirects, but double-check for safety)
  if xml.bytesize > MAX_RESPONSE_SIZE
    raise StatusMcp::ResponseSizeExceededError.new(xml.bytesize, MAX_RESPONSE_SIZE, uri: feed_url)
  end

  # Parse RSS/Atom feed
  parse_feed(xml, max_length)
rescue => e
  # Size-limit violations must reach the caller; everything else becomes an error result
  raise if e.is_a?(StatusMcp::ResponseSizeExceededError)

  failure.call("Error parsing feed: #{e.message}")
end

#fetch_and_parse_incident_io_api(api_url, max_length) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 506

# Downloads the incident.io proxy API document and parses it with
# parse_incident_io_api.
#
# @param api_url [String] API endpoint to download
# @param max_length [Integer] size budget forwarded to parse_incident_io_api
# @return [Hash] the parse result, or a hash with :error set and empty data
#   when the HTTP response is not a success, the JSON is malformed, or any
#   other error occurs
# @raise [StatusMcp::ResponseSizeExceededError] when the body is larger than
#   MAX_RESPONSE_SIZE; this is the only error that is propagated
def fetch_and_parse_incident_io_api(api_url, max_length)
  # Shape shared by every failure result
  failure = ->(reason) { {error: reason, latest_status: nil, history: [], messages: []} }

  reply = fetch_with_redirects(api_url, accept: "application/json")
  return failure.call("Failed to fetch API: #{reply.code} #{reply.message}") unless reply.is_a?(Net::HTTPSuccess)

  payload = reply.body || ""
  # Additional size check (already checked in fetch_with_redirects, but double-check for safety)
  if payload.bytesize > MAX_RESPONSE_SIZE
    raise StatusMcp::ResponseSizeExceededError.new(payload.bytesize, MAX_RESPONSE_SIZE, uri: api_url)
  end

  # Parse JSON response
  parse_incident_io_api(payload, max_length)
rescue => e
  # Size-limit violations must reach the caller; everything else becomes an error result
  raise if e.is_a?(StatusMcp::ResponseSizeExceededError)

  label = e.is_a?(JSON::ParserError) ? "Error parsing API JSON" : "Error fetching API"
  failure.call("#{label}: #{e.message}")
end

#fetch_with_redirects(url, max_redirects: 5, accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 364

# Performs an HTTP GET, following redirects, and enforces MAX_RESPONSE_SIZE on
# successful responses.
#
# The body of a 2xx response is streamed chunk by chunk and the download is
# aborted as soon as the limit is crossed. Calling Net::HTTP#request without
# a block would buffer (and transparently decompress) the complete body
# before any size check could run, which defeats the purpose of the limit.
#
# @param url [String] URL to fetch
# @param max_redirects [Integer] maximum number of requests issued while
#   following redirects
# @param accept [String] value of the Accept request header
# @return [Net::HTTPResponse] the final (non-redirect) response; for 2xx
#   responses +body+ holds the fully read body
# @raise [StatusMcp::ResponseSizeExceededError] when the declared or actual
#   body size of a 2xx response exceeds MAX_RESPONSE_SIZE
# @raise [RuntimeError] when more than +max_redirects+ redirects are encountered
def fetch_with_redirects(url, max_redirects: 5, accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
  current_url = url
  redirect_count = 0

  while redirect_count < max_redirects
    uri = URI(current_url)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = (uri.scheme == "https")
    if http.use_ssl?
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
      http.ca_file = OpenSSL::X509::DEFAULT_CERT_FILE if File.exist?(OpenSSL::X509::DEFAULT_CERT_FILE)
    end
    http.read_timeout = 10
    http.open_timeout = 10

    request = Net::HTTP::Get.new(uri)
    request["User-Agent"] = "Mozilla/5.0 (compatible; StatusMcp/1.0)"
    request["Accept"] = accept

    response = http.request(request) do |streamed|
      # Redirects and error responses are left to Net::HTTP's normal buffering
      next unless streamed.is_a?(Net::HTTPSuccess)

      # Check Content-Length first if available, so an oversized body is never read at all
      content_length = streamed["Content-Length"]
      if content_length && content_length.to_i > MAX_RESPONSE_SIZE
        raise StatusMcp::ResponseSizeExceededError.new(content_length.to_i, MAX_RESPONSE_SIZE, uri: uri.to_s)
      end

      # Content-Length might be missing or incorrect (and describes the compressed
      # size for encoded bodies), so also count the bytes actually received
      buffer = String.new
      streamed.read_body do |chunk|
        buffer << chunk
        if buffer.bytesize > MAX_RESPONSE_SIZE
          raise StatusMcp::ResponseSizeExceededError.new(buffer.bytesize, MAX_RESPONSE_SIZE, uri: uri.to_s)
        end
      end

      # read_body with a block does not retain the data; store it so callers can use #body
      streamed.body = buffer
    end

    # Handle redirects (301, 302, 307, 308)
    if response.is_a?(Net::HTTPRedirection) && response["location"]
      redirect_count += 1
      # URI.join resolves relative Location headers against the current URL
      current_url = URI.join(current_url, response["location"]).to_s
      next
    end

    return response
  end

  # Too many redirects
  raise "Too many redirects (max: #{max_redirects})"
end

#might_be_incident_io?(status_url) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/status_mcp/server.rb', line 475

# Tells whether a status page is known to be hosted on incident.io.
#
# Detection is purely host based. Host names are case-insensitive, and URI
# keeps the host exactly as it was written, so the host is lower-cased before
# it is compared against the list of known domains.
#
# @param status_url [String] URL of the status page
# @return [Boolean] true when the URL's host is a known incident.io domain
def might_be_incident_io?(status_url)
  # Known incident.io domains
  incident_io_domains = %w[
    status.openai.com
    status.notion.so
    status.zapier.com
    status.buffer.com
  ]

  host = URI(status_url).host
  return false if host.nil?

  # Could add HTML check here in the future
  incident_io_domains.include?(host.downcase)
end

#parse_feed(feed_body, max_length) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 701

# Parses an RSS or Atom feed into a latest status plus a list of history lines.
#
# The document is parsed with Nokogiri::XML. It is treated as RSS when its
# root element is named "rss" (or an "rss" node is found), and as Atom when
# the root is "feed" (or a "feed" node is found). When both match, the RSS
# branch wins because it is tested first.
#
# For every item/entry one text line is built as
# "title - cleaned body (date)"; lines shorter than 20 characters are dropped.
# The first "Status: <word>" found in an item's body or title whose word is
# on the known-status list becomes the latest status. When no item yields
# one, the status is inferred from the feed title or from keywords in the
# history lines.
#
# @param feed_body [String] raw feed XML
# @param max_length [Integer] when the newline-joined history is longer than
#   this many characters, it is passed through truncate_array
# @return [Hash] :latest_status (String or nil), :history (Array of String),
#   :messages (always empty); when the document is neither RSS nor Atom,
#   a hash with :error set to "Not a valid RSS or Atom feed" and empty data
def parse_feed(feed_body, max_length)
  doc = Nokogiri::XML(feed_body)

  # Determine feed type (RSS or Atom)
  is_atom = doc.root&.name == "feed" || doc.at("feed")
  is_rss = doc.root&.name == "rss" || doc.at("rss")

  unless is_rss || is_atom
    return {
      error: "Not a valid RSS or Atom feed",
      latest_status: nil,
      history: [],
      messages: []
    }
  end

  history_items = []
  latest_status = nil

  if is_rss
    # Parse RSS feed
    items = doc.css("item")
    items.each do |item|
      title = item.css("title").first&.text&.strip || ""
      description = item.css("description").first&.text&.strip || ""
      pub_date = item.css("pubDate").first&.text&.strip || ""

      # Clean HTML from description
      if description.include?("<")
        desc_doc = Nokogiri::HTML(description)
        # Remove lists and other HTML elements, get clean text
        desc_doc.css("ul, ol, li, br").each { |el| el.replace("\n") }
        description = desc_doc.text.strip
        # Normalize whitespace
        description = description.gsub(/\n{2,}/, "\n").gsub(/[ \t]{2,}/, " ").strip
      end

      # Extract status from description (look for "Status: ..." patterns)
      # Try to get just the status word (Resolved, Operational, etc.)
      status_match = description.match(/Status:\s*([A-Za-z]+)/i) || title.match(/Status:\s*([A-Za-z]+)/i)
      # Only the first item that yields a status is considered (!latest_status)
      if status_match && !latest_status
        status_word = status_match[1].strip
        # Only use if it's a known status word
        if status_word.match?(/^(Resolved|Operational|Degraded|Down|Investigating|Monitoring|Identified|Partial|Major|Minor)$/i)
          latest_status = status_word
        end
      end

      # Build history item (clean up description first)
      # Remove status line and component lists from description for cleaner output
      clean_description = description.dup
      clean_description = clean_description.gsub(/Status:\s*[^\n]+/i, "").strip
      clean_description = clean_description.gsub(/Affected components[^\n]*/i, "").strip
      clean_description = clean_description.gsub(/\(Operational\)/i, "").strip
      clean_description = clean_description.gsub(/\n{2,}/, "\n").strip

      item_text = title.to_s
      # Descriptions of 10 characters or fewer are treated as noise and left out
      if clean_description && !clean_description.empty? && clean_description.length > 10
        item_text += " - #{clean_description[0..500]}" # Limit description length
      end
      item_text += " (#{pub_date})" if pub_date && !pub_date.empty?

      history_items << purify_text(item_text) if item_text.length >= 20
    end
  elsif is_atom
    # Parse Atom feed
    entries = doc.css("entry")
    entries.each do |entry|
      title = entry.css("title").first&.text&.strip || ""
      # Prefer <content>, fall back to <summary>; prefer <updated>, fall back to <published>
      content = entry.css("content").first&.text&.strip || entry.css("summary").first&.text&.strip || ""
      updated = entry.css("updated").first&.text&.strip || entry.css("published").first&.text&.strip || ""

      # Clean HTML from content
      if content.include?("<")
        content_doc = Nokogiri::HTML(content)
        # Remove lists and other HTML elements, get clean text
        content_doc.css("ul, ol, li, br").each { |el| el.replace("\n") }
        content = content_doc.text.strip
        # Normalize whitespace
        content = content.gsub(/\n{2,}/, "\n").gsub(/[ \t]{2,}/, " ").strip
      end

      # Extract status from content (look for "Status: ..." patterns)
      # Try to get just the status word (Resolved, Operational, etc.)
      status_match = content.match(/Status:\s*([A-Za-z]+)/i) || title.match(/Status:\s*([A-Za-z]+)/i)
      # Only the first entry that yields a status is considered (!latest_status)
      if status_match && !latest_status
        status_word = status_match[1].strip
        # Only use if it's a known status word
        if status_word.match?(/^(Resolved|Operational|Degraded|Down|Investigating|Monitoring|Identified|Partial|Major|Minor)$/i)
          latest_status = status_word
        end
      end

      # Build history item (clean up content first)
      # Remove status line and component lists from content for cleaner output
      clean_content = content.dup
      clean_content = clean_content.gsub(/Status:\s*[^\n]+/i, "").strip
      clean_content = clean_content.gsub(/Affected components[^\n]*/i, "").strip
      clean_content = clean_content.gsub(/\(Operational\)/i, "").strip
      clean_content = clean_content.gsub(/\n{2,}/, "\n").strip

      item_text = title.to_s
      # Content of 10 characters or fewer is treated as noise and left out
      if clean_content && !clean_content.empty? && clean_content.length > 10
        item_text += " - #{clean_content[0..500]}" # Limit content length
      end
      item_text += " (#{updated})" if updated && !updated.empty?

      history_items << purify_text(item_text) if item_text.length >= 20
    end
  end

  # Determine overall status from feed title or latest item
  unless latest_status
    feed_title = doc.css("channel > title, feed > title").first&.text&.strip
    # Only use feed title if it's a short status word, not a page title
    # NOTE(review): ^ and $ match per line in Ruby, so a multi-line title with one
    # matching line would pass this check; \A and \z would anchor the whole string
    if feed_title && feed_title.length < 50 && feed_title.match?(/^(operational|degraded|down|outage|incident|maintenance|all systems operational)$/i)
      latest_status = feed_title
    elsif history_items.any?
      # Check if all items are scheduled maintenance (not actual incidents)
      scheduled_count = history_items.count { |item| item.match?(/scheduled|maintenance/i) && !item.match?(/incident|outage|degraded|down|investigating/i) }

      # Check if all items are resolved
      resolved_count = history_items.count { |item| item.match?(/resolved|operational/i) && !item.match?(/investigating|monitoring|identified/i) }

      # If all are scheduled maintenance or all resolved, likely operational
      latest_status = if scheduled_count == history_items.length || (resolved_count == history_items.length && history_items.length > 0)
        "Operational"
      # If there are active incidents (investigating, monitoring, identified)
      elsif history_items.any? { |item| item.match?(/investigating|monitoring|identified|degraded|down|outage/i) && !item.match?(/resolved|operational/i) }
        "See recent incidents"
      # If we have history but can't determine, default to operational (better than "See recent incidents")
      else
        "Operational"
      end
    end
  end

  # Truncate if needed
  total_length = history_items.join("\n").length
  if total_length > max_length
    history_items = truncate_array(history_items, max_length)
  end

  {
    latest_status: latest_status,
    history: history_items,
    messages: []
  }
end

#parse_incident_io_api(json_body, max_length) ⇒ `Object`

# File 'lib/status_mcp/server.rb', line 545

# Parses the JSON document served by an incident.io status page proxy API.
#
# Reads "ongoing_incidents", "scheduled_maintenances" and "components" from
# the document's "summary" object. Every incident and maintenance becomes one
# history line. The latest status is the status of the first ongoing
# incident; without incidents it is "See incidents" when maintenances are
# scheduled, otherwise "Operational" or "Degraded Performance" depending on
# the component statuses.
#
# @param json_body [String] raw JSON text
# @param max_length [Integer] when the newline-joined history is longer than
#   this many characters, it is passed through truncate_array
# @return [Hash] :latest_status (String), :history (Array of String) and
#   :messages (always empty)
# @raise [JSON::ParserError] when +json_body+ is not valid JSON
def parse_incident_io_api(json_body, max_length)
  data = JSON.parse(json_body)
  summary = data["summary"] || {}

  ongoing_incidents = summary["ongoing_incidents"] || []
  scheduled_maintenances = summary["scheduled_maintenances"] || []
  components = summary["components"] || []

  # Reduces an HTML fragment to its text; text without markup is returned untouched
  plain_text = lambda do |text|
    text.include?("<") ? Nokogiri::HTML(text).text.strip : text
  end

  history_items = []
  latest_status = nil

  # Extract from ongoing incidents
  ongoing_incidents.each do |incident|
    title = incident["name"] || "Ongoing Incident"
    status = incident["status"] || "Investigating"
    description = plain_text.call(incident["description"] || "")

    item_text = "#{title} - Status: #{status}"
    item_text += " - #{description[0..300]}" unless description.empty?

    history_items << purify_text(item_text)

    # Use first incident's status as latest status
    latest_status ||= status
  end

  # Extract from scheduled maintenances
  scheduled_maintenances.each do |maintenance|
    title = maintenance["name"] || "Scheduled Maintenance"
    status = maintenance["status"] || "Scheduled"
    description = plain_text.call(maintenance["description"] || "")

    scheduled_for = maintenance["scheduled_for"] || ""
    scheduled_until = maintenance["scheduled_until"] || ""

    item_text = "#{title} - Status: #{status}"
    item_text += " - Scheduled: #{scheduled_for}" unless scheduled_for.empty?
    item_text += " until #{scheduled_until}" unless scheduled_until.empty?
    item_text += " - #{description[0..300]}" unless description.empty?

    history_items << purify_text(item_text)
  end

  # Determine overall status if no incident provided one
  unless latest_status
    latest_status = if ongoing_incidents.empty? && scheduled_maintenances.empty?
      # A component counts as healthy when it reports no status at all or one
      # containing "operational"; a single unhealthy component degrades the whole
      all_operational = components.all? do |component|
        component_status = component["status"] || component["operational_status"]
        component_status.nil? || component_status.downcase.include?("operational")
      end

      all_operational ? "Operational" : "Degraded Performance"
    else
      "See incidents"
    end
  end

  # Truncate if needed
  total_length = history_items.join("\n").length
  history_items = truncate_array(history_items, max_length) if total_length > max_length

  {
    latest_status: latest_status,
    history: history_items,
    messages: []
  }
end

Class: StatusMcp::Server::FetchStatusTool

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#build_feed_urls(status_url) ⇒ Object

#build_history_url(status_url) ⇒ Object

#build_incident_io_api_url(status_url) ⇒ Object

#call(status_url:, max_length: 10000) ⇒ Object

#fetch_and_extract(url, max_length, history_only: false) ⇒ Object

#fetch_and_parse_feed(feed_url, max_length) ⇒ Object

#fetch_and_parse_incident_io_api(api_url, max_length) ⇒ Object

#fetch_with_redirects(url, max_redirects: 5, accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") ⇒ Object

#might_be_incident_io?(status_url) ⇒ Boolean

#parse_feed(feed_body, max_length) ⇒ Object

#parse_incident_io_api(json_body, max_length) ⇒ Object

#build_feed_urls(status_url) ⇒ `Object`

#build_history_url(status_url) ⇒ `Object`

#build_incident_io_api_url(status_url) ⇒ `Object`

#call(status_url:, max_length: 10000) ⇒ `Object`

#fetch_and_extract(url, max_length, history_only: false) ⇒ `Object`

#fetch_and_parse_feed(feed_url, max_length) ⇒ `Object`

#fetch_and_parse_incident_io_api(api_url, max_length) ⇒ `Object`

#fetch_with_redirects(url, max_redirects: 5, accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") ⇒ `Object`

#might_be_incident_io?(status_url) ⇒ `Boolean`

#parse_feed(feed_body, max_length) ⇒ `Object`

#parse_incident_io_api(json_body, max_length) ⇒ `Object`