Module: Aircana::Contexts::ConfluenceContent

Included in:
Confluence
Defined in:
lib/aircana/contexts/confluence_content.rb

Instance Method Summary collapse

Instance Method Details

#convert_to_markdown(html_content) ⇒ Object



14
15
16
17
18
19
20
# File 'lib/aircana/contexts/confluence_content.rb', line 14

# Converts Confluence HTML into GitHub-flavored Markdown.
#
# @param html_content [String, nil] HTML to convert
# @return [String] "" for nil or empty input; otherwise the result of
#   ReverseMarkdown.convert on the macro-preprocessed HTML
def convert_to_markdown(html_content)
  nothing_to_convert = html_content.nil? || html_content.empty?
  return "" if nothing_to_convert

  # Confluence macros are rewritten first so the converter only sees plain HTML
  ReverseMarkdown.convert(
    preprocess_confluence_macros(html_content),
    github_flavored: true
  )
end

#fetch_page_content(page_id) ⇒ Object



6
7
8
9
10
11
12
# File 'lib/aircana/contexts/confluence_content.rb', line 6

# Fetches the storage-format body of a Confluence page.
#
# @param page_id [Object] page identifier; interpolated into the log line and
#   passed to get_page_content
# @return [String] the value found at body -> storage -> value in the response,
#   or "" when that path is absent; on failure, whatever handle_api_error returns
def fetch_page_content(page_id)
  Aircana.human_logger.info("Looking for page with ID `#{page_id}`")
  response = get_page_content(page_id)
  response.dig("body", "storage", "value") || ""
rescue StandardError => e
  # HTTParty::Error is a StandardError subclass, so listing it separately was a
  # shadowed (redundant) rescue; StandardError alone covers the same failures.
  handle_api_error("fetch content for page #{page_id}", e, "Failed to fetch page content")
end

#log_pages_found(count, kb_name) ⇒ Object

Logs how many pages were found for the given knowledge base.



63
64
65
# File 'lib/aircana/contexts/confluence_content.rb', line 63

# Writes an info-level line reporting how many pages were found for a knowledge base.
#
# @param count [Object] number of pages, interpolated into the message
# @param kb_name [Object] knowledge base name, interpolated into the message
# @return [Object] whatever the logger's #info returns
def log_pages_found(count, kb_name)
  summary = "Found #{count} pages for KB '#{kb_name}'"
  Aircana.human_logger.info(summary)
end

#preprocess_confluence_macros(html) ⇒ Object

Rewrites Confluence structured macros into plain HTML so the content can be converted to Markdown.



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/aircana/contexts/confluence_content.rb', line 23

# Rewrites Confluence storage-format macros into plain HTML so the result can
# be handed to the Markdown converter.
#
# Steps, in order:
# 1. drop code macros whose plain-text body is empty or whitespace
# 2. turn panel macros into <blockquote> elements
# 3. turn info/note/warning macros into labelled <blockquote> elements
# 4. unwrap any other macro that has a rich-text body, keeping the body
# 5. delete <ac:parameter> elements together with their text
# 6. strip every remaining <ac:...> tag, keeping the text between them
#
# @param html [String] Confluence storage-format HTML
# @return [String] a processed copy; the argument itself is not mutated
def preprocess_confluence_macros(html)
  cleaned = html.dup

  # Text allowed between a macro's opening tag and its body. It must not run
  # across another macro's opening or closing tag: a plain lazy `.*?` lets a
  # body-less macro (e.g. a self-closing toc, or a code block) pair up with the
  # body of a later macro and swallow all the content in between.
  macro_gap = "(?:(?!</?ac:structured-macro).)*?"
  macro_tail = ".*?</ac:structured-macro>"
  rich_body = "<ac:rich-text-body>(.*?)</ac:rich-text-body>"
  any_macro = "<ac:structured-macro[^>]*>"
  named_macro = ->(name) { %(<ac:structured-macro[^>]*ac:name="#{name}"[^>]*>) }
  multiline = ->(source) { Regexp.new(source, Regexp::MULTILINE) }

  # Remove empty code blocks (common issue with Confluence API)
  empty_code = "#{named_macro.call("code")}#{macro_gap}<ac:plain-text-body>\\s*</ac:plain-text-body>#{macro_tail}"
  cleaned.gsub!(multiline.call(empty_code), "")

  # Convert panel macros to blockquotes, preserving inner content
  panel = "#{named_macro.call("panel")}#{macro_gap}#{rich_body}#{macro_tail}"
  cleaned.gsub!(multiline.call(panel), '<blockquote>\1</blockquote>')

  # Convert info/note/warning macros to blockquotes with indicators
  {
    "info" => "ℹ️ Info:",
    "note" => "📝 Note:",
    "warning" => "⚠️ Warning:"
  }.each do |name, label|
    admonition = "#{named_macro.call(name)}#{macro_gap}#{rich_body}#{macro_tail}"
    cleaned.gsub!(multiline.call(admonition), "<blockquote><strong>#{label}</strong> \\1</blockquote>")
  end

  # Strip other structured macros but preserve rich text body content
  cleaned.gsub!(multiline.call("#{any_macro}#{macro_gap}#{rich_body}#{macro_tail}"), '\1')

  # Clean up Confluence parameter elements. This has to happen before the
  # generic tag strip below; afterwards no <ac:parameter> tags are left to
  # match and the parameter values would leak into the output as text.
  cleaned.gsub!(%r{<ac:parameter[^>]*>.*?</ac:parameter>}m, "")

  # Remove any remaining Confluence-specific tags
  cleaned.gsub!(%r{</?ac:[^>]*>}m, "")

  cleaned
end

#store_page_as_markdown(page, kb_name, kb_type = "local") ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
# File 'lib/aircana/contexts/confluence_content.rb', line 67

# Converts a page's body to Markdown and hands it to local storage.
#
# The HTML is read from the page's body -> storage -> value entry when present;
# otherwise it is fetched via fetch_page_content using the page's "id".
#
# @param page [Hash, nil] page data; read through "body", "id" and "title"
# @param kb_name [Object] forwarded as kb_name:
# @param kb_type [String] forwarded as kb_type: (defaults to "local")
# @return [Object] whatever @local_storage.store_content returns
def store_page_as_markdown(page, kb_name, kb_type = "local")
  html = page&.dig("body", "storage", "value")
  html ||= fetch_page_content(page&.[]("id"))
  markdown = convert_to_markdown(html)

  attributes = {
    title: page&.[]("title"),
    content: markdown,
    kb_name: kb_name,
    kb_type: kb_type
  }
  @local_storage.store_content(**attributes)
end