Class: Ast::Merge::FencedCodeBlockDetector

Inherits:
RegionDetectorBase show all
Defined in:
lib/ast/merge/fenced_code_block_detector.rb

Overview

Detects fenced code blocks with a specific language identifier.

This detector finds Markdown-style fenced code blocks (using ``` or ~~~) that have a specific language identifier. It can be configured for any language: ruby, json, yaml, mermaid, etc.

Examples:

Detecting Ruby code blocks

detector = FencedCodeBlockDetector.new("ruby", aliases: ["rb"])
regions = detector.detect_all(markdown_source)

Using factory methods

detector = FencedCodeBlockDetector.ruby
detector = FencedCodeBlockDetector.yaml
detector = FencedCodeBlockDetector.json

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from RegionDetectorBase

#name, #strip_delimiters?

Constructor Details

#initialize(language, aliases: []) ⇒ FencedCodeBlockDetector

Creates a new detector for the specified language.

Parameters:

  • language (String, Symbol)

    The language identifier (e.g., “ruby”, “json”)

  • aliases (Array<String, Symbol>) (defaults to: [])

    Alternative identifiers (e.g., [“rb”] for ruby)



32
33
34
35
36
37
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 32

# Creates a new detector for the specified language.
#
# The language and every alias are stored as lower-case strings; fence
# languages are lower-cased before comparison, so matching is
# case-insensitive.
#
# @param language [String, Symbol] the primary language identifier (e.g. "ruby")
# @param aliases [Array<String, Symbol>, String, Symbol, nil] alternative
#   identifiers (e.g. ["rb"]); a single alias or nil is accepted as well
def initialize(language, aliases: [])
  super()
  @language = language.to_s.downcase
  # Array() tolerates nil and a bare alias instead of raising NoMethodError.
  @aliases = Array(aliases).map { |a| a.to_s.downcase }
  # Every identifier an opening fence may carry, primary language first.
  @all_identifiers = [@language] + @aliases
end

Instance Attribute Details

#aliases ⇒ Array<String> (readonly)

Returns alternative language identifiers.

Returns:

  • (Array<String>)

    Alternative language identifiers



26
27
28
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 26

# Alternative language identifiers accepted by this detector.
#
# The values are the lower-cased strings produced by the constructor. The
# stored array itself is returned, not a copy.
#
# @return [Array<String>] alternative language identifiers
def aliases
  @aliases
end

#language ⇒ String (readonly)

Returns the primary language identifier.

Returns:

  • (String)

    The primary language identifier



23
24
25
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 23

# The primary language identifier of this detector.
#
# The value is the lower-cased string produced by the constructor.
#
# @return [String] the primary language identifier
def language
  @language
end

Class Method Details

.bash ⇒ FencedCodeBlockDetector

Creates a detector for Bash/Shell code blocks.



181
182
183
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 181

# Builds a detector for Bash/Shell code blocks.
#
# @return [FencedCodeBlockDetector] detector for "bash" that also accepts
#   the "sh", "shell" and "zsh" identifiers
def bash
  shell_aliases = %w[sh shell zsh]
  new("bash", aliases: shell_aliases)
end

.css ⇒ FencedCodeBlockDetector

Creates a detector for CSS code blocks.



199
200
201
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 199

# Creates a detector for CSS code blocks.
#
# @return [FencedCodeBlockDetector] detector for the "css" identifier, with no aliases
def css
  new("css")
end

.html ⇒ FencedCodeBlockDetector

Creates a detector for HTML code blocks.



193
194
195
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 193

# Creates a detector for HTML code blocks.
#
# @return [FencedCodeBlockDetector] detector for the "html" identifier, with no aliases
def html
  new("html")
end

.javascript ⇒ FencedCodeBlockDetector

Creates a detector for JavaScript code blocks.



163
164
165
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 163

# Builds a detector for JavaScript code blocks.
#
# @return [FencedCodeBlockDetector] detector for "javascript" that also
#   accepts the "js" identifier
def javascript
  short_names = %w[js]
  new("javascript", aliases: short_names)
end

.json ⇒ FencedCodeBlockDetector

Creates a detector for JSON code blocks.



139
140
141
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 139

# Creates a detector for JSON code blocks.
#
# @return [FencedCodeBlockDetector] detector for the "json" identifier, with no aliases
def json
  new("json")
end

.markdown ⇒ FencedCodeBlockDetector

Creates a detector for Markdown code blocks (nested markdown).



205
206
207
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 205

# Builds a detector for Markdown code blocks (Markdown nested inside Markdown).
#
# @return [FencedCodeBlockDetector] detector for "markdown" that also
#   accepts the "md" identifier
def markdown
  short_names = %w[md]
  new("markdown", aliases: short_names)
end

.mermaid ⇒ FencedCodeBlockDetector

Creates a detector for Mermaid diagram blocks.



157
158
159
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 157

# Creates a detector for Mermaid diagram blocks.
#
# @return [FencedCodeBlockDetector] detector for the "mermaid" identifier, with no aliases
def mermaid
  new("mermaid")
end

.python ⇒ FencedCodeBlockDetector

Creates a detector for Python code blocks.



175
176
177
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 175

# Builds a detector for Python code blocks.
#
# @return [FencedCodeBlockDetector] detector for "python" that also
#   accepts the "py" identifier
def python
  short_names = %w[py]
  new("python", aliases: short_names)
end

.ruby ⇒ FencedCodeBlockDetector

Creates a detector for Ruby code blocks.



133
134
135
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 133

# Builds a detector for Ruby code blocks.
#
# @return [FencedCodeBlockDetector] detector for "ruby" that also accepts
#   the "rb" identifier
def ruby
  short_names = %w[rb]
  new("ruby", aliases: short_names)
end

.sql ⇒ FencedCodeBlockDetector

Creates a detector for SQL code blocks.



187
188
189
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 187

# Creates a detector for SQL code blocks.
#
# @return [FencedCodeBlockDetector] detector for the "sql" identifier, with no aliases
def sql
  new("sql")
end

.toml ⇒ FencedCodeBlockDetector

Creates a detector for TOML code blocks.



151
152
153
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 151

# Creates a detector for TOML code blocks.
#
# @return [FencedCodeBlockDetector] detector for the "toml" identifier, with no aliases
def toml
  new("toml")
end

.typescript ⇒ FencedCodeBlockDetector

Creates a detector for TypeScript code blocks.



169
170
171
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 169

# Builds a detector for TypeScript code blocks.
#
# @return [FencedCodeBlockDetector] detector for "typescript" that also
#   accepts the "ts" identifier
def typescript
  short_names = %w[ts]
  new("typescript", aliases: short_names)
end

.yaml ⇒ FencedCodeBlockDetector

Creates a detector for YAML code blocks.



145
146
147
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 145

# Builds a detector for YAML code blocks.
#
# @return [FencedCodeBlockDetector] detector for "yaml" that also accepts
#   the "yml" identifier
def yaml
  short_names = %w[yml]
  new("yaml", aliases: short_names)
end

Instance Method Details

#detect_all(source) ⇒ Array<Region>

Detects all fenced code blocks with the configured language.

Parameters:

  • source (String)

    The full document content

Returns:

  • (Array<Region>)

    All detected code blocks, sorted by start_line



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 56

# Detects all fenced code blocks with the configured language.
#
# The source is scanned line by line. An opening fence is a line consisting
# of optional indentation, three or more backticks or tildes, and a language
# identifier, which is lower-cased and compared against the configured
# language and aliases. The block ends at the first following line that
# repeats the same indentation and the same fence character at least as many
# times as the opening fence. Fences for other languages are not tracked.
#
# @param source [String, nil] the full document content
# @return [Array<Region>] all detected code blocks in document order (and
#   therefore sorted by start_line); blocks that are never closed are omitted
def detect_all(source)
  return [] if source.nil? || source.empty?

  regions = []
  block = nil # state of the currently open fence; nil while outside a block

  source.lines.each_with_index do |line, idx|
    line_num = idx + 1

    if block.nil?
      # Match opening fence: ```lang or ~~~lang (optionally indented)
      match = line.match(/^(\s*)(`{3,}|~{3,})(\w*)\s*$/)
      next unless match

      lang = match[3].downcase
      next unless @all_identifiers.include?(lang)

      indent = match[1]
      fence = match[2]
      # Closing fence must use same char, same indent, and at least same length.
      # Compiled once per block rather than once per scanned line.
      closing_pattern = /^#{Regexp.escape(indent)}#{Regexp.escape(fence[0])}{#{fence.length},}\s*$/

      block = {
        start_line: line_num,
        language: lang,
        indent: indent,
        fence: fence,
        closing_pattern: closing_pattern,
        content_lines: [],
      }
    elsif block[:closing_pattern].match?(line)
      block_indent = block[:indent]

      regions << build_region(
        type: region_type,
        content: block[:content_lines].join,
        start_line: block[:start_line],
        end_line: line_num,
        # Delimiters are normalised: lower-cased language, closing fence cut
        # to the opening fence's length.
        delimiters: ["#{block[:fence]}#{block[:language]}", block[:fence]],
        metadata: {language: block[:language], indent: block_indent.empty? ? nil : block_indent},
      )
      block = nil
    else
      # Accumulate content lines, stripping the opening fence's indent from
      # lines that start with it; other lines are kept unchanged.
      block[:content_lines] << line.delete_prefix(block[:indent])
    end
  end

  # Note: Unclosed blocks are ignored (no region created)
  regions
end

#inspect ⇒ String

Returns a description of this detector.

Returns:

  • (String)

    A description of this detector



125
126
127
128
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 125

# Builds a short human-readable description of this detector.
#
# @return [String] the class name and language, followed by the aliases
#   when at least one alias is configured
def inspect
  description = "#<#{self.class.name} language=#{@language}"
  description += " aliases=#{@aliases.inspect}" unless @aliases.empty?
  "#{description}>"
end

#matches_language?(lang) ⇒ Boolean

Check if a language identifier matches this detector.

Parameters:

  • lang (String)

    The language identifier to check

Returns:

  • (Boolean)

    true if the language matches



48
49
50
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 48

# Checks whether a language identifier is handled by this detector.
#
# The identifier is converted to a lower-case string before it is compared
# with the primary language and its aliases.
#
# @param lang [String, Symbol] the language identifier to check
# @return [Boolean] true if the identifier is the language or one of its aliases
def matches_language?(lang)
  candidate = lang.to_s.downcase
  @all_identifiers.include?(candidate)
end

#region_type ⇒ Symbol

Returns the region type (e.g., :ruby_code_block).

Returns:

  • (Symbol)

    The region type (e.g., :ruby_code_block)



40
41
42
# File 'lib/ast/merge/fenced_code_block_detector.rb', line 40

# Names the kind of region this detector produces.
#
# @return [Symbol] the language followed by "_code_block" (e.g., :ruby_code_block)
def region_type
  "#{@language}_code_block".to_sym
end