Module: Legion::Extensions::Knowledge::Helpers::Parser

Defined in:
lib/legion/extensions/knowledge/helpers/parser.rb

Class Method Summary

Class Method Details

.parse(file_path:) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/legion/extensions/knowledge/helpers/parser.rb', line 10

# Parses a file by delegating to the parser that matches its extension.
#
# The extension comparison is case-insensitive: `.md` goes to
# parse_markdown and `.txt` goes to parse_text.
#
# @param file_path [String] path of the file to parse
# @return [Array<Hash>] the delegate's result, or a one-element array holding
#   an `:error` / `:source_file` hash when the extension is not supported
def parse(file_path:)
  extension = ::File.extname(file_path).downcase

  return parse_markdown(file_path: file_path) if extension == '.md'
  return parse_text(file_path: file_path) if extension == '.txt'

  [{ error: 'unsupported format', source_file: file_path }]
end

.parse_markdown(file_path:) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/legion/extensions/knowledge/helpers/parser.rb', line 23

# Splits a Markdown file into sections delimited by level-1 (`# `) and
# level-2 (`## `) headings.
#
# Lines inside a fenced code block (opened by three or more backticks or
# tildes) are always kept as content, so a shell or Ruby comment such as
# `# install deps` inside a fence is not mistaken for a heading. Deeper
# headings (`### ` and below) are treated as ordinary content.
#
# Text that precedes the first heading is filed under the file's basename
# with an empty section path.
#
# @param file_path [String] path of the Markdown file; read as UTF-8
# @return [Array<Hash>] the sections accumulated through flush_section, or,
#   when nothing was flushed, a single hash (`:heading`, `:section_path`,
#   `:content`, `:source_file`) covering the whole stripped file
def parse_markdown(file_path:)
  content = ::File.read(file_path, encoding: 'utf-8')
  sections = []
  current_heading = ::File.basename(file_path, '.*')
  current_lines   = []
  section_path    = []
  fence           = nil # marker that opened the current code fence, if any

  content.each_line do |line|
    # A fence marker is a run of 3+ backticks or tildes, indented at most 3 spaces.
    marker = line[/\A {0,3}(`{3,}|~{3,})/, 1]

    if fence
      # Only a bare marker of the same character, at least as long as the
      # opener, closes the fence; everything else inside is literal content.
      closes = marker && marker[0] == fence[0] && marker.length >= fence.length && line.strip == marker
      fence = nil if closes
      current_lines << line
    elsif marker
      fence = marker
      current_lines << line
    elsif line.start_with?('# ', '## ')
      # flush_section is defined elsewhere in the file; presumably it appends
      # a section hash to `sections` - confirm against its definition.
      flush_section(sections, current_heading, section_path, current_lines, file_path) unless current_lines.empty?
      current_heading = line.sub(/^#+\s*/, '').chomp
      # A level-2 heading nests under the most recent level-1 heading (if any);
      # a level-1 heading starts a fresh path.
      section_path    = line.start_with?('## ') ? section_path.first(1) + [current_heading] : [current_heading]
      current_lines   = []
    else
      current_lines << line
    end
  end

  flush_section(sections, current_heading, section_path, current_lines, file_path) unless current_lines.empty?

  return sections unless sections.empty?

  # Nothing was flushed (e.g. empty file or headings without body text):
  # fall back to one section holding the entire file.
  [{ heading: ::File.basename(file_path, '.*'), section_path: [], content: content.strip, source_file: file_path }]
end

.parse_text(file_path:) ⇒ Object



51
52
53
54
55
56
# File 'lib/legion/extensions/knowledge/helpers/parser.rb', line 51

# Reads a plain-text file and returns its whole body as a single section.
#
# @param file_path [String] path of the text file; read as UTF-8
# @return [Array<Hash>] one-element array whose hash carries the file's
#   basename (extension removed) as `:heading`, an empty `:section_path`,
#   the stripped file body as `:content`, and `file_path` as `:source_file`
def parse_text(file_path:)
  body = ::File.read(file_path, encoding: 'utf-8').strip

  [
    {
      heading: ::File.basename(file_path, '.*'),
      section_path: [],
      content: body,
      source_file: file_path
    }
  ]
end