Class: Baran::MarkdownSplitter

Inherits:
RecursiveCharacterTextSplitter
Defined in:
lib/baran/markdown_splitter.rb

Instance Attribute Summary

Attributes inherited from RecursiveCharacterTextSplitter

#separators

Attributes inherited from TextSplitter

#chunk_overlap, #chunk_size

Instance Method Summary

Methods inherited from RecursiveCharacterTextSplitter

#splitted

Methods inherited from TextSplitter

#chunks, #joined, #merged, #splitted

Constructor Details

#initialize(chunk_size: 1024, chunk_overlap: 64) ⇒ MarkdownSplitter

Returns a new instance of MarkdownSplitter.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/baran/markdown_splitter.rb', line 5

# Creates a splitter configured with a fixed list of Markdown separators.
#
# The list passed to the superclass is, in this order: the six ATX heading
# prefixes (h1 through h6), the end of a fenced code block, the three
# horizontal-rule spellings, a blank line, a single newline, a space, and
# finally the empty string.
#
# @param chunk_size forwarded unchanged to the superclass constructor (default 1024)
# @param chunk_overlap forwarded unchanged to the superclass constructor (default 64)
def initialize(chunk_size: 1024, chunk_overlap: 64)
  # "\n# " .. "\n###### " -- h1 through h6, each starting on a new line
  headings = (1..6).map { |depth| "\n#{'#' * depth} " }
  # "\n\n***\n\n", "\n\n---\n\n", "\n\n___\n\n" -- horizontal rules
  rules = %w[*** --- ___].map { |marker| "\n\n#{marker}\n\n" }

  super(
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    separators: [
      *headings,
      "```\n\n", # code block
      *rules,
      "\n\n", # blank line
      "\n", # new line
      " ", # space
      "" # empty
    ]
  )
end