Class: Ollama::Documents::Splitters::RecursiveCharacter

Inherits:
Object
Defined in:
lib/ollama/documents/splitters/character.rb

Constant Summary collapse

# Default list of separator patterns. #split takes the first pattern for the
# current pass and hands the remaining ones to the recursive call, so the
# order below is the order in which they are applied.
DEFAULT_SEPARATORS =
[
  # Two or more consecutive line breaks (LF or CRLF).
  /(?:\r?\n){2,}/,
  # A single line break (LF or CRLF).
  /\r?\n/,
  # A word boundary (zero-width).
  /\b/,
  # The empty pattern, which matches at every position.
  //,
].freeze

Instance Method Summary collapse

Constructor Details

#initialize(separators: DEFAULT_SEPARATORS, include_separator: false, chunk_size: 4096) ⇒ RecursiveCharacter

Returns a new instance of RecursiveCharacter.



44
45
46
47
48
49
# File 'lib/ollama/documents/splitters/character.rb', line 44

# Sets up the splitter configuration used later by #split.
#
# @param separators [Array] separator patterns; must respond to +empty?+
#   and must not be empty
# @param include_separator [Boolean] stored as-is and passed on to the
#   Character splitter by #split
# @param chunk_size [Integer] size threshold; #split re-splits any piece
#   whose +size+ exceeds it
# @raise [ArgumentError] if +separators+ is empty
def initialize(separators: DEFAULT_SEPARATORS, include_separator: false, chunk_size: 4096)
  if separators.empty?
    raise ArgumentError, "non-empty array of separators required"
  end

  @separators        = separators
  @include_separator = include_separator
  @chunk_size        = chunk_size
end

Instance Method Details

#split(text, separators: @separators) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/ollama/documents/splitters/character.rb', line 51

# Splits +text+ by applying the given separators one after another.
#
# The first separator is handed to a Character splitter (together with
# @include_separator and @chunk_size). Every resulting piece whose +size+
# exceeds @chunk_size is split again using only the remaining separators;
# all other pieces are kept as they are. Once no separators remain, the
# text is returned unsplit, even if it is still larger than @chunk_size.
#
# @param text [String] the text to split
# @param separators [Array] separator patterns still to be tried; the
#   passed array itself is not modified
# @return [Array] the resulting pieces, or +[ text ]+ when there are no
#   separators or the Character splitter produced no pieces
def split(text, separators: @separators)
  return [ text ] if separators.empty?

  # Destructure instead of dup + shift so the caller's array stays untouched.
  separator, *remaining = separators
  splitter = Character.new(
    separator:,
    include_separator: @include_separator,
    chunk_size: @chunk_size
  )
  pieces = splitter.split(text)
  return [ text ] if pieces.count.zero?

  pieces.flat_map do |piece|
    piece.size > @chunk_size ? split(piece, separators: remaining) : [ piece ]
  end
end