Class: LlmTranslate::DocumentSplitter

Inherits:
Object
  • Object
show all
Defined in:
lib/llm_translate/document_splitter.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config, logger = nil) ⇒ DocumentSplitter

Returns a new instance of DocumentSplitter.



7
8
9
10
# File 'lib/llm_translate/document_splitter.rb', line 7

# Creates a splitter bound to a configuration and a logger.
#
# @param config [Object] configuration object; stored unchanged in @config
# @param logger [Logger, nil] logger to report progress to; when nil (or
#   false) a Logger writing to $stdout at :info level is created instead
def initialize(config, logger = nil)
  # Resolve the fallback first so both assignments below are plain stores.
  logger ||= Logger.new($stdout, level: :info)

  @config = config
  @logger = logger
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



5
6
7
# File 'lib/llm_translate/document_splitter.rb', line 5

# Read-only accessor for the configuration held by this splitter.
#
# @return [Object] the current value of the @config instance variable
def config
  @config
end

#logger ⇒ Object (readonly)

Returns the value of attribute logger.



5
6
7
# File 'lib/llm_translate/document_splitter.rb', line 5

# Read-only accessor for the logger held by this splitter.
#
# @return [Object] the current value of the @logger instance variable
def logger
  @logger
end

Instance Method Details

#merge_translated_chunks(translated_chunks) ⇒ Object

合并翻译后的文档片段



26
27
28
29
30
31
32
33
# File 'lib/llm_translate/document_splitter.rb', line 26

# Merges translated document chunks back into a single document.
#
# A lone chunk is returned as-is (the same object, no logging). Any other
# number of chunks is logged and joined with a blank line between chunks.
#
# @param translated_chunks [Array<String>] translated pieces, in document order
# @return [String] the single chunk, or all chunks joined by "\n\n"
def merge_translated_chunks(translated_chunks)
  chunk_count = translated_chunks.length

  if chunk_count == 1
    translated_chunks.first
  else
    logger.info "Merging #{chunk_count} translated chunks..."

    # Plain merge: chunks are separated by a double newline.
    translated_chunks.join("\n\n")
  end
end

#split_document(content) ⇒ Object

拆分文档为多个片段



13
14
15
16
17
18
19
20
21
22
23
# File 'lib/llm_translate/document_splitter.rb', line 13

# Splits a document into multiple chunks when it is considered too large.
#
# When should_split?(content) is falsy the content is returned untouched,
# wrapped in a one-element array. Otherwise the content is passed through
# extract_markdown_sections and the result through build_chunks, with an
# info log line before and after.
#
# NOTE(review): should_split?, extract_markdown_sections and build_chunks
# are defined outside this snippet; the size limit and the chunking rules
# are presumably driven by config — confirm against their definitions.
#
# @param content [String] the full document text
# @return [Array] [content] when no split is needed, otherwise whatever
#   build_chunks returns (it must respond to #length)
def split_document(content)
  if should_split?(content)
    logger.info "Document size (#{content.length} chars) exceeds limit, splitting..."

    chunks = build_chunks(extract_markdown_sections(content))

    logger.info "Document split into #{chunks.length} chunks"
    chunks
  else
    [content]
  end
end