Class: Ragdoll::UnifiedDocumentManagement

Inherits:
Object
  • Object
show all
Defined in:
app/services/ragdoll/unified_document_management.rb

Overview

Unified document management service for a text-based RAG system. Handles the entire pipeline from document ingestion to searchable text embeddings.

Defined Under Namespace

Classes: ProcessingError

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ UnifiedDocumentManagement

Returns a new instance of UnifiedDocumentManagement.



21
22
23
# File 'app/services/ragdoll/unified_document_management.rb', line 21

# Builds the service with a fresh Ragdoll::DocumentConverter, kept in
# @converter. The instance methods use it to determine a file's document
# type and to convert the file to text.
def initialize
  @converter = Ragdoll::DocumentConverter.new
end

Class Method Details

.add_document(file_path, **options) ⇒ Object



9
10
11
# File 'app/services/ragdoll/unified_document_management.rb', line 9

# Class-level shortcut: instantiates the service and delegates to the
# instance method of the same name.
#
# @param file_path path handed unchanged to the instance method
# @param options [Hash] keyword options handed unchanged to the instance method
# @return whatever the instance-level #add_document returns
def self.add_document(file_path, **options)
  service = new
  service.add_document(file_path, **options)
end

.add_document_from_upload(uploaded_file, **options) ⇒ Object



13
14
15
# File 'app/services/ragdoll/unified_document_management.rb', line 13

# Class-level shortcut: instantiates the service and delegates to the
# instance method of the same name.
#
# @param uploaded_file upload object handed unchanged to the instance method
# @param options [Hash] keyword options handed unchanged to the instance method
# @return whatever the instance-level #add_document_from_upload returns
def self.add_document_from_upload(uploaded_file, **options)
  service = new
  service.add_document_from_upload(uploaded_file, **options)
end

.process_document(document_id) ⇒ Object



17
18
19
# File 'app/services/ragdoll/unified_document_management.rb', line 17

# Class-level shortcut: instantiates the service and delegates to the
# instance method of the same name.
#
# @param document_id identifier handed unchanged to the instance method
# @return whatever the instance-level #process_document returns
def self.process_document(document_id)
  service = new
  service.process_document(document_id)
end

Instance Method Details

#add_document(file_path, **options) ⇒ Object

Add a document from file path



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'app/services/ragdoll/unified_document_management.rb', line 26

# Ingest a document that already exists on disk.
#
# The converter determines the document type and extracts the text, a
# document is created with the expanded (absolute) path as its location, and
# the document is then handed to async or sync processing.
#
# @param file_path path of the file to ingest
# @param options [Hash] forwarded to create_unified_document; a truthy
#   :async selects process_document_async (called with the document's id)
#   instead of process_document_sync (called with the document itself)
# @return the document returned by create_unified_document, or nil when
#   nothing exists at file_path
def add_document(file_path, **options)
  return nil unless File.exist?(file_path)

  kind = @converter.determine_document_type(file_path)
  extracted_text = @converter.convert_to_text(file_path, kind)

  create_unified_document(
    location: File.expand_path(file_path),
    document_type: kind,
    text_content: extracted_text,
    **options
  ).tap do |record|
    # Hand off to the background path only when the caller asked for it.
    options[:async] ? process_document_async(record.id) : process_document_sync(record)
  end
end

#add_document_from_upload(uploaded_file, **options) ⇒ Object

Add a document from uploaded file



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'app/services/ragdoll/unified_document_management.rb', line 54

# Ingest an uploaded file by way of a temporary file.
#
# A temp file is obtained from create_temp_file_from_upload, the converter
# determines the type and extracts text from that temp path, and a document
# is created whose location is the upload's original filename
# ("uploaded_file" when that is nil/false). On the way out, whether or not
# an error was raised, the temp file is closed and, if it still reports a
# path, unlinked.
#
# @param uploaded_file upload object; must respond to #original_filename
# @param options [Hash] forwarded to create_unified_document; a truthy
#   :async selects process_document_async over process_document_sync
# @return the document returned by create_unified_document
def add_document_from_upload(uploaded_file, **options)
  scratch = create_temp_file_from_upload(uploaded_file)
  media_type = @converter.determine_document_type(scratch.path)
  extracted_text = @converter.convert_to_text(scratch.path, media_type)

  record = create_unified_document(
    location: uploaded_file.original_filename || "uploaded_file",
    document_type: media_type,
    text_content: extracted_text,
    **options
  )

  options[:async] ? process_document_async(record.id) : process_document_sync(record)
  record
ensure
  # scratch is still nil here when obtaining the temp file itself failed.
  unless scratch.nil?
    scratch.close
    scratch.unlink if scratch.path
  end
end

#batch_process_documents(file_paths, **options) ⇒ Object

Batch processing for multiple documents



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'app/services/ragdoll/unified_document_management.rb', line 133

# Add several documents in one call, collecting per-file failures instead of
# aborting the whole batch.
#
# Every path is passed to #add_document with the same options. A
# StandardError raised for one path is recorded and the loop continues. A
# nil result from #add_document (which it returns when nothing exists at the
# path) is also recorded as an error, so that :processed only ever holds
# real documents and :success_count is not inflated by missing files.
#
# @param file_paths [Array, nil] paths to ingest; nil is treated as an empty
#   batch and a single path is treated as a one-element batch
# @param options [Hash] forwarded unchanged to #add_document
# @return [Hash] with keys
#   :processed     - documents returned by #add_document
#   :errors        - hashes of the form { file_path:, error: } (message string)
#   :total         - number of paths in the batch
#   :success_count - size of :processed
#   :error_count   - size of :errors
def batch_process_documents(file_paths, **options)
  paths = Array(file_paths)
  processed = []
  errors = []

  paths.each do |file_path|
    begin
      document = add_document(file_path, **options)

      if document
        processed << document
      else
        # nil means no document was produced; add_document does this for a
        # path that does not exist on disk.
        errors << { file_path: file_path, error: "File not found: #{file_path}" }
      end
    rescue StandardError => e
      errors << { file_path: file_path, error: e.message }
    end
  end

  {
    processed: processed,
    errors: errors,
    total: paths.length,
    success_count: processed.length,
    error_count: errors.length
  }
end

#process_document(document_id) ⇒ Object

Process a document by ID



85
86
87
88
89
90
91
92
93
94
# File 'app/services/ragdoll/unified_document_management.rb', line 85

# Look up a stored document by id and run it through synchronous processing.
#
# The lookup uses Ragdoll::UnifiedDocument when that constant is defined and
# falls back to Ragdoll::Document otherwise.
#
# @param document_id identifier passed to the model's .find
# @return whatever process_document_sync returns
def process_document(document_id)
  model = defined?(Ragdoll::UnifiedDocument) ? Ragdoll::UnifiedDocument : Ragdoll::Document
  process_document_sync(model.find(document_id))
end

#processing_stats ⇒ Object

Get processing statistics



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'app/services/ragdoll/unified_document_management.rb', line 165

# Gather document and content statistics together with a short summary.
#
# Stats come from Ragdoll::UnifiedDocument / Ragdoll::UnifiedContent when
# Ragdoll::UnifiedDocument is defined, otherwise from Ragdoll::Document /
# Ragdoll::Content.
#
# @return [Hash] with keys
#   :documents          - the document model's stats, as returned
#   :content            - the content model's stats, as returned
#   :processing_summary - :total_documents and :total_embeddings copied from
#     the document stats, :processed_documents read from
#     by_status["processed"] (0 when absent), and :average_processing_time
#     from estimate_average_processing_time
def processing_stats
  unified = defined?(Ragdoll::UnifiedDocument)
  document_stats = unified ? Ragdoll::UnifiedDocument.stats : Ragdoll::Document.stats
  content_stats = unified ? Ragdoll::UnifiedContent.stats : Ragdoll::Content.stats

  summary = {
    total_documents: document_stats[:total_documents],
    processed_documents: document_stats.dig(:by_status, "processed") || 0,
    total_embeddings: document_stats[:total_embeddings],
    average_processing_time: estimate_average_processing_time
  }

  { documents: document_stats, content: content_stats, processing_summary: summary }
end

#reprocess_document(document_id, **options) ⇒ Object

Reprocess document with new text conversion



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'app/services/ragdoll/unified_document_management.rb', line 97

# Re-run text conversion for a stored document and process it again.
#
# The document is looked up via Ragdoll::UnifiedDocument when that constant
# is defined, otherwise via Ragdoll::Document. Its location is re-converted
# to text, the text is stored, and the document is passed to
# process_document_sync.
#
# Where the text is stored:
# - document responds to #unified_contents and has at least one entry:
#   the first entry is updated with the new content;
# - responds to #unified_contents but has none: a new entry is created;
# - otherwise: the text is assigned to document.content.
#
# NOTE(review): in the last case the assignment is not followed by an
# explicit save here; presumably process_document_sync persists it — confirm.
#
# @param document_id identifier passed to the model's .find
# @param options [Hash] forwarded to the converter's convert_to_text
# @return whatever process_document_sync returns, or nil when no file exists
#   at the document's location
def reprocess_document(document_id, **options)
  model = defined?(Ragdoll::UnifiedDocument) ? Ragdoll::UnifiedDocument : Ragdoll::Document
  document = model.find(document_id)

  source_path = document.location
  return nil unless File.exist?(source_path)

  media_type = @converter.determine_document_type(source_path)
  fresh_text = @converter.convert_to_text(source_path, media_type, **options)

  if !document.respond_to?(:unified_contents)
    document.content = fresh_text
  elsif document.unified_contents.any?
    document.unified_contents.first.update!(content: fresh_text)
  else
    document.unified_contents.create!(
      content: fresh_text,
      original_media_type: media_type,
      embedding_model: "text-embedding-3-large",
      metadata: { "reprocessed_at" => Time.current }
    )
  end

  process_document_sync(document)
end

#search_documents(query, **options) ⇒ Object

Search across all documents



156
157
158
159
160
161
162
# File 'app/services/ragdoll/unified_document_management.rb', line 156

# Run a content search across stored documents.
#
# Delegates to .search_content on Ragdoll::UnifiedDocument when that constant
# is defined, otherwise on Ragdoll::Document.
#
# @param query search query, passed through unchanged
# @param options [Hash] keyword options passed through unchanged
# @return whatever the model's .search_content returns
def search_documents(query, **options)
  searchable = defined?(Ragdoll::UnifiedDocument) ? Ragdoll::UnifiedDocument : Ragdoll::Document
  searchable.search_content(query, **options)
end