Class: Ragdoll::MigrationService

Inherits:
Object
  • Object
show all
Defined in:
app/services/ragdoll/migration_service.rb

Overview

Migration service to transition from multi-modal to unified text-based RAG system

Defined Under Namespace

Classes: MigrationError

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ MigrationService

Returns a new instance of MigrationService.



16
17
18
19
# File 'app/services/ragdoll/migration_service.rb', line 16

# Builds a service instance and stores its two collaborators:
# a Ragdoll::DocumentConverter in @converter and a
# Ragdoll::UnifiedDocumentManagement in @unified_management.
def initialize
  @converter = Ragdoll::DocumentConverter.new
  @unified_management = Ragdoll::UnifiedDocumentManagement.new
end

Class Method Details

.migrate_all_documents(**options) ⇒ Object



8
9
10
# File 'app/services/ragdoll/migration_service.rb', line 8

# Class-level convenience wrapper: instantiates the service and forwards
# the keyword options unchanged to the instance method of the same name.
#
# @param options [Hash] keyword options, passed through as-is
# @return [Object] whatever the instance-level #migrate_all_documents returns
def self.migrate_all_documents(**options)
  service = new
  service.migrate_all_documents(**options)
end

.migrate_document(document_id, **options) ⇒ Object



12
13
14
# File 'app/services/ragdoll/migration_service.rb', line 12

# Class-level convenience wrapper: instantiates the service and forwards
# the document id and keyword options unchanged to the instance method of
# the same name.
#
# @param document_id [Object] identifier handed to the instance method
# @param options [Hash] keyword options, passed through as-is
# @return [Object] whatever the instance-level #migrate_document returns
def self.migrate_document(document_id, **options)
  service = new
  service.migrate_document(document_id, **options)
end

Instance Method Details

#create_comparison_report ⇒ Object

Create comparison report between old and new systems



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'app/services/ragdoll/migration_service.rb', line 78

# Create comparison report between old and new systems.
#
# Reads `.stats` from Ragdoll::Document, Ragdoll::UnifiedDocument and
# Ragdoll::UnifiedContent and arranges selected counters side by side,
# together with a fixed list of benefits and the result of
# generate_migration_recommendations.
#
# @return [Hash] `{ error: String }` when one of the unified models is not
#   loaded; otherwise a hash with :migration_summary, :benefits and
#   :recommendations keys
def create_comparison_report
  # Both unified models are queried below, so both must be present. Without
  # the second guard a missing UnifiedContent would surface as a NameError
  # instead of the error hash this method uses for unavailable models.
  return { error: "UnifiedDocument model not available" } unless defined?(Ragdoll::UnifiedDocument)
  return { error: "UnifiedContent model not available" } unless defined?(Ragdoll::UnifiedContent)

  old_stats = Ragdoll::Document.stats
  new_stats = Ragdoll::UnifiedDocument.stats
  content_stats = Ragdoll::UnifiedContent.stats

  {
    migration_summary: {
      old_system: {
        total_documents: old_stats[:total_documents],
        text_contents: old_stats[:total_text_contents],
        image_contents: old_stats[:total_image_contents],
        audio_contents: old_stats[:total_audio_contents],
        total_embeddings: old_stats[:total_embeddings]
      },
      new_system: {
        total_documents: new_stats[:total_documents],
        unified_contents: content_stats[:total_contents],
        total_embeddings: new_stats[:total_embeddings],
        by_media_type: content_stats[:by_media_type]
      }
    },
    benefits: {
      simplified_architecture: "Single content model instead of STI",
      unified_search: "All content searchable through text",
      cross_modal_retrieval: "Images and audio searchable via descriptions/transcripts",
      reduced_complexity: "One embedding pipeline instead of multiple"
    },
    recommendations: generate_migration_recommendations
  }
end

#migrate_all_documents(**options) ⇒ Object

Migrate all existing documents to unified text-based system



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'app/services/ragdoll/migration_service.rb', line 22

# Migrate all existing documents to the unified text-based system.
#
# Walks Ragdoll::Document.find_each (batch size from options[:batch_size],
# 50 when absent) and hands every record to migrate_single_document along
# with the full option set. A result whose :status is :migrated counts as
# migrated, any other result as skipped. A StandardError raised for one
# document is recorded (id, title, message) and does not stop the run.
# Progress is printed after every tenth document and a summary at the end.
#
# @param options [Hash] keyword options; :batch_size is read here, and all
#   options are forwarded to migrate_single_document
# @return [Hash] `{ error: String }` when Ragdoll::UnifiedDocument is not
#   loaded; otherwise the counters plus :started_at, :completed_at and
#   :duration (seconds)
def migrate_all_documents(**options)
  unless defined?(Ragdoll::UnifiedDocument)
    return { error: "UnifiedDocument model not available" }
  end

  stats = { started_at: Time.current, total_documents: 0, migrated: 0, skipped: 0, errors: [] }

  # Counter summary shared by the progress line and the final report line.
  tally = lambda do
    "#{stats[:migrated]} migrated, #{stats[:skipped]} skipped, #{stats[:errors].length} errors"
  end
  batch_size = options[:batch_size] || 50

  puts "🚀 Starting migration from multi-modal to unified text-based system..."

  Ragdoll::Document.find_each(batch_size: batch_size) do |doc|
    stats[:total_documents] += 1

    begin
      outcome = migrate_single_document(doc, **options)
      bucket = outcome[:status] == :migrated ? :migrated : :skipped
      stats[bucket] += 1
    rescue StandardError => e
      # A failed document is logged and counted, then the loop carries on.
      stats[:errors] << { document_id: doc.id, title: doc.title, error: e.message }
      puts "❌ Error migrating document #{doc.id}: #{e.message}"
    end

    puts "📊 Progress: #{tally.call}" if (stats[:total_documents] % 10).zero?
  end

  finished_at = Time.current
  stats[:completed_at] = finished_at
  stats[:duration] = finished_at - stats[:started_at]

  puts "✅ Migration completed!"
  puts "📊 Final stats: #{tally.call}"
  puts "⏱️  Duration: #{stats[:duration].round(2)} seconds"

  stats
end

#migrate_document(document_id, **options) ⇒ Object

Migrate a specific document



72
73
74
75
# File 'app/services/ragdoll/migration_service.rb', line 72

# Migrate a specific document.
#
# Looks the record up with Ragdoll::Document.find and passes it, together
# with the keyword options, to migrate_single_document. Anything raised by
# the lookup propagates to the caller.
#
# @param document_id [Object] identifier handed to Ragdoll::Document.find
# @param options [Hash] keyword options forwarded unchanged
# @return [Object] the result of migrate_single_document
def migrate_document(document_id, **options)
  migrate_single_document(Ragdoll::Document.find(document_id), **options)
end

#validate_migration ⇒ Object

Validate migrated data integrity



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'app/services/ragdoll/migration_service.rb', line 112

# Validate migrated data integrity.
#
# Runs three checks and prints one outcome line per check:
#   1. Ragdoll::Document.count equals Ragdoll::UnifiedDocument.count
#      (a count comparison only; individual records are not matched up).
#   2. Ragdoll::UnifiedDocument.without_content.count is zero.
#   3. content_quality_report[:high_quality_percentage] is at least 50.
#
# @return [Hash] `{ error: String }` when Ragdoll::UnifiedDocument is not
#   loaded; otherwise :total_checks, :passed, :failed, :issues (Array of
#   String) and :quality_report (the hash from content_quality_report)
def validate_migration
  return { error: "UnifiedDocument model not available" } unless defined?(Ragdoll::UnifiedDocument)

  validation_results = { total_checks: 0, passed: 0, failed: 0, issues: [] }

  puts "🔍 Validating migration integrity..."

  # Check 1: All documents have corresponding unified documents
  old_count = Ragdoll::Document.count
  new_count = Ragdoll::UnifiedDocument.count
  count_issue = "Document count mismatch: #{old_count} old vs #{new_count} new"
  record_validation_check(
    validation_results, old_count == new_count,
    success: "✅ Document count matches: #{old_count} = #{new_count}",
    issue: count_issue,
    failure: "❌ #{count_issue}"
  )

  # Check 2: All unified documents have content
  documents_without_content = Ragdoll::UnifiedDocument.without_content.count
  content_issue = "#{documents_without_content} documents without content"
  record_validation_check(
    validation_results, documents_without_content.zero?,
    success: "✅ All unified documents have content",
    issue: content_issue,
    # Carries the same ❌ marker as the check-1 failure line so failures
    # are uniformly recognisable in the console output.
    failure: "❌ #{content_issue}"
  )

  # Check 3: Content quality assessment
  quality_stats = content_quality_report
  percentage = quality_stats[:high_quality_percentage]
  record_validation_check(
    validation_results, percentage >= 50,
    success: "✅ Content quality acceptable: #{percentage}% high quality",
    issue: "Low content quality: only #{percentage}% high quality",
    failure: "⚠️  Content quality concern: only #{percentage}% high quality"
  )

  validation_results[:quality_report] = quality_stats
  validation_results
end

# Tallies one validation check in +results+ and prints its outcome line.
private def record_validation_check(results, passed, success:, issue:, failure:)
  results[:total_checks] += 1

  if passed
    results[:passed] += 1
    puts success
  else
    results[:failed] += 1
    results[:issues] << issue
    puts failure
  end
end