Class: Ragdoll::TextContent

Inherits:
Content
  • Object
show all
Defined in:
app/models/ragdoll/text_content.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Content

search_content, #should_generate_embeddings?

Class Method Details

.stats ⇒ Object



127
128
129
130
131
132
133
134
135
# File 'app/models/ragdoll/text_content.rb', line 127

# Builds a summary hash of aggregate figures for text contents.
#
# The word-count figure is a SQL approximation: for each row it takes the
# number of single-space characters in +content+ plus one, then averages.
#
# NOTE(review): average(:chunk_size) presumably needs a real +chunk_size+
# column; the documentation for #chunk_size describes it as stored in the
# content metadata — confirm against the schema.
#
# @return [Hash{Symbol => Object}] keys :total_text_contents, :by_model,
#   :total_embeddings, :average_word_count and :average_chunk_size
def self.stats
  word_count_sql = "LENGTH(content) - LENGTH(REPLACE(content, ' ', '')) + 1"

  record_total    = count
  per_model       = group(:embedding_model).count
  embedding_total = joins(:embeddings).count
  mean_words      = average(word_count_sql)
  mean_chunk_size = average(:chunk_size)

  {
    total_text_contents: record_total,
    by_model: per_model,
    total_embeddings: embedding_total,
    average_word_count: mean_words,
    average_chunk_size: mean_chunk_size
  }
end

Instance Method Details

#character_count ⇒ Object



51
52
53
# File 'app/models/ragdoll/text_content.rb', line 51

# Number of characters in +content+.
#
# @return [Integer] the length of content, or 0 when content is nil
def character_count
  text = content
  return 0 if text.nil?

  text.length
end

#chunk_size ⇒ Object

Text-specific processing configuration stored in content metadata. This metadata is about the raw content processing, not AI-generated insights.



14
15
16
# File 'app/models/ragdoll/text_content.rb', line 14

# Chunk size used when splitting this content.
#
# Reads the 'chunk_size' entry from +metadata+ and falls back to 1000 when
# the entry is absent (or nil/false).
#
# @return [Object] the stored chunk size, or 1000 by default
def chunk_size
  metadata.dig('chunk_size') || 1000
end

#chunk_size=(value) ⇒ Object



18
19
20
# File 'app/models/ragdoll/text_content.rb', line 18

# Stores the chunk size under the 'chunk_size' key of +metadata+.
#
# A new hash is assigned (via merge) rather than mutating the existing
# metadata hash in place.
#
# @param value [Object] the chunk size to store
def chunk_size=(value)
  self.metadata = metadata.merge('chunk_size' => value)
end

#chunks ⇒ Object

Text-specific processing methods



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'app/models/ragdoll/text_content.rb', line 60

# Splits +content+ into overlapping windows of roughly #chunk_size characters.
#
# Each window ends at the last space at or before the size limit when such a
# space lies after the window start; otherwise it is cut at the limit. The
# next window starts #overlap characters before the previous end, but always
# at least one character past the previous start so the scan advances.
# Windows that are empty after stripping whitespace are skipped.
#
# @return [Array<Hash>] one hash per chunk with :content (stripped text),
#   :start_position, :end_position (exclusive) and :chunk_index; an empty
#   array when content is blank
def chunks
  text = content
  return [] if text.blank?

  total  = text.length
  pieces = []
  cursor = 0

  while cursor < total
    limit = cursor + chunk_size
    stop  = limit < total ? limit : total

    # Only a window that stops short of the end is pulled back to a space.
    if stop < total
      space_at = text.rindex(" ", stop)
      stop = space_at if space_at && space_at > cursor
    end

    piece = text[cursor...stop].strip
    if piece.present?
      pieces << {
        content: piece,
        start_position: cursor,
        end_position: stop,
        chunk_index: pieces.length
      }
    end

    break if stop >= total

    # Step back by the overlap, but never fail to move forward.
    cursor = [stop - overlap, cursor + 1].max
  end

  pieces
end

#content_for_embedding ⇒ Object

Override content for embedding to use the text content



123
124
125
# File 'app/models/ragdoll/text_content.rb', line 123

# Text handed to the embedding step for this record.
#
# For text content this is simply the +content+ value, unchanged.
#
# @return [Object] whatever +content+ returns (nil when no content is set)
def content_for_embedding
  content
end

#embedding_count ⇒ Object



55
56
57
# File 'app/models/ragdoll/text_content.rb', line 55

# Number of embeddings attached to this content.
#
# Delegates to +embeddings.count+.
#
# @return [Object] the result of +embeddings.count+ (presumably an Integer)
def embedding_count
  embeddings.count
end

#encoding ⇒ Object

Content-specific technical metadata (file processing info)



31
32
33
# File 'app/models/ragdoll/text_content.rb', line 31

# Encoding recorded for this content.
#
# Reads the 'encoding' entry from +metadata+; there is no default.
#
# @return [Object, nil] the stored encoding, or nil when none is recorded
def encoding
  metadata.dig('encoding')
end

#encoding=(value) ⇒ Object



35
36
37
# File 'app/models/ragdoll/text_content.rb', line 35

# Stores the encoding under the 'encoding' key of +metadata+.
#
# A new hash is assigned (via merge) rather than mutating the existing
# metadata hash in place.
#
# @param value [Object] the encoding to record
def encoding=(value)
  self.metadata = metadata.merge('encoding' => value)
end

#generate_embeddings! ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'app/models/ragdoll/text_content.rb', line 93

# Rebuilds every embedding for this text content.
#
# Does nothing when +content+ is blank. Otherwise it destroys the existing
# embeddings, splits the content with Ragdoll::TextChunker, creates one
# embedding record per chunk, and finally stamps +metadata+ with an
# "embeddings_generated_at" time.
#
# A chunk whose embedding or record creation raises StandardError is reported
# with +puts+ and skipped; the remaining chunks are still processed and the
# timestamp is still written.
#
# NOTE(review): the chunker is called with the content only, so presumably
# its own defaults apply rather than #chunk_size / #overlap — confirm.
#
# @return [Object, nil] nil when content is blank, otherwise the result of
#   +update!+
def generate_embeddings!
  return if content.blank?

  # Clear existing embeddings
  embeddings.destroy_all

  # Use TextChunker to split content into manageable chunks
  chunks = Ragdoll::TextChunker.chunk(content)

  # Generate embeddings for each chunk
  embedding_service = Ragdoll::EmbeddingService.new

  chunks.each_with_index do |chunk_text, index|
    begin
      vector = embedding_service.generate_embedding(chunk_text)

      embeddings.create!(
        content: chunk_text,
        embedding_vector: vector,
        chunk_index: index
      )
    rescue StandardError => e
      # Best effort: one failing chunk must not abort the rest.
      puts "Failed to generate embedding for chunk #{index}: #{e.message}"
    end
  end

  # Tolerate a nil metadata value when recording the completion time.
  update!(metadata: (metadata || {}).merge("embeddings_generated_at" => Time.current))
end

#line_count ⇒ Object



39
40
41
# File 'app/models/ragdoll/text_content.rb', line 39

# Line count recorded for this content.
#
# Reads the 'line_count' entry from +metadata+; there is no default.
#
# @return [Object, nil] the stored line count, or nil when none is recorded
def line_count
  metadata.dig('line_count')
end

#line_count=(value) ⇒ Object



43
44
45
# File 'app/models/ragdoll/text_content.rb', line 43

# Stores the line count under the 'line_count' key of +metadata+.
#
# A new hash is assigned (via merge) rather than mutating the existing
# metadata hash in place.
#
# @param value [Object] the line count to record
def line_count=(value)
  self.metadata = metadata.merge('line_count' => value)
end

#overlap ⇒ Object



22
23
24
# File 'app/models/ragdoll/text_content.rb', line 22

# Overlap between consecutive chunks of this content.
#
# Reads the 'overlap' entry from +metadata+ and falls back to 200 when the
# entry is absent (or nil/false).
#
# @return [Object] the stored overlap, or 200 by default
def overlap
  metadata.dig('overlap') || 200
end

#overlap=(value) ⇒ Object



26
27
28
# File 'app/models/ragdoll/text_content.rb', line 26

# Stores the chunk overlap under the 'overlap' key of +metadata+.
#
# A new hash is assigned (via merge) rather than mutating the existing
# metadata hash in place.
#
# @param value [Object] the overlap to store
def overlap=(value)
  self.metadata = metadata.merge('overlap' => value)
end

#word_count ⇒ Object



47
48
49
# File 'app/models/ragdoll/text_content.rb', line 47

# Number of whitespace-separated words in +content+.
#
# @return [Integer] the word count, or 0 when content is nil
def word_count
  text = content
  return 0 if text.nil?

  text.split.length
end