Class: Ragdoll::TextContent
- Inherits:
-
Content
- Object
- ActiveRecord::Base
- Content
- Ragdoll::TextContent
show all
- Defined in:
- app/models/ragdoll/text_content.rb
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Content
search_content, #should_generate_embeddings?
Class Method Details
.stats ⇒ Object
127
128
129
130
131
132
133
134
135
|
# File 'app/models/ragdoll/text_content.rb', line 127
# Aggregate statistics computed through the class-level query methods.
#
# The five values are evaluated in the order their keys appear in the result.
#
# @return [Hash] with the keys :total_text_contents, :by_model,
#   :total_embeddings, :average_word_count and :average_chunk_size
def self.stats
  # Number of spaces plus one, evaluated in SQL, stands in for a word count.
  word_count_sql = "LENGTH(content) - LENGTH(REPLACE(content, ' ', '')) + 1"

  summary = {}
  summary[:total_text_contents] = count
  summary[:by_model] = group(:embedding_model).count
  summary[:total_embeddings] = joins(:embeddings).count
  summary[:average_word_count] = average(word_count_sql)
  # NOTE(review): the instance reader #chunk_size takes its value from
  # metadata; confirm that a chunk_size column exists for this average.
  summary[:average_chunk_size] = average(:chunk_size)
  summary
end
|
Instance Method Details
#character_count ⇒ Object
51
52
53
|
# File 'app/models/ragdoll/text_content.rb', line 51
# Length of +content+ in characters.
#
# @return [Integer] +content.length+, or 0 when +content+ is nil
def character_count
  text = content
  return 0 if text.nil?

  text.length
end
|
#chunk_size ⇒ Object
Text-specific processing configuration stored in content metadata. This metadata is about the raw content processing, not AI-generated insights.
14
15
16
|
# File 'app/models/ragdoll/text_content.rb', line 14
# Chunk size read from the 'chunk_size' key of +metadata+.
#
# #chunks uses this as the maximum number of characters per chunk.
# A nil +metadata+ is tolerated (as #generate_embeddings! already does) and
# yields the default instead of raising NoMethodError.
#
# @return [Object] the stored value, or 1000 when +metadata+ is nil or holds
#   no truthy value under 'chunk_size'
def chunk_size
  metadata&.dig('chunk_size') || 1000
end
|
#chunk_size=(value) ⇒ Object
18
19
20
|
# File 'app/models/ragdoll/text_content.rb', line 18
# Store +value+ under the 'chunk_size' key of +metadata+.
#
# A merged copy is assigned rather than mutating the existing hash in place.
# A nil +metadata+ is treated as an empty hash (as #generate_embeddings!
# already does) instead of raising NoMethodError.
#
# @param value [Object] the chunk size to record
def chunk_size=(value)
  self.metadata = (metadata || {}).merge('chunk_size' => value)
end
|
#chunks ⇒ Object
Text-specific processing methods
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
# File 'app/models/ragdoll/text_content.rb', line 60
# Split +content+ into word-aligned, overlapping chunks.
#
# Each chunk spans at most +chunk_size+ characters. When a chunk would stop
# before the end of the text, its end is pulled back to the last space at or
# before that point, provided that space lies after the chunk start.
# The next chunk normally starts +overlap+ characters before the end of the
# previous one.
#
# @return [Array<Hash>] one hash per non-blank chunk with :content (stripped
#   text), :start_position, :end_position (exclusive) and :chunk_index;
#   an empty array when +content+ is blank
def chunks
  return [] if content.blank?

  # Read the loop-invariant values once instead of on every iteration.
  text = content
  size = chunk_size
  shared = overlap

  pieces = []
  start_pos = 0

  while start_pos < text.length
    end_pos = [start_pos + size, text.length].min
    if end_pos < text.length
      last_space = text.rindex(" ", end_pos)
      end_pos = last_space if last_space && last_space > start_pos
    end

    piece = text[start_pos...end_pos].strip
    if piece.present?
      pieces << {
        content: piece,
        start_position: start_pos,
        end_position: end_pos,
        chunk_index: pieces.length
      }
    end

    break if end_pos >= text.length

    next_start = end_pos - shared
    # If the overlap reaches back to (or past) the start of the chunk just
    # emitted, stepping back would only re-emit ever shorter suffixes of that
    # chunk, one character at a time. Resume at the end of the chunk instead;
    # the `start_pos + 1` floor still guarantees progress when the chunk was
    # empty (e.g. a non-positive chunk size).
    next_start = [end_pos, start_pos + 1].max if next_start <= start_pos
    start_pos = next_start
  end

  pieces
end
|
#content_for_embedding ⇒ Object
Override content for embedding to use the text content
123
124
125
|
# File 'app/models/ragdoll/text_content.rb', line 123
# Returns +content+ unchanged as the text to embed.
#
# The accompanying description calls this an override; the definition being
# overridden is not visible here.
def content_for_embedding
content
end
|
#embedding_count ⇒ Object
55
56
57
|
# File 'app/models/ragdoll/text_content.rb', line 55
# Number of entries in +embeddings+, as reported by its +count+.
#
# NOTE(review): +embeddings+ is presumably an ActiveRecord association, in
# which case this issues a COUNT query on each call - confirm.
def embedding_count
embeddings.count
end
|
#encoding ⇒ Object
Content-specific technical metadata (file processing info)
31
32
33
|
# File 'app/models/ragdoll/text_content.rb', line 31
# Value stored under the 'encoding' key of +metadata+.
#
# A nil +metadata+ is tolerated (as #generate_embeddings! already does) and
# yields nil instead of raising NoMethodError.
#
# @return [Object, nil] the stored value, or nil when +metadata+ is nil or
#   has no 'encoding' entry
def encoding
  metadata&.dig('encoding')
end
|
#encoding=(value) ⇒ Object
35
36
37
|
# File 'app/models/ragdoll/text_content.rb', line 35
# Store +value+ under the 'encoding' key of +metadata+.
#
# A merged copy is assigned rather than mutating the existing hash in place.
# A nil +metadata+ is treated as an empty hash (as #generate_embeddings!
# already does) instead of raising NoMethodError.
#
# @param value [Object] the encoding to record
def encoding=(value)
  self.metadata = (metadata || {}).merge('encoding' => value)
end
|
#generate_embeddings! ⇒ Object
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
# File 'app/models/ragdoll/text_content.rb', line 93
# Rebuild the embeddings for this record from its +content+.
#
# Does nothing when +content+ is blank. Otherwise every existing entry in
# +embeddings+ is destroyed, +content+ is split with
# Ragdoll::TextChunker.chunk, and one embedding is created per chunk from the
# vector returned by Ragdoll::EmbeddingService#generate_embedding. Finally
# "embeddings_generated_at" is written into +metadata+ via +update!+.
#
# NOTE(review): a StandardError raised for a chunk is printed with +puts+ and
# that chunk is skipped; the timestamp is still written afterwards, even if
# every chunk failed - confirm this best-effort behaviour is intended.
# NOTE(review): the chunker is called with +content+ only; the #chunk_size and
# #overlap readers of this class are not passed to it here.
def generate_embeddings!
  return if content.blank?

  embeddings.destroy_all

  pieces = Ragdoll::TextChunker.chunk(content)
  service = Ragdoll::EmbeddingService.new

  pieces.each_with_index do |piece, position|
    begin
      piece_vector = service.generate_embedding(piece)
      embeddings.create!(content: piece, embedding_vector: piece_vector, chunk_index: position)
    rescue StandardError => error
      puts "Failed to generate embedding for chunk #{position}: #{error.message}"
    end
  end

  # A nil metadata is treated as an empty hash before the timestamp is added.
  stamped = (metadata || {}).merge("embeddings_generated_at" => Time.current)
  update!(metadata: stamped)
end
|
#line_count ⇒ Object
39
40
41
|
# File 'app/models/ragdoll/text_content.rb', line 39
# Value stored under the 'line_count' key of +metadata+.
#
# A nil +metadata+ is tolerated (as #generate_embeddings! already does) and
# yields nil instead of raising NoMethodError.
#
# @return [Object, nil] the stored value, or nil when +metadata+ is nil or
#   has no 'line_count' entry
def line_count
  metadata&.dig('line_count')
end
|
#line_count=(value) ⇒ Object
43
44
45
|
# File 'app/models/ragdoll/text_content.rb', line 43
# Store +value+ under the 'line_count' key of +metadata+.
#
# A merged copy is assigned rather than mutating the existing hash in place.
# A nil +metadata+ is treated as an empty hash (as #generate_embeddings!
# already does) instead of raising NoMethodError.
#
# @param value [Object] the line count to record
def line_count=(value)
  self.metadata = (metadata || {}).merge('line_count' => value)
end
|
#overlap ⇒ Object
22
23
24
|
# File 'app/models/ragdoll/text_content.rb', line 22
# Overlap between consecutive chunks, read from the 'overlap' key of
# +metadata+.
#
# #chunks uses this as the number of characters to step back from the end of
# one chunk when choosing where the next one starts.
# A nil +metadata+ is tolerated (as #generate_embeddings! already does) and
# yields the default instead of raising NoMethodError.
#
# @return [Object] the stored value, or 200 when +metadata+ is nil or holds
#   no truthy value under 'overlap'
def overlap
  metadata&.dig('overlap') || 200
end
|
#overlap=(value) ⇒ Object
26
27
28
|
# File 'app/models/ragdoll/text_content.rb', line 26
# Store +value+ under the 'overlap' key of +metadata+.
#
# A merged copy is assigned rather than mutating the existing hash in place.
# A nil +metadata+ is treated as an empty hash (as #generate_embeddings!
# already does) instead of raising NoMethodError.
#
# @param value [Object] the overlap to record
def overlap=(value)
  self.metadata = (metadata || {}).merge('overlap' => value)
end
|
#word_count ⇒ Object
47
48
49
|
# File 'app/models/ragdoll/text_content.rb', line 47
# Number of whitespace-separated words in +content+.
#
# @return [Integer] the size of +content.split+, or 0 when +content+ is nil
def word_count
  text = content
  return 0 if text.nil?

  text.split.length
end
|