Class: Ragdoll::TextExtractionService

Inherits:
Object
  • Object
show all
Defined in:
app/services/ragdoll/text_extraction_service.rb

Defined Under Namespace

Classes: ExtractionError

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file_path, document_type = nil) ⇒ TextExtractionService

Returns a new instance of TextExtractionService.



16
17
18
19
20
# File 'app/services/ragdoll/text_extraction_service.rb', line 16

# Builds an extractor for a single file.
#
# @param file_path [String] path of the file to extract text from
# @param document_type [String, nil] explicit document type (e.g. "pdf");
#   when nil, the type is taken from #determine_document_type
def initialize(file_path, document_type = nil)
  @file_path = file_path
  # Set the extension BEFORE resolving the type so that
  # determine_document_type can rely on @file_extension instead of seeing nil.
  # NOTE(review): the helper is defined outside this listing — confirm that it
  # reads @file_extension; the reordering is harmless either way.
  @file_extension = File.extname(file_path).downcase
  @document_type = document_type || determine_document_type
end

Class Method Details

.extract(file_path, document_type = nil) ⇒ Object



12
13
14
# File 'app/services/ragdoll/text_extraction_service.rb', line 12

# Convenience entry point: instantiates the service for the given file and
# runs the extraction in a single call.
#
# @param file_path [String] path forwarded to the constructor
# @param document_type [String, nil] optional type forwarded to the constructor
# @return [Object] whatever the instance-level #extract returns
def self.extract(file_path, document_type = nil)
  extractor = new(file_path, document_type)
  extractor.extract
end

Instance Method Details

#extract ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'app/services/ragdoll/text_extraction_service.rb', line 22

# Runs the extractor that matches @document_type and returns its result.
# Any type not listed below is handled by the plain-text extractor.
#
# @return [Object] the value returned by the selected extract_from_* method
def extract
  case @document_type
  when "pdf"              then extract_from_pdf
  when "docx"             then extract_from_docx
  when "text", "markdown" then extract_from_text
  when "html"             then extract_from_html
  when "csv"              then extract_from_csv
  when "json"             then extract_from_json
  when "xml"              then extract_from_xml
  when "yaml"             then extract_from_yaml
  else                         extract_from_text # Default fallback
  end
end