Class: IiifPrint::TextExtractionDerivativeService

Inherits:
BaseDerivativeService show all
Defined in:
lib/iiif_print/text_extraction_derivative_service.rb

Instance Attribute Summary

Attributes inherited from BaseDerivativeService

#file_set, #master_format

Instance Method Summary collapse

Methods inherited from BaseDerivativeService

#convert_cmd, #derivative_path_factory, #identify, #im_convert, #jp2_convert, #jp2_to_intermediate, #load_destpath, #mime_type, #mime_type_for, #one_bit?, #prepare_path, #use_color?, #valid?

Constructor Details

#initialize(file_set) ⇒ TextExtractionDerivativeService

Returns a new instance of TextExtractionDerivativeService.



12
13
14
# File 'lib/iiif_print/text_extraction_derivative_service.rb', line 12

# Builds the service for the given file set by delegating straight to the
# parent class constructor; no additional state is set up here.
#
# @param file_set [Object] the file set handed on to the superclass
def initialize(file_set)
  # Bare `super` forwards this method's own arguments (here: file_set).
  super
end

Instance Method Details

#cleanup_derivatives(*) ⇒ Object



46
47
48
49
50
# File 'lib/iiif_print/text_extraction_derivative_service.rb', line 46

# Cleans up the derivative for every extension listed in +ocr_derivatives+
# by calling the superclass implementation once per extension.
#
# Any arguments passed by the caller are accepted and ignored.
#
# @return [Array] the extensions (keys of +ocr_derivatives+) that were visited
def cleanup_derivatives(*)
  # NOTE: Hash#keys does not yield to a block, so a block attached to `keys`
  # is silently discarded and `super` would never run. Iterate the returned
  # array explicitly; `keys.each` also returns the keys array, which keeps
  # this method's return value the same as before.
  ocr_derivatives.keys.each do |extension|
    super(extension.to_s)
  end
end

#create_derivatives(src) ⇒ Object



16
17
18
19
20
21
22
# File 'lib/iiif_print/text_extraction_derivative_service.rb', line 16

# Creates the text derivatives for +src+.
#
# An instance of +alto_derivative_service_class+ is built for the file set.
# When that instance reports an +alto_path+, derivative creation is delegated
# to it; otherwise the derivatives are produced via
# +create_derivatives_from_ocr+.
#
# @param src the source passed on to whichever path creates the derivatives
# @return [Object] the result of the delegated call
def create_derivatives(src)
  alto_service = alto_derivative_service_class.new(file_set)

  if alto_service.alto_path.nil?
    create_derivatives_from_ocr(src)
  else
    alto_service.create_derivatives(src)
  end
end

#create_derivatives_from_ocr(filename) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/iiif_print/text_extraction_derivative_service.rb', line 24

# Runs the page OCR service against +filename+ and writes one derivative per
# entry in +ocr_derivatives+.
#
# For each (extension, method name) pair the destination path is prepared,
# the named method is invoked on the OCR service, and the result is written
# unless it is blank.
#
# @param filename the source file handed to +page_ocr_service_class+
# @return [Object] whatever +ocr_derivatives.each+ returns
def create_derivatives_from_ocr(filename)
  # TODO: Do we need this source_path instance variable?
  @source_path = filename
  ocr_service = page_ocr_service_class.new(filename)

  ocr_derivatives.each do |ext, reader|
    # The path is prepared before the OCR method is invoked, even if the
    # content later turns out to be blank.
    destination = prepare_path(ext.to_s)
    extracted = ocr_service.public_send(reader)

    write(content: extracted, path: destination, extension: ext) unless extracted.blank?
  end
end

#write(content:, path:, extension:) ⇒ Object



38
39
40
41
42
43
44
# File 'lib/iiif_print/text_extraction_derivative_service.rb', line 38

# Writes +content+ to the file at +path+ and, while that file is still open,
# passes the same content to +IiifPrint.copy_derivatives_from_data_store+.
#
# @param content the data to write; also passed as the +stream+
# @param path the destination file path; also passed as the +url+ directive
# @param extension used to look up the mime type via +mime_type_for+
# @return [Object] the result of +IiifPrint.copy_derivatives_from_data_store+
def write(content:, path:, extension:)
  directives = {
    url: path,
    container: 'extracted_text',
    mime_type: mime_type_for(extension)
  }

  File.open(path, 'w') do |io|
    io.write(content)
    IiifPrint.copy_derivatives_from_data_store(stream: content, directives: directives)
  end
end