Class: NewspaperWorks::TextExtractionDerivativeService
Constant Summary
NewspaperPageDerivativeService::TARGET_EXT
Instance Attribute Summary
#file_set, #master_format
Instance Method Summary
collapse
#convert_cmd, #derivative_path_factory, #identify, #im_convert, #jp2_convert, #jp2_to_intermediate, #load_destpath, #mime_type, #one_bit?, #prepare_path, target_ext, #use_color?, #valid?
Constructor Details
Returns a new instance of TextExtractionDerivativeService.
3
4
5
6
7
|
# File 'app/services/newspaper_works/text_extraction_derivative_service.rb', line 3
# Builds the service for a file set, with no derivative output paths chosen yet.
#
# @param file_set [Object] forwarded unchanged to the superclass initializer
def initialize(file_set)
  super(file_set)
  # Output paths are assigned later by #create_derivatives_from_ocr. All three
  # path ivars that method assigns are declared here so each one starts as an
  # explicit nil rather than only two of them.
  @alto_path = nil
  @txt_path = nil
  @json_path = nil
end
|
Instance Method Details
#cleanup_derivatives ⇒ Object
50
51
52
53
54
|
# File 'app/services/newspaper_works/text_extraction_derivative_service.rb', line 50
# Delegates cleanup to the superclass once per extension, in the order
# 'txt', 'xml', 'json'.
#
# @return [Object] whatever the superclass returns for the final ('json') call
def cleanup_derivatives
  # map + last keeps the call order and hands back the last call's result.
  %w[txt xml json].map { |extension| super(extension) }.last
end
|
#create_derivatives(src) ⇒ Object
9
10
11
12
13
14
15
|
# File 'app/services/newspaper_works/text_extraction_derivative_service.rb', line 9
# Creates text derivatives for a source file, choosing between two strategies.
#
# A TextFormatsFromALTOService is built for this service's file set. When it
# reports a non-nil alto_path, derivative creation is delegated to it;
# otherwise the source is run through #create_derivatives_from_ocr.
#
# @param src [String] source file; passed through to whichever strategy runs
# @return [Object] the result of the strategy that was invoked
def create_derivatives(src)
  alto_service = NewspaperWorks::TextFormatsFromALTOService.new(file_set)
  if alto_service.alto_path.nil?
    create_derivatives_from_ocr(src)
  else
    alto_service.create_derivatives(src)
  end
end
|
#create_derivatives_from_ocr(filename) ⇒ Object
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
# File 'app/services/newspaper_works/text_extraction_derivative_service.rb', line 17
# Runs OCR over a source file and writes plain-text, ALTO and JSON outputs.
#
# Records the source path, obtains a destination path per output extension
# via #prepare_path, then writes each PageOCR result with its writer method.
#
# @param filename [String] path of the source file handed to PageOCR
# @return [Object] the result of the final #write_json call
def create_derivatives_from_ocr(filename)
  @source_path = filename

  # Destination paths are resolved up front, one extension at a time.
  @alto_path = prepare_path('xml')
  @txt_path = prepare_path('txt')
  @json_path = prepare_path('json')

  page_ocr = NewspaperWorks::TextExtraction::PageOCR.new(filename)

  # Each OCR result is fetched immediately before it is written.
  write_plain_text(page_ocr.plain)
  write_alto(page_ocr.alto)
  write_json(page_ocr.word_json)
end
|
#write_alto(xml) ⇒ Object
32
33
34
35
36
|
# File 'app/services/newspaper_works/text_extraction_derivative_service.rb', line 32
# Writes the given XML to the file at @alto_path, replacing existing content.
#
# @param xml [String] content to write
# @return [Integer] number of bytes written
def write_alto(xml)
  File.write(@alto_path, xml)
end
|
#write_json(text) ⇒ Object
44
45
46
47
48
|
# File 'app/services/newspaper_works/text_extraction_derivative_service.rb', line 44
# Writes the given text to the file at @json_path, replacing existing content.
#
# @param text [String] content to write
# @return [Integer] number of bytes written
def write_json(text)
  File.write(@json_path, text)
end
|
#write_plain_text(text) ⇒ Object
38
39
40
41
42
|
# File 'app/services/newspaper_works/text_extraction_derivative_service.rb', line 38
# Writes the given text to the file at @txt_path, replacing existing content.
#
# @param text [String] content to write
# @return [Integer] number of bytes written
def write_plain_text(text)
  File.write(@txt_path, text)
end
|