Class: Paperclip::Document::Processors::Reader

Inherits:
Paperclip::Document::Processor show all
Defined in:
lib/paperclip/document/processors/reader.rb

Overview

This processor extracts the OCR text of the file.

Instance Attribute Summary collapse

Attributes inherited from Paperclip::Document::Processor

#instance, #tmp_dir

Instance Method Summary collapse

Methods inherited from Paperclip::Document::Processor

#basename, #file_path

Constructor Details

#initialize(file, options = {}, attachment = nil) ⇒ Reader

Returns a new instance of Reader.



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/paperclip/document/processors/reader.rb', line 8

# Builds a reader processor for the given file.
#
# When no :text_column option is supplied and the record has a column
# matching #default_text_column, that column name is used. The :language
# option is stored as-is.
#
# @param file the file to process (forwarded to the parent processor)
# @param options [Hash] processor options (:text_column, :language, :clean)
# @param attachment the attachment being processed (forwarded to the parent)
# @raise [Paperclip::Error] when no text column could be determined
def initialize(file, options = {}, attachment = nil)
  super(file, options, attachment)

  @options[:text_column] = default_text_column if @options[:text_column].nil? && text_column?
  @language = @options[:language]
  @text_column = @options[:text_column]
  raise Paperclip::Error, 'No content text column given' unless @text_column

  # Cleaning is always off on Ruby >= 2.0; otherwise it honours the
  # :clean option and defaults to true when the option is absent.
  @clean =
    if RUBY_VERSION >= '2.0'
      false
    elsif options.key?(:clean)
      !!options[:clean]
    else
      true
    end
end

Instance Attribute Details

#clean ⇒ Object

Returns the value of attribute clean.



6
7
8
# File 'lib/paperclip/document/processors/reader.rb', line 6

# Flag computed in the constructor and forwarded to
# Docsplit.extract_text as its :clean option by #make.
#
# @return [Boolean] the value of the clean attribute
def clean
  @clean
end

#language ⇒ Object

Returns the value of attribute language.



6
7
8
# File 'lib/paperclip/document/processors/reader.rb', line 6

# The :language option given to the constructor. When it is a Proc,
# #make calls it with the attachment's instance to obtain the language.
#
# @return [Object, nil] the value of the language attribute
def language
  @language
end

#text_column ⇒ Object

Returns the value of attribute text_column.



6
7
8
# File 'lib/paperclip/document/processors/reader.rb', line 6

# Key under which #make stores the extracted text on the attachment's
# instance. Taken from the :text_column option, or from
# #default_text_column when the option is absent and that column exists.
#
# @return [Object] the value of the text_column attribute
def text_column
  @text_column
end

Instance Method Details

#default_text_column ⇒ Object

Returns the name of the default text column



47
48
49
# File 'lib/paperclip/document/processors/reader.rb', line 47

# Name of the column used when no :text_column option is given:
# the attachment name followed by "_content_text".
#
# @return [String]
def default_text_column
  "#{@attachment.name}_content_text"
end

#make ⇒ Object

Extracts the text of the whole document.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/paperclip/document/processors/reader.rb', line 22

# Extracts the text of the whole document and stores it on the record.
#
# Runs Docsplit.extract_text on the file at #file_path, writing its output
# into #tmp_dir, then reads the generated "<basename>.txt" file, assigns its
# content to the attachment's instance under #text_column and runs the
# instance's :save callbacks.
#
# @return [File] a newly opened handle on the original file (file.path)
def make
  destination_path = tmp_dir.to_s
  record = @attachment.instance

  # The language may be a Proc that receives the record.
  resolved_language = language.is_a?(Proc) ? language.call(record) : language
  extract_options = { output: destination_path, clean: @clean, language: resolved_language }
  Docsplit.extract_text(file_path.to_s, extract_options)

  destination_file = File.join(destination_path, "#{basename}.txt")
  # File.read closes the handle itself, so nothing leaks if a callback
  # below raises.
  record[text_column] = File.read(destination_file)
  record.run_callbacks(:save) { false }

  File.open(file.path)
end

#text_column? ⇒ Boolean

Check if the default text column is present

Returns:

  • (Boolean)


39
40
41
42
43
44
# File 'lib/paperclip/document/processors/reader.rb', line 39

# Checks whether the record's class has the default text column.
#
# Compares each column name (as a String) with #default_text_column.
#
# @return [Boolean] true when a matching column exists, false otherwise
def text_column?
  expected_column = default_text_column
  # any? yields a real Boolean, where detect returned the column or nil.
  instance.class.columns.any? { |column| column.name.to_s == expected_column }
end