Class: OCRFile

Inherits:
Object
  • Object
show all
Defined in:
lib/ocrfile.rb

Instance Method Summary collapse

Constructor Details

#initialize(file, input_dir, output_dir, rel_path, tika) ⇒ OCRFile

Returns a new instance of OCRFile.



6
7
8
9
10
11
12
13
# File 'lib/ocrfile.rb', line 6

# Sets up the state for one document whose text is to be extracted.
#
# @param file [String] path of the document; stored as @path and later read
#   or uploaded by the give_me_text* methods
# @param input_dir [Object] stored as @input_dir (not read by the methods
#   shown alongside this one)
# @param output_dir [String] prefix that #ocr combines with rel_path to find
#   cached "<rel_path>.json" output and to place error_log.txt
# @param rel_path [String] path fragment appended to output_dir by #ocr
# @param tika [String, nil] prefix for the "/tika" endpoint used by
#   #give_me_text_local; when falsy #ocr calls #give_me_text instead
def initialize(file, input_dir, output_dir, rel_path, tika)
  @path, @rel_path = file, rel_path
  @input_dir, @output_dir = input_dir, output_dir
  @tika = tika
  @text = "" # filled in later by the extraction / loading methods
end

Instance Method Details

#give_me_text ⇒ Object

Send the file to the hosted givemetext service for text extraction



45
46
47
48
49
50
51
52
# File 'lib/ocrfile.rb', line 45

# Uploads the file at @path as a multipart form post to the hosted
# givemetext endpoint, stores the response body in @text, and then runs
# gotten_text_ok? on it.
#
# @return [Object] whatever gotten_text_ok? returns; its
#   throw(:extraction_error) is not caught here
def give_me_text
  request = Curl::Easy.new("http://givemetext.okfnlabs.org/tika/tika/form")
  request.multipart_form_post = true

  upload_field = Curl::PostField.file('file', @path)
  request.http_post(upload_field)

  @text = request.body_str
  gotten_text_ok?(@text)
end

#give_me_text_local(mime_magic) ⇒ Object



54
55
56
57
58
59
60
61
62
63
# File 'lib/ocrfile.rb', line 54

# Extracts text by PUTting the file to a Tika server.
#
# Sends the raw bytes of @path to "<@tika>/tika" asking for a text/plain
# response, stores the response body in @text, and validates it with
# gotten_text_ok?.
#
# @param mime_magic [#type, nil] MIME information for the file (#ocr passes
#   the result of MimeMagic.by_path, which may be nil when the type is not
#   recognised). When nil, the upload is labelled with the generic
#   "application/octet-stream" type instead of failing on nil.type.
# @return [Object] whatever gotten_text_ok? returns; its
#   throw(:extraction_error) is not caught here
def give_me_text_local(mime_magic)
  c = Curl::Easy.new("#{@tika}/tika")

  # binread returns the bytes untouched (binary mode, ASCII-8BIT). File.read
  # opens in text mode, which can alter binary documents on platforms that
  # translate newlines and tags the data with a text encoding.
  file_data = File.binread(@path)

  c.headers['Content-Type'] = mime_magic ? mime_magic.type : "application/octet-stream"
  c.headers['Accept'] = "text/plain"
  c.http_put(file_data)

  @text = c.body_str
  gotten_text_ok?(@text)
end

#gotten_text_ok?(text) ⇒ Boolean

Checks if text was successfully extracted

Returns:

  • (Boolean)


66
67
68
# File 'lib/ocrfile.rb', line 66

# Checks whether the extraction response is usable text rather than the
# known "stream closed" failure message.
#
# Failure is signalled with throw(:extraction_error), not an exception, so
# a caller may intercept it with catch(:extraction_error).
#
# @param text [String] response body returned by the extraction service
# @return [Boolean] true when the failure marker is absent (the failure
#   path never returns normally)
def gotten_text_ok?(text)
  throw :extraction_error if text.include?("java.io.IOException: Stream Closed")

  # Explicit true: a bare modifier-if would evaluate to nil, making this
  # predicate falsy even for good text.
  true
end

#load_extracted_text(file) ⇒ Object

Load text that is already extracted



39
40
41
42
# File 'lib/ocrfile.rb', line 39

# Reuses text from an earlier run instead of extracting it again.
#
# Prints a notice, parses the given file as JSON, and stores the value under
# its "text" key in @text.
#
# @param file [String] path of a JSON file holding an object with a "text" key
# @return [Object] the value stored in @text (nil if the key is absent)
def load_extracted_text(file)
  puts "file already exists"

  saved = JSON.parse(File.read(file))
  @text = saved["text"]
end

#ocr ⇒ Object

OCR file



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/ocrfile.rb', line 16

# Produces the text for this file, preferring previously saved output.
#
# Order of preference:
# 1. "<@output_dir><@rel_path>.json" exists: load its text.
# 2. @tika is set: extract through give_me_text_local.
# 3. otherwise: extract through give_me_text.
#
# Any StandardError raised along the way is swallowed; the file's path is
# appended as one line to "<@output_dir>/error_log.txt" instead.
#
# @return [Object] the current value of @text
def ocr
  begin
    detected_mime = MimeMagic.by_path(@path)
    cached_json = @output_dir + @rel_path + ".json"

    if File.exist?(cached_json)
      load_extracted_text(cached_json)
    elsif @tika
      give_me_text_local(detected_mime)
    else
      give_me_text
    end
  rescue
    # Best effort: record which file failed and carry on.
    # NOTE(review): the give_me_text* methods assign @text before validating
    # it, so a response that failed validation may still be returned below.
    failed_entry = @path + "\n"
    IO.write(@output_dir + "/error_log.txt", failed_entry, mode: 'a')
  end

  @text
end