Class: ParseFile

Inherits:
Object
  • Object
show all
Defined in:
lib/parsefile.rb

Instance Method Summary

Constructor Details

#initialize(file, input_dir, output_dir, tika) ⇒ ParseFile

Returns a new instance of ParseFile.



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/parsefile.rb', line 10

# Store the file location, the working directories and the Tika endpoint.
#
# @param file       path of the file to parse (kept as @path)
# @param input_dir  input directory (kept as @input_dir)
# @param output_dir output directory (kept as @output_dir)
# @param tika       URL of a custom (or local) Tika server; any falsy value
#   is stored as nil. Per the original author's note, nil means the OKFN
#   hosted service over plain HTTP is used instead -- that fallback is not
#   implemented in this method; confirm where @tika is consumed.
def initialize(file, input_dir, output_dir, tika)
  @path, @input_dir, @output_dir = file, input_dir, output_dir
  @tika = tika || nil
end

Instance Method Details

#gen_output ⇒ Object

Generate output



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/parsefile.rb', line 42

# Build the JSON document for the parsed file.
#
# The output hash holds :full_path (from @path), every key of @metadata
# (merged after :full_path, so a metadata key of the same name wins) and
# :text, which is @text converted to UTF-8 with invalid or unconvertible
# bytes replaced by '?'.
#
# @return [String, nil] pretty-printed JSON, or nil when the text could not
#   be encoded or serialised (the error is reported on stderr)
def gen_output
  outhash = { :full_path => @path }
  # Kept outside the rescue on purpose: a missing @metadata still raises,
  # exactly as before.
  outhash.merge!(@metadata)
  begin
    # The options must be passed as keywords: a positional Hash is not
    # treated as encode options on Ruby 3.x and makes the call raise.
    outhash[:text] = @text.to_s.encode('UTF-8',
                                       invalid: :replace,
                                       undef: :replace,
                                       replace: '?')
    JSON.pretty_generate(outhash)
  rescue StandardError => e
    # Previously a leftover `binding.pry`, which stalls non-interactive runs.
    # Report the problem and keep the nil result callers already received.
    warn "ParseFile#gen_output failed for #{@path}: #{e.class}: #{e.message}"
    nil
  end
end

#parse_file ⇒ Object

Parse the file



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/parsefile.rb', line 24

# Parse the file: extract its metadata, OCR it, then build the output.
#
# Stores the metadata in @metadata and the OCR result in @text before
# delegating to gen_output.
#
# @return [Object, nil] whatever gen_output returns, or nil when any step
#   raises a StandardError (the error is reported on stderr)
def parse_file
  # Get metadata
  @metadata = ExtractMetadata.new(@path, @input_dir, @output_dir).extract

  # OCR the file; the relative path comes from the extracted metadata
  @text = OCRFile.new(@path, @input_dir, @output_dir, @metadata[:rel_path], @tika).ocr

  # Generate output and return
  gen_output
rescue StandardError => e
  # Previously a leftover `binding.pry`, which stalls non-interactive runs.
  # Report the failure and return nil so one bad file does not block the rest.
  warn "ParseFile#parse_file failed for #{@path}: #{e.class}: #{e.message}"
  nil
end