Class: ParseFile

Inherits:
Object
  • Object
show all
Defined in:
lib/parsefile.rb

Instance Method Summary collapse

Constructor Details

#initialize(file, input_dir, output_dir, tika) ⇒ ParseFile

Returns a new instance of ParseFile.



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/parsefile.rb', line 8

# Sets up the state needed to parse one file.
#
# @param file path of the file to parse; stored as @path
# @param input_dir input directory; stored unchanged as @input_dir
# @param output_dir output directory; stored unchanged as @output_dir
# @param tika URL of a Tika server, or nil/false when none is supplied
def initialize(file, input_dir, output_dir, tika)
  @path, @input_dir, @output_dir = file, input_dir, output_dir

  # Any falsy value (nil or false) is normalized to nil.
  # NOTE(review): the original comment suggests a nil @tika makes downstream
  # code fall back to OKFN's public service over plain HTTP -- confirm in OCRFile.
  @tika = tika || nil
end

Instance Method Details

#gen_output ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/parsefile.rb', line 38

# Serializes the parsed file to a pretty-printed JSON document.
#
# The document holds :full_path (taken from @path), every entry of @metadata,
# and :text (@text converted to a String and transcoded to UTF-8, with invalid
# or unconvertible bytes replaced by '?').
#
# @return [String, nil] the JSON document, or nil when transcoding or JSON
#   generation fails (the error is reported on stderr)
def gen_output
  outhash = { full_path: @path }
  # Tolerate a missing @metadata (e.g. gen_output called before parse_file).
  outhash.merge!(@metadata || {})
  begin
    # The options must be passed as keywords: Ruby 3 no longer converts a
    # trailing positional Hash into keyword options, so a braced Hash would be
    # taken as the source-encoding argument and raise TypeError.
    outhash[:text] = @text.to_s.encode('UTF-8',
                                       invalid: :replace,
                                       undef: :replace,
                                       replace: '?')
    JSON.pretty_generate(outhash)
  rescue StandardError => e
    # Report and give up on this file instead of dropping into an interactive
    # debugger, which blocks unattended runs (or fails when pry is not loaded).
    warn "gen_output failed for #{@path}: #{e.class}: #{e.message}"
    nil
  end
end

#parse_file ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/parsefile.rb', line 21

# Runs the whole pipeline for @path: extracts metadata, obtains the text via
# OCRFile, and serializes both with gen_output.
#
# Side effects: prints a progress line to stdout and stores the intermediate
# results in @metadata and @text.
#
# @return [Object, nil] whatever gen_output returns, or nil when any step
#   raises a StandardError (the error is reported on stderr)
def parse_file
  puts "sending file: #{@path}"

  @metadata = ExtractMetadata.new(@path, @input_dir, @output_dir).extract

  # The OCR step needs the relative path computed by the metadata step.
  ocr = OCRFile.new(@path, @input_dir, @output_dir, @metadata[:rel_path], @tika)
  @text = ocr.ocr

  gen_output
rescue StandardError => e
  # TODO: use a global debug / log
  # Report and skip this file rather than opening an interactive debugger,
  # which blocks unattended runs (or fails when pry is not loaded).
  warn "parse_file failed for #{@path}: #{e.class}: #{e.message}"
  nil
end