Class: TikaMasala::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/tika-masala/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(jar_path = nil) ⇒ Parser

Returns a new instance of Parser.

Raises:

  • (FileNotFound) — if no file exists at the resolved jar_path



10
11
12
13
14
15
16
17
18
# File 'lib/tika-masala/parser.rb', line 10

# Builds a parser bound to a Tika application jar.
#
# @param jar_path [String, nil] location of the Tika jar; when nil, the
#   tika-app-1.5.jar under the `dependencies` directory (resolved relative
#   to this source file) is used
# @raise [FileNotFound] if no file exists at the resolved path
def initialize(jar_path = nil)
  if jar_path.nil?
    # The first '..' strips this file's own name from __FILE__; the next two
    # climb out of lib/tika-masala to reach the sibling `dependencies` directory.
    jar_path = File.expand_path(File.join('..', '..', '..', 'dependencies', 'tika-app-1.5.jar'), __FILE__)
  end

  # File.exist? is the supported spelling; File.exists? was deprecated and
  # removed in Ruby 3.2, where calling it raises NoMethodError.
  raise FileNotFound, "Jar #{jar_path} does not exist" unless File.exist?(jar_path)

  @jar_path = jar_path
end

Instance Attribute Details

#jar_path ⇒ Object (readonly)

Returns the value of attribute jar_path.



8
9
10
# File 'lib/tika-masala/parser.rb', line 8

# Read-only accessor for the jar location held in @jar_path.
#
# @return [Object] the value of @jar_path (presumably a String path to the
#   Tika jar, as assigned by the constructor — confirm against callers)
def jar_path
  @jar_path
end

Instance Method Details

#detect_type(file) ⇒ Object



28
29
30
# File 'lib/tika-masala/parser.rb', line 28

# Runs the Tika jar with the `--detect` flag against the given file.
# (Presumably this reports the detected document type — confirm against
# the Tika command-line documentation.)
#
# @param file [String] path handed to Tika as the final argument
# @return [Object] whatever #java_exec returns for this invocation
def detect_type(file)
  tika_args = ['--detect', file]
  java_exec(*tika_args)
end

#java_exec(*args) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/tika-masala/parser.rb', line 32

# Runs `java -jar <jar_path> <args...>` and returns what the process wrote
# to standard output.
#
# @param args [Array<#to_s>] command-line arguments appended after the jar path
# @return [String] the captured standard output when the process succeeds
# @raise [TikaError] when the process does not exit successfully; it is built
#   with the captured stdout, stderr and the Process::Status of the child
def java_exec(*args)
  # Passing the command as an argv list bypasses the shell entirely, so no
  # escaping is required and file names with spaces or quotes are safe.
  command = ['java', '-jar', @jar_path, *args.map(&:to_s)]

  # capture3 closes the child's stdin and drains stdout and stderr while the
  # process runs. Waiting for the exit status *before* reading (as with a bare
  # popen3 + wait_thr.value) deadlocks once the child fills a pipe buffer.
  # It also cleans up its own pipes, so a failure to spawn surfaces as the
  # original exception rather than a NoMethodError from closing nil handles.
  output, errors, status = Open3.capture3(*command)

  return output if status.success?

  # The Process::Status object itself is passed under :exitstatus, matching
  # what earlier versions of this method handed to TikaError.
  raise TikaError.new(
    stdout: output,
    stderr: errors,
    exitstatus: status
  )
end

#metadata(file) ⇒ Object



24
25
26
# File 'lib/tika-masala/parser.rb', line 24

# Runs the Tika jar with the `--metadata` flag against the given file.
# (Presumably this outputs the document's metadata — confirm against the
# Tika command-line documentation.)
#
# @param file [String] path handed to Tika as the final argument
# @return [Object] whatever #java_exec returns for this invocation
def metadata(file)
  java_exec('--metadata', file)
end

#parse(file) ⇒ Object



20
21
22
# File 'lib/tika-masala/parser.rb', line 20

# Runs the Tika jar with the `--text` flag against the given file.
# (Presumably this extracts the document's plain-text content — confirm
# against the Tika command-line documentation.)
#
# @param file [String] path handed to Tika as the final argument
# @return [Object] whatever #java_exec returns for this invocation
def parse(file)
  tika_args = ['--text', file]
  java_exec(*tika_args)
end