Method: Docsplit::InfoExtractor#extract_all

Defined in:
lib/docsplit/info_extractor.rb

#extract_all(pdfs, opts) ⇒ Object

Raises:
- (ExtractionFailed) when the `pdfinfo` command exits with a non-zero status


23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/docsplit/info_extractor.rb', line 23

# Runs the `pdfinfo` command-line tool on a PDF and collects every metadata
# field that the patterns in MATCHERS recognise in its output.
#
# @param pdfs a single path or a (possibly nested) list of paths; only the
#   first entry after flattening is inspected
# @param opts accepted but not read by this method
# @return [Hash] one entry per MATCHERS key whose pattern matched, holding
#   the first capture group (converted with to_i for :length); keys whose
#   pattern does not match are omitted
# @raise [ExtractionFailed] with pdfinfo's combined stdout/stderr as the
#   message when the command does not exit successfully
def extract_all(pdfs, opts)
  pdf = [pdfs].flatten.first
  # stderr is folded into stdout so a failure message can be reported.
  result = `pdfinfo #{ESCAPE[pdf]} 2>&1`.chomp
  raise ExtractionFailed, result unless $?.success?

  # Make the output safe to run regexps against.
  # ruby 1.8 (iconv), 1.9+ (String#encode), 2.1+ (String#scrub) :
  if String.method_defined?(:encode)
    unless result.valid_encoding?
      if String.method_defined?(:scrub)
        # Treat the bytes as UTF-8 (the same assumption the Iconv branch
        # makes) and drop only the invalid ones. Transcoding from 'binary'
        # would also throw away every valid non-ASCII character.
        result = result.force_encoding('UTF-8').scrub('')
      else
        result.encode!('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => "")
      end
    end
  else
    require 'iconv' unless defined?(Iconv)
    result = Iconv.new('UTF-8//IGNORE', 'UTF-8').iconv(result)
  end

  info = {}
  MATCHERS.each do |key, matcher|
    match = result.match(matcher)
    answer = match && match[1]
    next unless answer

    info[key] = key == :length ? answer.to_i : answer
  end
  info
end