Class: PdfExtractor::OutputParser

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf_extractor/output_parser.rb

Overview

Parses the line-oriented text output of PDFtk's dump_data and dump_data_fields operations into Ruby hashes.

Class Method Summary collapse

Class Method Details

.both_not_nil?(a = nil, b = nil) ⇒ Boolean

Returns:

  • (Boolean)


58
59
60
# File 'lib/pdf_extractor/output_parser.rb', line 58

# Reports whether both arguments are non-nil.
#
# Only nil counts as "missing"; false is treated as a present value.
#
# @param a [Object, nil] first value to check
# @param b [Object, nil] second value to check
# @return [Boolean] true only when neither a nor b is nil
def self.both_not_nil?(a = nil, b = nil)
  [a, b].none?(&:nil?)
end

.dump_data(output = nil) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/pdf_extractor/output_parser.rb', line 6

# Parses line-oriented "Key: Value" output into a flat Hash.
#
# Handling per line:
# * `InfoBegin` and `PageMediaBegin` marker lines, and blank lines, are skipped.
# * An `InfoKey` line and an `InfoValue` line are paired into one entry
#   (`fields[info_key] = info_value`), in whichever order they arrive.
# * Any other line is stored as `fields[key] = value`; a repeated key keeps
#   its last value, and a line without a ": " separator gets a nil value.
#
# @param output [#each, nil] enumerable yielding one String line at a time
# @return [Hash{String => String, nil}] parsed entries; empty when output is nil
def self.dump_data(output = nil)
  fields = {}
  return fields if output.nil?

  pending_key = pending_value = nil
  output.each do |line|
    # Limit the split to 2 parts so values that themselves contain ": "
    # (titles, timestamps, ...) are kept whole instead of being truncated.
    key, value = line.split(': ', 2).map(&:strip)
    next if key.nil? || key.empty?
    next if %w[InfoBegin PageMediaBegin].include?(key)

    case key
    when 'InfoKey' then pending_key = value
    when 'InfoValue' then pending_value = value
    else
      fields[key] = value
      next
    end

    # Emit the Info entry only once both halves of the pair have been seen.
    next if pending_key.nil? || pending_value.nil?

    fields[pending_key] = pending_value
    pending_key = pending_value = nil
  end
  fields
end

.dump_data_fields(output = nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/pdf_extractor/output_parser.rb', line 32

# Parses "Key: Value" output made of records separated by `---` lines
# into an Array of Hashes, one Hash per record.
#
# Blank lines are ignored. A line without a value (for example
# "FieldValue: " with nothing after the separator) is stored with an empty
# String value. Within one record a repeated key keeps its last value.
#
# @param output [#each, nil] enumerable yielding one String line at a time
# @return [Array<Hash{String => String}>, nil] parsed records, or nil when
#   output is nil
def self.dump_data_fields(output = nil)
  return if output.nil?

  fields = []
  field = {}
  output.each do |line|
    stripped = line.strip
    next if stripped.empty?

    if stripped == '---'
      # A separator closes the current record; empty records are dropped.
      fields << field unless field.empty?
      field = {}
    else
      # Split the raw line (not the stripped one) so "Key: " still yields an
      # empty value, and limit to 2 parts so values containing ": " survive.
      key, value = line.split(': ', 2)
      field[key.strip] = value.to_s.strip
    end
  end
  fields << field unless field.empty?
  fields
end

.dump_data_fields_key_value(output = nil) ⇒ Object



50
51
52
53
54
55
56
# File 'lib/pdf_extractor/output_parser.rb', line 50

# Builds a single Hash mapping each record's 'FieldName' to its 'FieldValue',
# using the records returned by dump_data_fields for the given output.
#
# When several records share a 'FieldName', the last one wins. A record
# lacking 'FieldValue' maps its name to nil.
#
# @param output [#each, nil] enumerable yielding one String line at a time
# @return [Hash, nil] name => value pairs, or nil when there are no records
#   (including when output is nil)
def self.dump_data_fields_key_value(output = nil)
  field_array = dump_data_fields(output)
  # dump_data_fields returns nil for nil output, so guard before calling empty?.
  return if field_array.nil? || field_array.empty?

  field_array.each_with_object({}) do |field, form|
    form[field['FieldName']] = field['FieldValue']
  end
end