Class: PDF::Reader::Filter

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/filter.rb

Overview

Various parts of a PDF file can be passed through a filter before being stored to provide support for features like compression and encryption. This class is for decoding that content.

Instance Method Summary collapse

Constructor Details

#initialize(name, options = nil) ⇒ Filter

Creates a new filter for decoding content.

Filters that are only used to encode image data are accepted, but the data is returned untouched. At this stage PDF::Reader has no need to decode images.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/pdf/reader/filter.rb', line 40

# Builds a filter for the PDF filter called +name+.
#
# name    - the PDF filter name; anything responding to to_sym is accepted
# options - decode parameters, stored untouched in @options
#
# @filter ends up holding the name of the decoding method to dispatch to,
# or nil for the image-only filters (CCITTFaxDecode, DCTDecode, JBIG2Decode),
# whose data is passed through unchanged.
#
# Raises UnsupportedFeatureError when the filter name is not recognised.
def initialize(name, options = nil)
  @options = options

  # PDF filter name => decoding method (nil means "leave the data alone")
  decoders = {
    :ASCII85Decode  => :ascii85,
    :ASCIIHexDecode => :asciihex,
    :CCITTFaxDecode => nil,
    :DCTDecode      => nil,
    :FlateDecode    => :flate,
    :JBIG2Decode    => nil,
    :LZWDecode      => :lzw
  }

  key = name.to_sym
  unless decoders.key?(key)
    raise UnsupportedFeatureError, "Unknown filter: #{name}"
  end

  @filter = decoders[key]
end

Instance Method Details

#ascii85(data) ⇒ Object

Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85 rubygem.



71
72
73
74
75
76
77
# File 'lib/pdf/reader/filter.rb', line 71

# Decode the specified data using the Ascii85 algorithm. Relies on the
# Ascii85 rubygem.
#
# data - the encoded stream; the "<~" opening delimiter is prepended when
#        the data does not already start with it
#
# Returns the result of Ascii85.decode for the delimited input.
# Raises MalformedPDFError when decoding fails with a StandardError.
def ascii85(data)
  data = "<~#{data}" unless data.to_s[0, 2] == "<~"
  Ascii85::decode(data)
rescue StandardError => e
  # Only StandardError is translated. Rescuing Exception would also turn
  # Interrupt, SystemExit and NoMemoryError into a "malformed PDF" report.
  raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
end

#asciihex(data) ⇒ Object

Decode the specified data using the AsciiHex algorithm.



81
82
83
84
85
86
87
88
89
90
# File 'lib/pdf/reader/filter.rb', line 81

# Decode the specified data using the AsciiHex algorithm.
#
# data - the encoded stream; an enclosing "<" / ">" pair is optional and any
#        character that is not a hex digit is ignored
#
# The caller's string is never modified, so frozen strings are accepted.
#
# Returns a String built from the decoded bytes.
# Raises MalformedPDFError when decoding fails with a StandardError.
def asciihex(data)
  hex = data
  hex = hex[0...-1] if hex[-1, 1] == ">"
  hex = hex[1, hex.size] if hex[0, 1] == "<"
  hex = hex.gsub(/[^A-Fa-f0-9]/, "")
  # a trailing odd digit is treated as if it were followed by "0"
  hex += "0" if hex.size % 2 == 1
  hex.scan(/.{2}/).map { |pair| pair.hex.chr }.join("")
rescue StandardError => e
  # Only StandardError is translated. Rescuing Exception would also turn
  # Interrupt, SystemExit and NoMemoryError into a "malformed PDF" report.
  raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
end

#depredict(data, opts = {}) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/pdf/reader/filter.rb', line 120

# Reverses the predictor named by opts[:Predictor], if any.
#
# data - the already-decompressed stream data
# opts - decode parameters; nil or a missing :Predictor counts as predictor 0
#
# Predictors 0 and 1 return the data unchanged, 2 is handed to
# tiff_depredict and 10 through 15 are handed to png_depredict.
#
# Raises MalformedPDFError for any other predictor value.
def depredict(data, opts = {})
  predictor = opts ? opts[:Predictor].to_i : 0

  if predictor == 0 || predictor == 1
    data
  elsif predictor == 2
    tiff_depredict(data, opts)
  elsif predictor >= 10 && predictor <= 15
    png_depredict(data, opts)
  else
    raise MalformedPDFError, "Unrecognised predictor value (#{predictor})"
  end
end

#filter(data) ⇒ Object

Attempts to decode the specified data with the current filter.

Filters that are only used to encode image data are accepted, but the data is returned untouched. At this stage PDF::Reader has no need to decode images.



60
61
62
63
64
65
66
# File 'lib/pdf/reader/filter.rb', line 60

# Runs +data+ through the decoding method selected when the filter was built.
#
# When @filter is nil (no decoder for this filter) the data is handed back
# untouched; otherwise the method named by @filter is called with the data
# and its result is returned.
def filter(data)
  if @filter.nil?
    data
  else
    send(@filter, data)
  end
end

#flate(data) ⇒ Object

Decode the specified data with the Zlib compression algorithm



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/pdf/reader/filter.rb', line 93

# Decode the specified data with the Zlib compression algorithm, then
# reverse any predictor described by @options.
#
# data - the compressed stream
#
# Returns the result of depredict for the inflated data.
# Raises MalformedPDFError when inflating or depredicting fails with a
# StandardError.
def flate(data)
  inflated = begin
    Zlib::Inflate.new.inflate(data)
  rescue Zlib::DataError
    # by default, Ruby's Zlib assumes the data it's inflating
    # is RFC1951 deflated data, wrapped in a RFC1950 zlib container.
    # If that fails, then use an undocumented 'feature' to attempt to inflate
    # the data as a raw RFC1951 stream.
    #
    # See
    # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
    # - http://www.gzip.org/zlib/zlib_faq.html#faq38
    Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
  end
  depredict(inflated, @options)
rescue StandardError => e
  # Only StandardError is translated. Rescuing Exception would also turn
  # Interrupt, SystemExit and NoMemoryError into a "malformed PDF" report.
  raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
end

#lzw(data) ⇒ Object

Decode the specified data with the LZW compression algorithm



115
116
117
118
# File 'lib/pdf/reader/filter.rb', line 115

# Decode the specified data with the LZW compression algorithm, then reverse
# any predictor described by @options.
#
# Decompression is delegated to PDF::Reader::LZW.decode; its result is passed
# through depredict and returned.
def lzw(data)
  depredict(PDF::Reader::LZW.decode(data), @options)
end

#png_depredict(data, opts = {}) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/pdf/reader/filter.rb', line 139

# Reverses PNG row prediction on already-decompressed data.
#
# data - a String made of scanlines, each one a filter-type byte followed by
#        the filtered bytes of that row
# opts - decode parameters. :Predictor must be 10 or more for anything to
#        happen; :Columns is the number of bytes per row and defaults to 1
#        when absent
#
# Every pixel is treated as a single byte (pixel_bytes is fixed at 1).
#
# Returns data untouched when opts is nil or the predictor is below 10,
# otherwise a binary String holding the reconstructed rows without their
# filter-type bytes.
# Raises ArgumentError for a negative :Columns or a row whose filter type is
# not 0 (None), 1 (Sub), 2 (Up), 3 (Average) or 4 (Paeth).
def png_depredict(data, opts = {})
  return data if opts.nil? || opts[:Predictor].to_i < 10

  columns = (opts[:Columns] || 1).to_i
  raise ArgumentError, "Invalid Columns value #{columns}" if columns < 0

  pixel_bytes     = 1 #pixel_bitlength / 8
  scanline_length = (pixel_bytes * columns) + 1
  previous        = [] # reconstructed bytes of the row above; empty for row 0
  decoded         = []

  data.unpack("C*").each_slice(scanline_length) do |scanline|
    filter = scanline.first
    raise ArgumentError, "Invalid filter algorithm #{filter}" unless (0..4).include?(filter)

    current = scanline.drop(1)
    current.each_index do |index|
      # Neighbours that fall outside the image count as zero. "left" reads
      # from current, so it is the already reconstructed value.
      left       = index < pixel_bytes ? 0 : current[index - pixel_bytes]
      upper      = previous.fetch(index, 0)
      upper_left = index < pixel_bytes ? 0 : previous.fetch(index - pixel_bytes, 0)

      prediction =
        case filter
        when 1 then left                # Sub
        when 2 then upper               # Up
        when 3 then (left + upper) / 2  # Average (integer division floors)
        when 4                          # Paeth
          estimate      = left + upper - upper_left
          to_left       = (estimate - left).abs
          to_upper      = (estimate - upper).abs
          to_upper_left = (estimate - upper_left).abs

          if to_left <= to_upper && to_left <= to_upper_left
            left
          elsif to_upper <= to_upper_left
            upper
          else
            upper_left
          end
        else 0                          # None
        end

      current[index] = (current[index] + prediction) % 256
    end

    decoded.concat(current)
    previous = current
  end

  decoded.pack("C*")
end

#tiff_depredict(data, opts = {}) ⇒ Object



135
136
137
# File 'lib/pdf/reader/filter.rb', line 135

# Placeholder for reversing the TIFF predictor.
#
# Both arguments are ignored: the method always raises
# UnsupportedFeatureError.
def tiff_depredict(data, opts = {})
  raise UnsupportedFeatureError.new("TIFF predictor not supported")
end