Class: PDF::Reader::Filter

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/filter.rb

Overview

Various parts of a PDF file can be passed through a filter before being stored to provide support for features like compression and encryption. This class is for decoding that content.

Instance Method Summary collapse

Constructor Details

#initialize(name, options = nil) ⇒ Filter

Creates a new filter for decoding content.

Filters that are only used to encode image data are accepted, but the data is returned untouched. At this stage PDF::Reader has no need to decode images.



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/pdf/reader/filter.rb', line 41

# Builds a filter for the PDF filter called +name+.
#
# name    - the filter name as it appears in the PDF (anything responding
#           to to_sym)
# options - optional decode parameters, stored untouched in @options
#
# @filter ends up holding the name of the decoding method to dispatch to,
# or nil for the filters that are accepted but left undecoded.
#
# Raises UnsupportedFeatureError when the name is not one of the known
# filters.
def initialize(name, options = nil)
  @options = options

  # known filter name => decoding method (nil means "pass data through")
  decoders = {
    :ASCII85Decode   => :ascii85,
    :ASCIIHexDecode  => :asciihex,
    :CCITTFaxDecode  => nil,
    :DCTDecode       => nil,
    :FlateDecode     => :flate,
    :JBIG2Decode     => nil,
    :JPXDecode       => nil,
    :LZWDecode       => :lzw,
    :RunLengthDecode => :runlength
  }

  key = name.to_sym
  unless decoders.key?(key)
    raise UnsupportedFeatureError, "Unknown filter: #{name}"
  end

  @filter = decoders[key]
end

Instance Method Details

#ascii85(data) ⇒ Object

Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85 rubygem.



75
76
77
78
79
80
81
# File 'lib/pdf/reader/filter.rb', line 75

# Decode the specified data using the Ascii85 algorithm. Relies on the
# Ascii85 rubygem.
#
# data - the encoded stream. The "<~" opening delimiter is prepended when
#        the data does not already start with it.
#
# Returns whatever Ascii85::decode returns for the delimited data.
# Raises MalformedPDFError when decoding fails with a StandardError.
def ascii85(data)
  data = "<~#{data}" unless data.to_s[0,2] == "<~"
  Ascii85::decode(data)
rescue StandardError => e
  # Only StandardError is translated: rescuing Exception here would also
  # relabel Interrupt, SystemExit and NoMemoryError as a malformed PDF.
  raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
end

#asciihex(data) ⇒ Object

Decode the specified data using the AsciiHex algorithm.



85
86
87
88
89
90
91
92
93
94
# File 'lib/pdf/reader/filter.rb', line 85

# Decode the specified data using the AsciiHex algorithm.
#
# data - the hex encoded stream, optionally wrapped in "<" and ">".
#        The caller's string is never modified, so frozen strings are
#        accepted.
#
# Any character that is not a hex digit is ignored, and an odd number of
# digits is padded with a trailing "0" before decoding.
#
# Returns the decoded bytes as a String.
# Raises MalformedPDFError when decoding fails with a StandardError.
def asciihex(data)
  # chop/gsub (rather than chop!/gsub!) so the argument is left untouched
  hex = data[-1,1] == ">" ? data.chop : data
  hex = hex[1, hex.size] if hex[0,1] == "<"
  hex = hex.gsub(/[^A-Fa-f0-9]/, "")
  hex << "0" if hex.size % 2 == 1
  hex.scan(/.{2}/).map { |pair| pair.hex.chr }.join("")
rescue StandardError => e
  # Only StandardError is translated: rescuing Exception here would also
  # relabel Interrupt, SystemExit and NoMemoryError as a malformed PDF.
  raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
end

#depredict(data, opts = {}) ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/pdf/reader/filter.rb', line 158

# Reverses the predictor that was applied to +data+ before compression.
#
# data - the decompressed stream
# opts - the decode parameters; only :Predictor is read here. A nil opts
#        or a missing :Predictor is treated as predictor 0.
#
# Predictors 0 and 1 return the data untouched, 2 is handed to
# tiff_depredict and 10 through 15 are handed to png_depredict.
#
# Raises MalformedPDFError for any other predictor value.
def depredict(data, opts = {})
  settings  = opts || {}
  predictor = settings[:Predictor].to_i

  return data if predictor == 0 || predictor == 1
  return tiff_depredict(data, opts) if predictor == 2
  return png_depredict(data, opts) if (10..15).include?(predictor)

  raise  MalformedPDFError, "Unrecognised predictor value (#{predictor})"
end

#filter(data) ⇒ Object

Attempts to decode the specified data with the current filter.

Filters that are only used to encode image data are accepted, but the data is returned untouched. At this stage PDF::Reader has no need to decode images.



64
65
66
67
68
69
70
# File 'lib/pdf/reader/filter.rb', line 64

# Runs +data+ through the decoding method named by @filter.
#
# When @filter is nil the data is handed back unchanged; otherwise the
# method named by @filter is invoked with the data and its result is
# returned.
def filter(data)
  if @filter.nil?
    # no decoder registered for this filter: pass the data straight through
    data
  else
    send(@filter, data)
  end
end

#flate(data) ⇒ Object

Decode the specified data with the Zlib compression algorithm



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/pdf/reader/filter.rb', line 97

# Decode the specified data with the Zlib compression algorithm, then undo
# any predictor described by @options.
#
# data - the compressed stream
#
# Returns the result of depredict on the inflated data.
# Raises MalformedPDFError when inflating or depredicting fails with a
# StandardError.
def flate(data)
  # Inflates +data+ with the given window size and always releases the
  # underlying zlib stream, even when inflation raises.
  inflate = lambda do |window_bits|
    zstream = Zlib::Inflate.new(window_bits)
    begin
      zstream.inflate(data)
    ensure
      zstream.close
    end
  end

  inflated = begin
    inflate.call(Zlib::MAX_WBITS)
  rescue Zlib::DataError
    # by default, Ruby's Zlib assumes the data it's inflating
    # is RFC1951 deflated data, wrapped in a RFC1950 zlib container.
    # If that fails, then use a negative window size to attempt to inflate
    # the data as a raw RFC1951 stream.
    #
    # See
    # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
    # - http://www.gzip.org/zlib/zlib_faq.html#faq38
    inflate.call(-Zlib::MAX_WBITS)
  end

  depredict(inflated, @options)
rescue StandardError => e
  # Only StandardError is translated: rescuing Exception here would also
  # relabel Interrupt, SystemExit and NoMemoryError as a malformed PDF.
  raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
end

#lzw(data) ⇒ Object

Decode the specified data with the LZW compression algorithm



119
120
121
122
# File 'lib/pdf/reader/filter.rb', line 119

# Decode the specified data with the LZW compression algorithm.
#
# The stream is decoded by PDF::Reader::LZW.decode and the result is then
# passed through depredict together with @options.
def lzw(data)
  depredict(PDF::Reader::LZW.decode(data), @options)
end

#png_depredict(data, opts = {}) ⇒ Object



199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/pdf/reader/filter.rb', line 199

# Reverses the PNG row filters that were applied to +data+.
#
# data - the filtered stream: a sequence of scanlines, each made of one
#        filter-type byte followed by the filtered row bytes
# opts - the decode parameters. :Predictor must be 10 or more for any work
#        to happen; :Colors and :Columns both default to 1 when absent
#        (:Columns previously had no default, so a stream that relied on
#        it raised TypeError).
#
# NOTE(review): the row width is computed as Colors * Columns bytes, which
# presumably assumes 8 bits per component - :BitsPerComponent is never
# read here.
#
# Returns the unfiltered bytes as a String, or +data+ itself when opts is
# nil or the predictor is below 10.
# Raises ArgumentError when a scanline carries a filter type outside 0..4.
def png_depredict(data, opts = {})
  return data if opts.nil? || opts[:Predictor].to_i < 10

  pixel_bytes     = opts[:Colors]  || 1
  columns         = opts[:Columns] || 1
  scanline_length = (pixel_bytes * columns) + 1

  prior   = nil # the previous row, already unfiltered (nil for the first row)
  decoded = []

  # each_slice walks the stream once; repeatedly slice!-ing rows off the
  # front of the array re-copied the remainder for every row.
  data.unpack("C*").each_slice(scanline_length) do |row_data|
    filter = row_data.shift

    # Each loop rewrites row_data in place, left to right, so that the
    # "left" neighbour it reads has already been unfiltered.
    case filter
    when 0 # None
    when 1 # Sub
      row_data.each_with_index do |byte, index|
        left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
        row_data[index] = (byte + left) % 256
      end
    when 2 # Up
      row_data.each_with_index do |byte, index|
        upper = prior ? prior[index] : 0
        row_data[index] = (upper + byte) % 256
      end
    when 3 # Average
      row_data.each_with_index do |byte, index|
        upper = prior ? prior[index] : 0
        left  = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
        row_data[index] = (byte + (left + upper) / 2) % 256
      end
    when 4 # Paeth
      row_data.each_with_index do |byte, index|
        left       = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
        upper      = prior ? prior[index] : 0
        upper_left = prior && index >= pixel_bytes ? prior[index - pixel_bytes] : 0

        # pick whichever neighbour is closest to the initial estimate,
        # preferring left, then upper, then upper-left on ties
        estimate = left + upper - upper_left
        pa = (estimate - left).abs
        pb = (estimate - upper).abs
        pc = (estimate - upper_left).abs

        paeth = if pa <= pb && pa <= pc
                  left
                elsif pb <= pc
                  upper
                else
                  upper_left
                end

        row_data[index] = (byte + paeth) % 256
      end
    else
      raise ArgumentError, "Invalid filter algorithm #{filter}"
    end

    decoded.concat(row_data)
    prior = row_data
  end

  decoded.pack("C*")
end

#runlength(data) ⇒ Object

Decode the specified data with the RunLengthDecode compression algorithm



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/pdf/reader/filter.rb', line 125

# Decode the specified data with the RunLengthDecode compression algorithm.
#
# data - the run-length encoded stream
#
# The stream is read strictly as bytes (bytesize/getbyte/byteslice), so
# the result does not depend on the encoding the string is tagged with.
# The original mixed byte reads with character slicing, which went out of
# step on strings holding multibyte characters.
#
# Returns the decoded String. Decoding stops at the first length byte of
# 128 or at the end of the data, whichever comes first.
def runlength(data)
  pos = 0
  out = +""

  while pos < data.bytesize
    length = data.getbyte(pos)
    pos += 1

    # a length byte of 128 marks the end of the data
    break if length == 128

    if length < 128
      # When the length is < 128, we copy the following length+1 bytes
      # literally.
      out << data.byteslice(pos, length + 1)
      pos += length + 1
    else
      # When the length is > 128, we copy the next byte (257 - length)
      # times; i.e., "\xFA\x00" ([250, 0]) will expand to
      # "\x00\x00\x00\x00\x00\x00\x00".
      out << data.byteslice(pos, 1) * (257 - length)
      pos += 1
    end
  end

  out
end

#tiff_depredict(data, opts = {}) ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/pdf/reader/filter.rb', line 173

# Reverses the TIFF predictor (predictor 2) that was applied to +data+.
#
# data - the predicted stream
# opts - the decode parameters. :BitsPerComponent defaults to 8, and
#        :Colors and :Columns default to 1 when absent (they previously
#        had no default, so the method raised TypeError even when called
#        with its own default opts).
#
# Each row is Colors * Columns bytes long; every byte has the value of the
# byte one pixel to its left added back to it, modulo 256.
#
# Returns the unfiltered bytes as a String.
# Raises UnsupportedFeatureError unless BitsPerComponent is 8.
def tiff_depredict(data, opts = {})
  opts ||= {}

  bpc = opts[:BitsPerComponent] || 8
  if bpc != 8
    raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
  end

  pixel_bytes = (bpc * (opts[:Colors] || 1)) / 8
  line_len    = pixel_bytes * (opts[:Columns] || 1)
  unfiltered  = []

  # each_slice raises ArgumentError for a zero line length, where the
  # previous position-based loop never terminated.
  data.unpack("C*").each_slice(line_len) do |row_data|
    # rewritten in place, left to right, so "left" is already unfiltered
    row_data.each_with_index do |byte, index|
      left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
      row_data[index] = (byte + left) % 256
    end
    unfiltered.concat(row_data)
  end

  unfiltered.pack("C*")
end