Class: PDF::Reader::Filter

Inherits:

Object

Object
PDF::Reader::Filter

Defined in: lib/pdf/reader/filter.rb

Overview

Various parts of a PDF file can be passed through a filter before being stored to provide support for features like compression and encryption. This class is for decoding that content.

Instance Method Summary collapse

#ascii85(data) ⇒ Object

Decode the specified data using the Ascii85 algorithm.
#asciihex(data) ⇒ Object

Decode the specified data using the AsciiHex algorithm.
#depredict(data, opts = {}) ⇒ Object
#filter(data) ⇒ Object

attempts to decode the specified data with the current filter.
#flate(data) ⇒ Object

Decode the specified data with the Zlib compression algorithm.
#initialize(name, options = nil) ⇒ Filter constructor

creates a new filter for decoding content.
#lzw(data) ⇒ Object

Decode the specified data with the LZW compression algorithm.
#png_depredict(data, opts = {}) ⇒ Object
#runlength(data) ⇒ Object

Decode the specified data with the RunLengthDecode compression algorithm.
#tiff_depredict(data, opts = {}) ⇒ Object

Constructor Details

#initialize(name, options = nil) ⇒ `Filter`

creates a new filter for decoding content.

Filters that are only used to encode image data are accepted, but the data is returned untouched. At this stage PDF::Reader has no need to decode images.

# File 'lib/pdf/reader/filter.rb', line 41

# Build a filter for the named PDF stream filter.
#
# name    - the PDF filter name; anything responding to #to_sym.
# options - decode parameters kept for later use by the decoders (may be nil).
#
# Filter names mapped to nil are accepted, but #filter then hands the data
# back untouched. Any name missing from the table raises
# UnsupportedFeatureError.
def initialize(name, options = nil)
  @options = options

  decoders = {
    :ASCII85Decode   => :ascii85,
    :ASCIIHexDecode  => :asciihex,
    :CCITTFaxDecode  => nil,
    :DCTDecode       => nil,
    :FlateDecode     => :flate,
    :JBIG2Decode     => nil,
    :JPXDecode       => nil,
    :LZWDecode       => :lzw,
    :RunLengthDecode => :runlength
  }

  key = name.to_sym
  # key? rather than a plain lookup: nil is a legitimate table value.
  unless decoders.key?(key)
    raise UnsupportedFeatureError, "Unknown filter: #{name}"
  end

  @filter = decoders[key]
end

Instance Method Details

#ascii85(data) ⇒ `Object`

Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85 rubygem.

# File 'lib/pdf/reader/filter.rb', line 75

# Decode the specified data using the Ascii85 algorithm.
#
# data - the encoded stream. A leading "<~" marker is prepended when it is
#        missing before the data is handed to Ascii85.decode.
#
# Returns whatever Ascii85.decode returns.
# Raises MalformedPDFError when decoding fails with a StandardError. Signals
# and exit requests (Interrupt, SystemExit, ...) are deliberately left alone
# so they are not reported as a broken PDF.
def ascii85(data)
  data = "<~#{data}" unless data.to_s[0, 2] == "<~"
  Ascii85.decode(data)
rescue StandardError => e
  # Oops, there was a problem decoding the stream
  raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class}: #{e})"
end

#asciihex(data) ⇒ `Object`

Decode the specified data using the AsciiHex algorithm.

# File 'lib/pdf/reader/filter.rb', line 85

# Decode the specified data using the AsciiHex algorithm.
#
# data - the hex encoded stream, optionally wrapped in "<" and ">". Any
#        character that is not a hex digit is ignored, and an odd number of
#        digits is padded with a trailing "0".
#
# The argument is never modified, so frozen strings and strings the caller
# still needs are safe to pass in.
#
# Returns the decoded bytes as a String.
# Raises MalformedPDFError when decoding fails with a StandardError.
def asciihex(data)
  hex = data.chomp(">")                   # drop the end-of-data marker
  hex = hex[1..-1] if hex[0, 1] == "<"    # drop the opening marker
  hex = hex.gsub(/[^A-Fa-f0-9]/, "")
  hex += "0" if hex.size % 2 == 1
  hex.scan(/../).map { |pair| pair.hex.chr }.join
rescue StandardError => e
  # Oops, there was a problem decoding the stream
  raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class}: #{e})"
end

#depredict(data, opts = {}) ⇒ `Object`

# File 'lib/pdf/reader/filter.rb', line 158

# Undo the predictor named by opts[:Predictor], if any.
#
# data - the decompressed stream.
# opts - decode parameters; nil is treated like an empty Hash when reading
#        the predictor value.
#
# Predictor 0 (or absent) and 1 return data as is, 2 is delegated to
# #tiff_depredict and 10 through 15 to #png_depredict.
# Raises MalformedPDFError for every other value.
def depredict(data, opts = {})
  params    = opts || {}
  predictor = params[:Predictor].to_i

  if predictor == 0 || predictor == 1
    data
  elsif predictor == 2
    tiff_depredict(data, opts)
  elsif predictor >= 10 && predictor <= 15
    png_depredict(data, opts)
  else
    raise  MalformedPDFError, "Unrecognised predictor value (#{predictor})"
  end
end

#filter(data) ⇒ `Object`

attempts to decode the specified data with the current filter

Filters that are only used to encode image data are accepted, but the data is returned untouched. At this stage PDF::Reader has no need to decode images.

# File 'lib/pdf/reader/filter.rb', line 64

# Decode data with the decoder chosen in the constructor.
#
# When no decoder was selected (@filter is nil) the data is handed back
# untouched; otherwise the decoder method named by @filter is invoked.
def filter(data)
  @filter.nil? ? data : send(@filter, data)
end

#flate(data) ⇒ `Object`

Decode the specified data with the Zlib compression algorithm

# File 'lib/pdf/reader/filter.rb', line 97

# Decode the specified data with the Zlib compression algorithm, then undo
# any predictor described by @options.
#
# data - the compressed stream.
#
# Returns the result of #depredict on the inflated data.
# Raises MalformedPDFError when inflating or de-predicting fails with a
# StandardError. Signals and exit requests are not intercepted.
def flate(data)
  # Inflate with the given window size and always release the zlib stream.
  inflate = lambda do |window_bits|
    stream = Zlib::Inflate.new(window_bits)
    begin
      stream.inflate(data)
    ensure
      stream.close
    end
  end

  inflated = begin
    inflate.call(Zlib::MAX_WBITS)
  rescue Zlib::DataError
    # by default, Ruby's Zlib assumes the data it's inflating
    # is RFC1951 deflated data, wrapped in a RFC1950 zlib container.
    # If that fails, a negative window size asks zlib to inflate
    # the data as a raw RFC1951 stream.
    #
    # See
    # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
    # - http://www.gzip.org/zlib/zlib_faq.html#faq38
    inflate.call(-Zlib::MAX_WBITS)
  end

  depredict(inflated, @options)
rescue StandardError => e
  # Oops, there was a problem inflating the stream
  raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class}: #{e})"
end

#lzw(data) ⇒ `Object`

Decode the specified data with the LZW compression algorithm

# File 'lib/pdf/reader/filter.rb', line 119

# Decode the specified data with the LZW compression algorithm.
#
# The stream is expanded by PDF::Reader::LZW.decode and the result is passed
# through #depredict together with the filter's options.
def lzw(data)
  depredict(PDF::Reader::LZW.decode(data), @options)
end

#png_depredict(data, opts = {}) ⇒ `Object`

# File 'lib/pdf/reader/filter.rb', line 199

# Undo PNG row prediction (predictor values 10 and above).
#
# data - the predicted stream: a sequence of rows, each one made of a filter
#        type byte followed by the filtered row bytes.
# opts - decode parameters:
#        :Predictor        - data is returned untouched when below 10
#        :Colors           - components per pixel, defaults to 1
#        :BitsPerComponent - defaults to 8
#        :Columns          - pixels per row, defaults to 1
#
# The filters work on whole bytes. "Left" is the byte one pixel back, where a
# pixel is rounded up to at least one byte, and a row occupies
# Colors * BitsPerComponent * Columns bits rounded up to whole bytes. Bytes
# that fall outside the image (left of the first pixel, above the first row)
# count as 0.
#
# Returns the unfiltered rows, without their filter bytes, as a String.
# Raises ArgumentError when a row uses a filter type outside 0..4.
def png_depredict(data, opts = {})
  return data if opts.nil? || opts[:Predictor].to_i < 10

  colors  = opts[:Colors] || 1
  bpc     = opts[:BitsPerComponent] || 8
  columns = opts[:Columns] || 1

  pixel_bits  = colors * bpc
  pixel_bytes = [(pixel_bits + 7) / 8, 1].max
  row_bytes   = (pixel_bits * columns + 7) / 8

  # Paeth picks whichever neighbour is closest to left + upper - upper_left,
  # preferring left, then upper, then upper_left on ties.
  paeth = lambda do |left, upper, upper_left|
    estimate = left + upper - upper_left
    pa = (estimate - left).abs
    pb = (estimate - upper).abs
    pc = (estimate - upper_left).abs

    if pa <= pb && pa <= pc
      left
    elsif pb <= pc
      upper
    else
      upper_left
    end
  end

  decoded  = []
  previous = [] # the row above, already unfiltered; empty for the first row

  data.unpack("C*").each_slice(row_bytes + 1) do |scanline|
    filter = scanline.first
    row    = scanline[1..-1]

    unless (0..4).include?(filter)
      raise ArgumentError, "Invalid filter algorithm #{filter}"
    end

    # Filter 0 (None) stores the row verbatim.
    unless filter == 0
      row.each_with_index do |byte, index|
        # row[index - pixel_bytes] was rewritten earlier in this pass, so
        # "left" is always an unfiltered value.
        has_left   = index >= pixel_bytes
        left       = has_left ? row[index - pixel_bytes] : 0
        upper      = previous[index] || 0
        upper_left = has_left ? (previous[index - pixel_bytes] || 0) : 0

        predicted = case filter
                    when 1 then left                 # Sub
                    when 2 then upper                # Up
                    when 3 then (left + upper) / 2   # Average
                    else        paeth.call(left, upper, upper_left)
                    end

        row[index] = (byte + predicted) % 256
      end
    end

    decoded.concat(row)
    previous = row
  end

  decoded.pack("C*")
end

#runlength(data) ⇒ `Object`

Decode the specified data with the RunLengthDecode compression algorithm

# File 'lib/pdf/reader/filter.rb', line 125

# Decode the specified data with the RunLengthDecode compression algorithm.
#
# data - the encoded stream. It is always processed byte by byte, whatever
#        encoding the String is tagged with.
#
# The stream is a series of runs, each introduced by a length byte:
#   0..127   - copy the following length + 1 bytes literally
#   128      - end of data, anything after it is ignored
#   129..255 - repeat the following byte 257 - length times
# A stream that ends in the middle of a run yields whatever was available.
#
# Returns the decoded bytes as a String.
def runlength(data)
  bytes = data.unpack("C*")
  out   = []
  pos   = 0

  while pos < bytes.size
    length = bytes[pos]
    pos += 1

    break if length == 128

    if length < 128
      # Literal run.
      out.concat(bytes[pos, length + 1])
      pos += length + 1
    else
      # Repeated byte; i.e., "\xFA\x00" ([250, 0]) will expand to
      # "\x00\x00\x00\x00\x00\x00\x00".
      repeated = bytes[pos]
      out.concat([repeated] * (257 - length)) if repeated
      pos += 1
    end
  end

  out.pack("C*")
end

#tiff_depredict(data, opts = {}) ⇒ `Object`

# File 'lib/pdf/reader/filter.rb', line 173

# Undo TIFF horizontal differencing (predictor 2).
#
# data - the predicted stream, made of rows of Colors * Columns bytes.
# opts - decode parameters (nil is treated as empty):
#        :BitsPerComponent - defaults to 8, the only supported value
#        :Colors           - components per pixel, defaults to 1
#        :Columns          - pixels per row, defaults to 1
#
# Every byte has the unfiltered byte one pixel to its left (in the same row)
# added to it, modulo 256; the first pixel of a row is stored as is.
#
# Returns the unfiltered bytes as a String.
# Raises UnsupportedFeatureError unless BitsPerComponent is 8.
def tiff_depredict(data, opts = {})
  opts  ||= {}
  bpc     = opts[:BitsPerComponent] || 8
  colors  = opts[:Colors] || 1
  columns = opts[:Columns] || 1

  if bpc != 8
    raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
  end

  pixel_bytes = (bpc * colors) / 8
  line_len    = pixel_bytes * columns
  unfiltered  = []

  # each_slice rejects a zero row length with ArgumentError rather than
  # spinning forever on it.
  data.unpack("C*").each_slice(line_len) do |row|
    row.each_with_index do |byte, index|
      # row[index - pixel_bytes] has already been rewritten in this pass.
      left = index < pixel_bytes ? 0 : row[index - pixel_bytes]
      row[index] = (byte + left) % 256
    end
    unfiltered.concat(row)
  end

  unfiltered.pack("C*")
end

Class: PDF::Reader::Filter

Overview

Instance Method Summary collapse

Constructor Details

#initialize(name, options = nil) ⇒ Filter

Instance Method Details

#ascii85(data) ⇒ Object

#asciihex(data) ⇒ Object

#depredict(data, opts = {}) ⇒ Object

#filter(data) ⇒ Object

#flate(data) ⇒ Object

#lzw(data) ⇒ Object

#png_depredict(data, opts = {}) ⇒ Object

#runlength(data) ⇒ Object

#tiff_depredict(data, opts = {}) ⇒ Object