Class: Rpdf2txt::Image

Inherits:

Stream

Object
PdfObject
Stream
Rpdf2txt::Image

show all

Defined in: lib/rpdf2txt/object.rb

Direct Known Subclasses

InlineImage

Constant Summary collapse

# Maps a PDF device colorspace name to the channel-map string handed to
# Magick::Image.constitute by #image. The length of each value is also
# used there as the number of components per pixel.
# Frozen so the shared lookup table cannot be modified at runtime.
COLORMAPS = {
  '/DeviceRGB'  => 'RGB',
  '/DeviceGray' => 'I',
  '/DeviceCMYK' => 'CMYK',
}.freeze

Constants inherited from Stream

Stream::BT_PATTERN, Stream::ET_PATTERN, Stream::FAIL_PTRN

Instance Attribute Summary

Attributes inherited from PdfObject

#attributes, #decoder, #oid, #src

Instance Method Summary collapse

Methods inherited from Stream

#append, #decode_raw_stream, #decoded_stream, #decoded_stream=, #extract_horizontal_rules, #extract_nontext_objects, #extract_text_objects, #flate_decode, #lzw_decode, #raw_stream, #to_cmap

Methods inherited from PdfObject

#_parse_attributes, #build_tree, #catalogue_object, #decoded_stream, #extract_attribute_stream, #initialize, #parse_attributes, #revision_id

Constructor Details

This class inherits a constructor from Rpdf2txt::PdfObject

Instance Method Details

#idat_decode(data, width, colors) ⇒ `Object`

# File 'lib/rpdf2txt/object.rb', line 909

# Undoes PNG-style per-row filtering (None, Sub, Up, Average, Paeth).
#
# +data+ is a flat Array of byte values in which every row of
# <tt>width * colors</tt> bytes is preceded by one filter-type byte.
# +width+ is the number of pixels per row and +colors+ the number of bytes
# per pixel (presumably one byte per component -- confirm against callers).
#
# Returns a new flat Array holding the reconstructed bytes of all rows,
# without the filter-type bytes. +data+ itself is left unmodified.
#
# Raises ArgumentError if a row announces a filter type outside 0..4.
def idat_decode(data, width, colors)
  byte_width = width * colors
  scanline_length = byte_width + 1 # one filter-type byte precedes every row

  pixels = []
  previous = nil # reconstructed bytes of the row above; nil for the first row
  # each_slice hands out a fresh Array per row, so the in-place edits below
  # never touch the caller's data, and no front-of-array removal is needed.
  data.each_slice(scanline_length) do |row_data|
    filter = row_data.shift
    case filter
    when 0 # None: bytes are stored as-is
    when 1 # Sub: add the already reconstructed byte one pixel to the left
      row_data.each_with_index do |byte, index|
        left = index < colors ? 0 : row_data[index - colors]
        row_data[index] = (byte + left) % 256
      end
    when 2 # Up: add the byte at the same position in the previous row
      row_data.each_with_index do |byte, index|
        upper = previous ? previous[index] : 0
        row_data[index] = (upper + byte) % 256
      end
    when 3 # Average: add the floored mean of the left and upper bytes
      row_data.each_with_index do |byte, index|
        upper = previous ? previous[index] : 0
        left = index < colors ? 0 : row_data[index - colors]
        row_data[index] = (byte + ((left + upper) / 2).floor) % 256
      end
    when 4 # Paeth: add the predictor chosen from left, upper and upper-left
      row_data.each_with_index do |byte, index|
        left = index < colors ? 0 : row_data[index - colors]
        upper = previous ? previous[index] : 0
        upper_left = (previous && index >= colors) ? previous[index - colors] : 0
        # #paeth is a helper defined outside this method (not visible here).
        predictor = paeth(left, upper, upper_left)
        row_data[index] = (byte + predictor) % 256
      end
    else
      raise ArgumentError, "Invalid filter algorithm #{filter}"
    end

    pixels.concat(row_data)
    previous = row_data
  end
  pixels
end

#image ⇒ `Object`

# File 'lib/rpdf2txt/object.rb', line 861

# Builds the Magick::Image for this stream on first use and caches it in
# @image; later calls return the cached object.
#
# Width, height, bits per component, mask and colorspace are read from
# @attributes. The samples come from extract_pixels(decoded_stream, bits).
# '/Indexed' colorspaces are expanded through the palette returned by
# extract_colormap before the image is constituted.
def image
  require 'RMagick'
  return @image if @image

  cols = @attributes[:width].to_i
  rows = @attributes[:height].to_i
  bits = @attributes[:bitspercomponent].to_i
  mask = @attributes[:mask]
  max_value = 2 ** bits - 1
  # The colorspace attribute is either a plain name or an array of
  # [name, base colorspace, highest palette index, palette data].
  colorspace, basespace, palette_max, palette_src = @attributes[:colorspace]
  palette_max = palette_max.to_i
  channel_map = COLORMAPS[colorspace] || COLORMAPS[basespace] || 'RGB'
  channels = channel_map.length
  samples = extract_pixels(decoded_stream, bits)

  if '/Indexed' == colorspace
    ## FIXME: this works for some images, but seems to be wrong
    #         according to the Documentation
    if mask.is_a?(Array) && (samples.size - 1) > rows * cols
      masked = (mask[0].to_i)..(mask[1].to_i)
      samples.reject! { |idx| masked.include?(idx) }
    end
    # For indexed images the palette size, not bitspercomponent, describes
    # the value range of the resulting samples.
    max_value = palette_max
    palette = extract_colormap(palette_src, palette_max)
    expanded = Array.new(samples.size * channels)
    samples.each_with_index do |idx, n|
      expanded[n * channels, channels] = palette[idx * channels, channels]
    end
    samples = expanded
  end

  ## Seemingly undocumented: PNG-filtered data has to be unfiltered first.
  #  It is recognised by the one additional byte per row.
  unfiltered_size = rows * cols * channels
  if samples.size == (cols * channels + 1) * rows
    samples = idat_decode(samples, cols, channels)
  elsif samples.size > unfiltered_size
    samples = samples[0, unfiltered_size]
  end

  # Scale samples to fractions unless they already span the quantum range.
  full_scale = 2 ** Magick::QuantumDepth - 1
  if max_value != full_scale
    divisor = max_value.to_f
    samples.map! { |px| px / divisor }
  end

  @image = Magick::Image.constitute(cols, rows, channel_map, samples)
end

Class: Rpdf2txt::Image

Direct Known Subclasses

Constant Summary collapse

Constants inherited from Stream

Instance Attribute Summary

Attributes inherited from PdfObject

Instance Method Summary collapse

Methods inherited from Stream

Methods inherited from PdfObject

Constructor Details

Instance Method Details

#idat_decode(data, width, colors) ⇒ Object

#image ⇒ Object

#idat_decode(data, width, colors) ⇒ `Object`

#image ⇒ `Object`