Class: PDF::Reader::Encoding

Inherits: Object

Inheritance chain: Object → PDF::Reader::Encoding

Defined in: lib/pdf/reader/encoding.rb

Constant Summary collapse

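UNKNOWN_CHAR = 0x25AF

The Unicode codepoint of ▯ (WHITE VERTICAL RECTANGLE), substituted for any character that cannot be converted.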

Instance Attribute Summary collapse

#differences ⇒ Object

Returns the value of attribute differences.

Instance Method Summary collapse

#initialize(enc) ⇒ Encoding constructor

A new instance of Encoding.
#to_utf8(str, tounicode = nil) ⇒ Object

Converts the specified string to UTF-8.

Constructor Details

#initialize(enc) ⇒ `Encoding`

Returns a new instance of Encoding.

# File 'lib/pdf/reader/encoding.rb', line 35

# Build an encoding from a PDF encoding name or an encoding dictionary.
#
# enc may be:
#   * nil           - the standard encoding table is used
#   * a Hash        - :Differences (when present) is stored via #differences=,
#                     then :Encoding or :BaseEncoding names the base encoding
#   * anything else - converted to a Symbol with #to_sym
#
# Raises UnsupportedFeatureError when the resulting name is not one of the
# encodings handled below.
def initialize(enc)
  if enc.kind_of?(Hash)
    overrides = enc[:Differences]
    self.differences = overrides if overrides
    enc = enc[:Encoding] || enc[:BaseEncoding]
  elsif !enc.nil?
    enc = enc.to_sym
  end

  # encodings that are described by a mapping file shipped next to this
  # source file; all of them are unpacked as 8-bit unsigned values ("C*")
  mapping_files = {
    nil                   => "/encodings/standard.txt",
    :MacRomanEncoding     => "/encodings/mac_roman.txt",
    :MacExpertEncoding    => "/encodings/mac_expert.txt",
    :PDFDocEncoding       => "/encodings/pdf_doc.txt",
    :StandardEncoding     => "/encodings/standard.txt",
    :SymbolEncoding       => "/encodings/symbol.txt",
    :WinAnsiEncoding      => "/encodings/win_ansi.txt",
    :ZapfDingbatsEncoding => "/encodings/zapf_dingbats.txt"
  }

  if mapping_files.key?(enc)
    load_mapping File.dirname(__FILE__) + mapping_files[enc]
    @unpack = "C*"
  elsif :"Identity-H" == enc
    # 16-bit big-endian codes with no mapping file; the flag makes #to_utf8
    # emit UNKNOWN_CHAR for every code when no tounicode map is supplied
    @unpack = "n*"
    @to_unicode_required = true
  elsif :UTF16Encoding == enc
    # 16-bit big-endian codes, no mapping file loaded
    @unpack = "n*"
  else
    raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
  end
end

Instance Attribute Details

#differences ⇒ `Object`

Returns the value of attribute differences.



33
34
35

# File 'lib/pdf/reader/encoding.rb', line 33

# Reader for the differences attribute: hands back whatever is currently held
# in @differences (nil when nothing has been assigned).
def differences
  return @differences
end

Instance Method Details

#to_utf8(str, tounicode = nil) ⇒ `Object`

Converts the specified string to UTF-8.

# File 'lib/pdf/reader/encoding.rb', line 103

# Convert a string in this encoding into a UTF-8 string.
#
# str       - the raw string; it is split into integer codes using the
#             unpack directive chosen by the constructor
# tounicode - optional object responding to #decode(code); when given, it is
#             the only source of codepoints and any code it cannot decode
#             becomes UNKNOWN_CHAR
#
# Returns a String built by packing the resulting codepoints with "U*".
def to_utf8(str, tounicode = nil)
  # split the string into codes, then let process_differences swap in glyph
  # names where applicable
  codes = process_differences(str.unpack(@unpack))

  # true when no map was supplied although this encoding needs one
  map_missing = tounicode.nil? && @to_unicode_required

  # turn each remaining code into a unicode codepoint
  codepoints = codes.map do |code|
    decoded = tounicode && tounicode.decode(code)
    if decoded
      decoded
    elsif tounicode || map_missing
      PDF::Reader::Encoding::UNKNOWN_CHAR
    else
      # fall back to the loaded mapping table, or pass the code through as-is
      (@mapping && @mapping[code]) || code
    end
  end

  # resolve any glyph names into unicode codepoints
  codepoints = process_glyphnames(codepoints)

  # anything that still has no codepoint is replaced with a valid placeholder
  codepoints.map! { |cp| cp || PDF::Reader::Encoding::UNKNOWN_CHAR }

  # pack the codepoints into a UTF-8 string
  utf8 = codepoints.pack("U*")

  # tag the string's encoding on ruby 1.9+
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)

  utf8
end

Class: PDF::Reader::Encoding

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(enc) ⇒ Encoding

Instance Attribute Details

#differences ⇒ Object

Instance Method Details

#to_utf8(str, tounicode = nil) ⇒ Object

#initialize(enc) ⇒ `Encoding`

#differences ⇒ `Object`

#to_utf8(str, tounicode = nil) ⇒ `Object`