Class: PDF::Reader::Encoding

Inherits:

Object

Object
PDF::Reader::Encoding

show all

Defined in: lib/pdf/reader/encoding.rb

Constant Summary collapse

CONTROL_CHARS =

[0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31]

UNKNOWN_CHAR =

0x25AF # ▯

Instance Attribute Summary collapse

#differences ⇒ Object

Returns the value of attribute differences.
#unpack ⇒ Object readonly

Returns the value of attribute unpack.

Instance Method Summary collapse

#initialize(enc) ⇒ Encoding constructor

A new instance of Encoding.
#to_unicode_required? ⇒ Boolean
#to_utf8(str, tounicode = nil) ⇒ Object

Converts the specified string to UTF-8.

Constructor Details

#initialize(enc) ⇒ `Encoding`

Returns a new instance of Encoding.

# File 'lib/pdf/reader/encoding.rb', line 36

# Builds an Encoding from a PDF encoding description.
#
# enc may be:
#   * nil    - treated the same as :StandardEncoding
#   * a Hash - :Differences (if present) is handed to #differences= and the
#              base encoding name is read from :Encoding or :BaseEncoding
#   * anything else - converted to a Symbol with #to_sym and used as the name
#
# Sets @unpack to the String#unpack directive for this encoding ("C*" for
# the single-byte encodings, "n*" for Identity-H and UTF16Encoding) and
# loads the matching table from the encodings/ directory next to this file
# via load_mapping (defined elsewhere in the class).
#
# Raises UnsupportedFeatureError when the name is not one of the encodings
# listed below.
def initialize(enc)
  @to_unicode_required = false

  name = enc
  if name.kind_of?(Hash)
    diffs = name[:Differences]
    self.differences = diffs if diffs
    name = name[:Encoding] || name[:BaseEncoding]
  elsif !name.nil?
    name = name.to_sym
  end

  case name
  when :"Identity-H"
    # No built-in table: callers are told a ToUnicode map is required.
    @unpack = "n*"
    @to_unicode_required = true
  when :UTF16Encoding
    @unpack = "n*"
  else
    # Every remaining supported encoding is single-byte and backed by a
    # mapping file; pick the file, or nil when the name is unsupported.
    table =
      case name
      when nil, :StandardEncoding then "/encodings/standard.txt"
      when :MacRomanEncoding      then "/encodings/mac_roman.txt"
      when :MacExpertEncoding     then "/encodings/mac_expert.txt"
      when :PDFDocEncoding        then "/encodings/pdf_doc.txt"
      when :SymbolEncoding        then "/encodings/symbol.txt"
      when :WinAnsiEncoding       then "/encodings/win_ansi.txt"
      when :ZapfDingbatsEncoding  then "/encodings/zapf_dingbats.txt"
      end

    unless table
      raise UnsupportedFeatureError, "#{name} is not currently a supported encoding"
    end

    load_mapping File.dirname(__FILE__) + table
    @unpack = "C*"
  end
end

Instance Attribute Details

#differences ⇒ `Object`

Returns the value of attribute differences.



34
35
36

# File 'lib/pdf/reader/encoding.rb', line 34

# Reader for the differences attribute: hands back whatever is currently
# stored in @differences (nil when nothing has been assigned).
def differences; @differences; end

#unpack ⇒ `Object` (readonly)

Returns the value of attribute unpack.



34
35
36

# File 'lib/pdf/reader/encoding.rb', line 34

# Read-only accessor for @unpack, the String#unpack directive chosen by the
# constructor ("C*" or "n*").
def unpack; @unpack; end

Instance Method Details

#to_unicode_required? ⇒ `Boolean`

Returns:

(Boolean)



80
81
82

# File 'lib/pdf/reader/encoding.rb', line 80

# Reports the @to_unicode_required flag. The constructor sets it to true
# only for the Identity-H encoding and to false otherwise.
def to_unicode_required?; @to_unicode_required; end

#to_utf8(str, tounicode = nil) ⇒ `Object`

Converts the specified string to UTF-8.

# File 'lib/pdf/reader/encoding.rb', line 110

# Converts str, a string in this encoding, into a UTF-8 string.
#
# str       - the raw string; it is split into integers with
#             String#unpack using this encoding's #unpack directive.
# tounicode - optional object responding to #decode(code). When given, it is
#             the only source of codepoints: any code it cannot decode
#             becomes UNKNOWN_CHAR.
#
# Returns a new String packed from Unicode codepoints and, where the Ruby
# runtime supports it, tagged as UTF-8.
def to_utf8(str, tounicode = nil)
  unknown = PDF::Reader::Encoding::UNKNOWN_CHAR

  # Split into integer codes, then let the Differences table substitute glyph
  # names for individual codes (process_differences is defined elsewhere).
  codes = process_differences(str.unpack(unpack))

  # Turn each remaining code into a Unicode codepoint.
  codepoints = codes.map do |code|
    decoded = tounicode.decode(code) if tounicode
    next decoded if decoded

    # A ToUnicode map was supplied but had no entry, or one is required for
    # this encoding and none was supplied: nothing sensible to fall back to.
    next unknown if tounicode || (tounicode.nil? && to_unicode_required?)

    mapping[code] ||
      (PDF::Reader::Encoding::CONTROL_CHARS.include?(code) ? unknown : code)
  end

  # Resolve any glyph names to codepoints (process_glyphnames is defined
  # elsewhere).
  codepoints = process_glyphnames(codepoints)

  # Replace characters that didn't convert to Unicode with something valid.
  codepoints.map! { |cp| cp || unknown }

  # Pack the codepoints into a UTF-8 byte sequence.
  utf8 = codepoints.pack("U*")

  # Tag the string's encoding on Ruby 1.9+.
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)

  utf8
end