Class: PDF::Reader::Encoding::StandardEncoding

Inherits:

PDF::Reader::Encoding

Object
PDF::Reader::Encoding
PDF::Reader::Encoding::StandardEncoding

show all

Defined in: lib/pdf/reader/encoding.rb

Constant Summary

Constants inherited from PDF::Reader::Encoding

UNKNOWN_CHAR

Instance Attribute Summary

Attributes inherited from PDF::Reader::Encoding

#differences

Instance Method Summary collapse

#to_utf8(str, tounicode = nil) ⇒ Object

Converts an Adobe Standard Encoding string into UTF-8.

Methods inherited from PDF::Reader::Encoding

factory

Instance Method Details

#to_utf8(str, tounicode = nil) ⇒ `Object`

Converts an Adobe Standard Encoding string into UTF-8.

# File 'lib/pdf/reader/encoding.rb', line 503

# Convert a string of Adobe Standard Encoding bytes into a UTF-8 string.
#
# str       - the source string; it is read byte by byte.
# tounicode - optional object responding to #decode(code). When supplied,
#             it alone decides the codepoint for every code and the
#             built-in table below is not consulted.
#
# Returns a new String built from the resulting Unicode codepoints.
def to_utf8(str, tounicode = nil)
  # Codes whose Unicode codepoint differs from the code itself, following
  #   http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/stdenc.txt
  # Any code that is not listed here is passed through unchanged.
  std_to_unicode = {
    0x27 => 0x2019, 0x60 => 0x2018,
    0xA4 => 0x2044, 0xA6 => 0x0192, 0xA8 => 0x00A4, 0xA9 => 0x0027,
    0xAA => 0x201C, 0xAC => 0x2039, 0xAD => 0x203A, 0xAE => 0xFB01,
    0xAF => 0xFB02,
    0xB1 => 0x2013, 0xB2 => 0x2020, 0xB3 => 0x2021, 0xB4 => 0x00B7,
    0xB7 => 0x2022, 0xB8 => 0x201A, 0xB9 => 0x201E, 0xBA => 0x201D,
    0xBC => 0x2026, 0xBD => 0x2030,
    0xC1 => 0x0060, 0xC2 => 0x00B4, 0xC3 => 0x02C6, 0xC4 => 0x02DC,
    0xC5 => 0x00AF, 0xC6 => 0x02D8, 0xC7 => 0x02D9, 0xC8 => 0x00A8,
    0xCA => 0x02DA, 0xCB => 0x00B8, 0xCD => 0x02DD, 0xCE => 0x02DB,
    0xCF => 0x02C7,
    0xD0 => 0x2014,
    0xE1 => 0x00C6, 0xE3 => 0x00AA, 0xE8 => 0x0141, 0xE9 => 0x00D8,
    0xEA => 0x0152, 0xEB => 0x00BA,
    0xF1 => 0x00E6, 0xF5 => 0x0131, 0xF8 => 0x0142, 0xF9 => 0x00F8,
    0xFA => 0x0153, 0xFB => 0x00DF
  }

  # split the input into byte values, then hand them to the inherited
  # differences hook before any mapping takes place
  codes = self.process_differences(str.unpack('C*'))

  codepoints = codes.map do |code|
    if tounicode
      # a code the supplied map cannot decode becomes the placeholder
      tounicode.decode(code) || PDF::Reader::Encoding::UNKNOWN_CHAR
    else
      std_to_unicode.fetch(code, code)
    end
  end

  # turn any remaining glyph names into unicode codepoints
  codepoints = self.process_glyphnames(codepoints)

  # anything that still has no codepoint is swapped for a valid placeholder
  codepoints.map! { |point| point || PDF::Reader::Encoding::UNKNOWN_CHAR }

  # encode the codepoints as UTF-8
  utf8 = codepoints.pack("U*")

  # tag the string's encoding where the runtime supports it (ruby 1.9+)
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)

  utf8
end

Class: PDF::Reader::Encoding::StandardEncoding

Constant Summary

Constants inherited from PDF::Reader::Encoding

Instance Attribute Summary

Attributes inherited from PDF::Reader::Encoding

Instance Method Summary collapse

Methods inherited from PDF::Reader::Encoding

Instance Method Details

#to_utf8(str, tounicode = nil) ⇒ Object

#to_utf8(str, tounicode = nil) ⇒ `Object`