Class: PDF::Reader::Encoding::IdentityH

Inherits:

PDF::Reader::Encoding

Object
PDF::Reader::Encoding
PDF::Reader::Encoding::IdentityH

show all

Defined in: lib/pdf/reader/encoding.rb

Constant Summary

Constants inherited from PDF::Reader::Encoding

UNKNOWN_CHAR

Instance Attribute Summary

Attributes inherited from PDF::Reader::Encoding

#differences

Instance Method Summary collapse

#to_utf8(str, map = nil) ⇒ Object

Methods inherited from PDF::Reader::Encoding

factory

Instance Method Details

#to_utf8(str, map = nil) ⇒ `Object`

# File 'lib/pdf/reader/encoding.rb', line 107

# Convert an Identity-H encoded string into a UTF-8 string.
#
# str - the raw string; it is read as a sequence of 16-bit big-endian
#       unsigned integers (a trailing odd byte is ignored by unpack).
# map - optional object responding to #decode(code) that returns the
#       unicode codepoint for a character code, or nil/false when the
#       code cannot be converted.
#
# Returns a String built from the resulting codepoints, tagged as UTF-8
# when the running Ruby supports String#force_encoding.
def to_utf8(str, map = nil)
  # Walk the string two bytes at a time, turning every 16-bit code into a
  # unicode codepoint. Without a ToUnicode CMap there is no reliable way to
  # convert this text, so every code becomes the "unknown character" box.
  # Codes the map cannot translate are given the same placeholder.
  codepoints = str.unpack("n*").map do |code|
    (map && map.decode(code)) || PDF::Reader::Encoding::UNKNOWN_CHAR
  end

  # Serialise the codepoints as UTF-8 bytes.
  utf8 = codepoints.pack("U*")

  # Ruby 1.9+ tracks string encodings, so label the result explicitly.
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)

  utf8
end

Class: PDF::Reader::Encoding::IdentityH

Constant Summary

Constants inherited from PDF::Reader::Encoding

Instance Attribute Summary

Attributes inherited from PDF::Reader::Encoding

Instance Method Summary collapse

Methods inherited from PDF::Reader::Encoding

Instance Method Details

#to_utf8(str, map = nil) ⇒ Object

#to_utf8(str, map = nil) ⇒ `Object`