Class: PDF::Reader::Encoding

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/encoding.rb

Constant Summary collapse

CONTROL_CHARS =
[0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31]
UNKNOWN_CHAR =

0x25AF

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(enc) ⇒ Encoding

Returns a new instance of Encoding.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/pdf/reader/encoding.rb', line 36

# Build an Encoding from a PDF encoding description.
#
# enc - one of:
#       * nil: handled the same way as :StandardEncoding
#       * a Hash: an optional :Differences entry is handed to #differences=,
#         and the encoding name is read from :Encoding, falling back to
#         :BaseEncoding
#       * anything else: converted to a Symbol with #to_sym and used as the
#         encoding name
#
# Single-byte encodings load their mapping table from the "encodings"
# directory next to this file and use the "C*" unpack directive.
# Identity-H and UTF16Encoding load no table and use "n*"; Identity-H
# additionally sets the flag reported by #to_unicode_required?.
#
# Raises UnsupportedFeatureError when the encoding name is not recognised.
def initialize(enc)
  @to_unicode_required = false

  if enc.kind_of?(Hash)
    diffs = enc[:Differences]
    self.differences = diffs if diffs
    enc = enc[:Encoding] || enc[:BaseEncoding]
  elsif !enc.nil?
    enc = enc.to_sym
  end

  # Pick the mapping table for the encoding; nil marks the two-byte
  # encodings, which have no table to load.
  table =
    case enc
    when nil, :StandardEncoding      then "standard.txt"
    when :MacRomanEncoding           then "mac_roman.txt"
    when :MacExpertEncoding          then "mac_expert.txt"
    when :PDFDocEncoding             then "pdf_doc.txt"
    when :SymbolEncoding             then "symbol.txt"
    when :WinAnsiEncoding            then "win_ansi.txt"
    when :ZapfDingbatsEncoding       then "zapf_dingbats.txt"
    when :"Identity-H", :UTF16Encoding then nil
    else
      raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
    end

  if table
    load_mapping "#{File.dirname(__FILE__)}/encodings/#{table}"
    @unpack = "C*"
  else
    @unpack = "n*"
    @to_unicode_required = true if enc == :"Identity-H"
  end
end

Instance Attribute Details

#differences ⇒ Object

Returns the value of attribute differences.



34
35
36
# File 'lib/pdf/reader/encoding.rb', line 34

# Reader for the @differences instance variable.
#
# The constructor only goes through the matching writer when it is given a
# Hash containing a :Differences entry, so this may be nil otherwise
# (presumably the writer stores into @differences; the writer is not shown
# here, confirm against its definition).
def differences
  @differences
end

#unpack ⇒ Object (readonly)

Returns the value of attribute unpack.



34
35
36
# File 'lib/pdf/reader/encoding.rb', line 34

# Reader for @unpack, the String#unpack directive chosen by the constructor:
# "C*" for the table-based encodings, "n*" for Identity-H and UTF16Encoding.
# #to_utf8 passes it to String#unpack to split a raw string into codes.
def unpack
  @unpack
end

Instance Method Details

#to_unicode_required? ⇒ Boolean

Returns:

  • (Boolean)


80
81
82
# File 'lib/pdf/reader/encoding.rb', line 80

# Reports the @to_unicode_required flag. The constructor initialises it to
# false and sets it to true only for the Identity-H encoding. #to_utf8
# consults it when no tounicode argument is supplied, emitting UNKNOWN_CHAR
# for every code if the flag is set.
def to_unicode_required?
  @to_unicode_required
end

#to_utf8(str, tounicode = nil) ⇒ Object

Convert the specified string to UTF-8.



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/pdf/reader/encoding.rb', line 110

# Convert a raw string in this encoding into a UTF-8 Ruby string.
#
# str       - the raw string; it is split into integer codes with
#             String#unpack using the directive returned by #unpack
# tounicode - optional object responding to decode(code). A truthy result
#             is used as the replacement for that code.
#
# After #process_differences has had its pass over the codes, each code is
# resolved by the first rule that applies:
#   1. tounicode.decode(code), when tounicode is given and returns a value
#   2. UNKNOWN_CHAR, when tounicode was given but could not decode the code,
#      or when tounicode is nil and #to_unicode_required? is true
#   3. mapping[code], when the mapping has an entry
#   4. UNKNOWN_CHAR, when the code is listed in CONTROL_CHARS
#   5. the code itself
#
# The result is passed through #process_glyphnames, any nil left over is
# replaced with UNKNOWN_CHAR, and the codepoints are packed with "U*".
#
# Returns the resulting String, tagged as UTF-8 where String supports
# force_encoding.
def to_utf8(str, tounicode = nil)
  # split into codes, then let the differences table swap in glyph names
  codes = process_differences(str.unpack(unpack))

  unknown = PDF::Reader::Encoding::UNKNOWN_CHAR
  control = PDF::Reader::Encoding::CONTROL_CHARS

  codepoints = codes.map do |value|
    decoded = tounicode && tounicode.decode(value)
    next decoded if decoded

    # a ToUnicode map was expected (or supplied) but had nothing for this code
    next unknown if tounicode || (tounicode.nil? && to_unicode_required?)

    mapped = mapping[value]
    next mapped if mapped

    control.include?(value) ? unknown : value
  end

  # resolve glyph names, then substitute a valid placeholder for any
  # characters that did not convert to unicode
  utf8 = process_glyphnames(codepoints).map! { |cp| cp || unknown }.pack("U*")

  # tag the string's encoding under ruby 1.9+
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)
  utf8
end