Class: PDF::Reader::Encoding

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/encoding.rb

Constant Summary collapse

UNKNOWN_CHAR =

0x25AF

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(enc) ⇒ Encoding

Returns a new instance of Encoding.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/pdf/reader/encoding.rb', line 35

# Build an encoding from the value found in a font's encoding entry.
#
# enc may be:
# * nil  - treated the same as :StandardEncoding
# * a Hash - its :Differences entry (when present) is handed to
#   differences=, and the encoding name is read from :Encoding,
#   falling back to :BaseEncoding
# * anything else - converted to a Symbol with to_sym
#
# Sets @unpack to the String#unpack template that to_utf8 uses to split a
# string into character codes ("C*" = one byte per code, "n*" = two
# big-endian bytes per code). Single-byte encodings also load their mapping
# table from the encodings/ directory next to this file.
#
# Raises UnsupportedFeatureError for any encoding name not listed below.
def initialize(enc)
  if enc.kind_of?(Hash)
    diffs = enc[:Differences]
    self.differences = diffs if diffs
    enc = enc[:Encoding] || enc[:BaseEncoding]
  elsif !enc.nil?
    enc = enc.to_sym
  end

  # Resolve the mapping file for single-byte encodings; the two-byte
  # encodings have no mapping file and resolve to nil.
  mapping_file =
    case enc
    when nil, :StandardEncoding        then "standard.txt"
    when :MacRomanEncoding             then "mac_roman.txt"
    when :MacExpertEncoding            then "mac_expert.txt"
    when :PDFDocEncoding               then "pdf_doc.txt"
    when :SymbolEncoding               then "symbol.txt"
    when :WinAnsiEncoding              then "win_ansi.txt"
    when :ZapfDingbatsEncoding         then "zapf_dingbats.txt"
    when :"Identity-H", :UTF16Encoding then nil
    else raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
    end

  if mapping_file
    load_mapping File.dirname(__FILE__) + "/encodings/" + mapping_file
    @unpack = "C*"
  else
    @unpack = "n*"
    # Identity-H codes can only be translated through a ToUnicode map
    @to_unicode_required = true if enc == :"Identity-H"
  end
end

Instance Attribute Details

#differences ⇒ Object

Returns the value of attribute differences.



33
34
35
# File 'lib/pdf/reader/encoding.rb', line 33

# Reader for the @differences instance variable.
#
# initialize assigns enc[:Differences] through differences= when it is given
# a Hash containing that key; presumably that writer stores the value here
# (the writer is not shown alongside this method - confirm).
def differences
  return @differences
end

Instance Method Details

#to_utf8(str, tounicode = nil) ⇒ Object

Converts the specified string to UTF-8, decoding character codes through the optional tounicode map when one is given.



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/pdf/reader/encoding.rb', line 103

# Convert str from this encoding into a UTF-8 string.
#
# str       - the raw string; it is split into character codes using the
#             @unpack template chosen by initialize
# tounicode - optional object responding to decode(code); when given, it is
#             the only source of codepoints and any code it cannot decode
#             becomes UNKNOWN_CHAR
#
# Without tounicode, each code is looked up in @mapping (if one was loaded)
# and passed through unchanged when there is no entry - unless this
# encoding requires a ToUnicode map, in which case every code becomes
# UNKNOWN_CHAR.
#
# Returns the converted String.
def to_utf8(str, tounicode = nil)
  # split the string into character codes, then give process_differences
  # the chance to swap individual codes for glyph names
  codes = process_differences(str.unpack(@unpack))

  # translate every remaining code into a unicode codepoint
  codepoints = codes.map do |code|
    decoded = tounicode && tounicode.decode(code)

    if decoded
      decoded
    elsif tounicode || (tounicode.nil? && @to_unicode_required)
      PDF::Reader::Encoding::UNKNOWN_CHAR
    elsif @mapping && @mapping[code]
      @mapping[code]
    else
      code
    end
  end

  # resolve glyph names into unicode codepoints
  codepoints = process_glyphnames(codepoints)

  # anything that didn't convert to unicode nicely is swapped for a valid
  # placeholder character
  codepoints.map! { |cp| cp || PDF::Reader::Encoding::UNKNOWN_CHAR }

  # pack the codepoints into a UTF-8 string and, on ruby 1.9+, tag it with
  # the right encoding
  utf8 = codepoints.pack("U*")
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)
  utf8
end