Class: PDF::Reader::Encoding
- Inherits:
-
Object
- Object
- PDF::Reader::Encoding
- Defined in:
- lib/pdf/reader/encoding.rb
Constant Summary collapse
- UNKNOWN_CHAR =
▯
0x25AF
Instance Attribute Summary collapse
-
#differences ⇒ Object
Returns the value of attribute differences.
Instance Method Summary collapse
-
#initialize(enc) ⇒ Encoding
constructor
A new instance of Encoding.
-
#to_utf8(str, tounicode = nil) ⇒ Object
Converts the specified string to UTF-8.
Constructor Details
#initialize(enc) ⇒ Encoding
Returns a new instance of Encoding.
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/pdf/reader/encoding.rb', line 35
#
# Build an Encoding from an encoding name or an encoding dictionary.
#
# @param enc [Hash, #to_sym, nil] a Hash (its :Differences entry, when present,
#   is stored through #differences= and its :Encoding or :BaseEncoding entry
#   names the encoding), an encoding name, or nil
# @raise [UnsupportedFeatureError] if the resolved name is not one of the
#   encodings handled below
def initialize(enc)
  if enc.kind_of?(Hash)
    diffs = enc[:Differences]
    self.differences = diffs if diffs
    enc = enc[:Encoding] || enc[:BaseEncoding]
  elsif !enc.nil?
    enc = enc.to_sym
  end

  # the 16-bit encodings are unpacked with "n*" and load no mapping table
  case enc
  when :"Identity-H"
    @unpack = "n*"
    @to_unicode_required = true
    return
  when :UTF16Encoding
    @unpack = "n*"
    return
  end

  # every remaining encoding is unpacked with "C*" and is backed by a mapping
  # file; a nil name falls back to the standard table
  mapping_file = case enc
                 when nil, :StandardEncoding then "/encodings/standard.txt"
                 when :MacRomanEncoding      then "/encodings/mac_roman.txt"
                 when :MacExpertEncoding     then "/encodings/mac_expert.txt"
                 when :PDFDocEncoding        then "/encodings/pdf_doc.txt"
                 when :SymbolEncoding        then "/encodings/symbol.txt"
                 when :WinAnsiEncoding       then "/encodings/win_ansi.txt"
                 when :ZapfDingbatsEncoding  then "/encodings/zapf_dingbats.txt"
                 end

  unless mapping_file
    raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
  end

  load_mapping File.dirname(__FILE__) + mapping_file
  @unpack = "C*"
end
Instance Attribute Details
#differences ⇒ Object
Returns the value of attribute differences.
33 34 35 |
# File 'lib/pdf/reader/encoding.rb', line 33
#
# Reader for the +differences+ attribute.
#
# @return [Object] the current value of @differences (nil if never assigned)
def differences
  @differences
end
Instance Method Details
#to_utf8(str, tounicode = nil) ⇒ Object
Converts the specified string to UTF-8.
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/pdf/reader/encoding.rb', line 103
#
# Convert the specified string to UTF-8.
#
# @param str [String] the raw string; it is unpacked into character codes
#   using the template held in @unpack
# @param tounicode [#decode, nil] optional object whose #decode(code) is tried
#   first for every code
# @return [String] the converted text, tagged as UTF-8 where the runtime
#   supports String#force_encoding
def to_utf8(str, tounicode = nil)
  unknown = PDF::Reader::Encoding::UNKNOWN_CHAR

  # unpack the raw string into character codes, then replace any relevant
  # codes with a glyph name
  codes = process_differences(str.unpack(@unpack))

  # replace any remaining codes with a unicode codepoint
  codepoints = codes.map do |num|
    decoded = tounicode && tounicode.decode(num)
    if decoded
      decoded
    elsif tounicode || (tounicode.nil? && @to_unicode_required)
      # a tounicode map was supplied (or is required) but produced nothing
      unknown
    else
      # use the encoding's mapping table when it has an entry, otherwise keep
      # the code as-is
      (@mapping && @mapping[num]) || num
    end
  end

  # convert any glyph names to unicode codepoints, then replace characters
  # that didn't convert to unicode nicely with something valid
  codepoints = process_glyphnames(codepoints).map { |c| c || unknown }

  # pack all our Unicode codepoints into a UTF-8 string
  ret = codepoints.pack("U*")

  # set the string's encoding correctly under ruby 1.9+
  ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
  ret
end