Class: PDF::Reader::Encoding
- Inherits:
-
Object
- Object
- PDF::Reader::Encoding
- Defined in:
- lib/pdf/reader/encoding.rb
Constant Summary collapse
- CONTROL_CHARS =
[0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23, 24,25,26,27,28,29,30,31]
- UNKNOWN_CHAR =
▯
0x25AF
Instance Attribute Summary collapse
-
#differences ⇒ Object
Returns the value of attribute differences.
-
#unpack ⇒ Object
readonly
Returns the value of attribute unpack.
Instance Method Summary collapse
-
#initialize(enc) ⇒ Encoding
constructor
A new instance of Encoding.
- #to_unicode_required? ⇒ Boolean
-
#to_utf8(str, tounicode = nil) ⇒ Object
Converts the specified string to UTF-8.
Constructor Details
#initialize(enc) ⇒ Encoding
Returns a new instance of Encoding.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/pdf/reader/encoding.rb', line 36
#
# Builds an Encoding from a PDF encoding description.
#
# +enc+ may be:
# * a Hash, in which case its :Differences entry (when present) is stored
#   via #differences= and the encoding name is taken from :Encoding,
#   falling back to :BaseEncoding;
# * nil, which selects the standard encoding mapping;
# * anything else, which is converted with #to_sym and used as the name.
#
# Raises UnsupportedFeatureError when the resulting name is not one of the
# encodings handled below.
def initialize(enc)
  @to_unicode_required = false

  if enc.kind_of?(Hash)
    diffs = enc[:Differences]
    self.differences = diffs if diffs
    enc = enc[:Encoding] || enc[:BaseEncoding]
  elsif !enc.nil?
    enc = enc.to_sym
  end

  case enc
  when :"Identity-H"
    # Two-byte codes with no built-in mapping table: flag that a
    # ToUnicode map is needed.
    @unpack = "n*"
    @to_unicode_required = true
  when :UTF16Encoding
    @unpack = "n*"
  else
    # The remaining supported encodings are single-byte and differ only
    # in which mapping file is loaded.
    mapping_file =
      case enc
      when nil, :StandardEncoding then "/encodings/standard.txt"
      when :MacRomanEncoding      then "/encodings/mac_roman.txt"
      when :MacExpertEncoding     then "/encodings/mac_expert.txt"
      when :PDFDocEncoding        then "/encodings/pdf_doc.txt"
      when :SymbolEncoding        then "/encodings/symbol.txt"
      when :WinAnsiEncoding       then "/encodings/win_ansi.txt"
      when :ZapfDingbatsEncoding  then "/encodings/zapf_dingbats.txt"
      end

    unless mapping_file
      raise UnsupportedFeatureError, "#{enc} is not currently a supported encoding"
    end

    load_mapping File.dirname(__FILE__) + mapping_file
    @unpack = "C*"
  end
end
Instance Attribute Details
#differences ⇒ Object
Returns the value of attribute differences.
34 35 36 |
# File 'lib/pdf/reader/encoding.rb', line 34
#
# Reader for the +differences+ attribute; returns whatever is currently
# stored in @differences.
def differences
  @differences
end
#unpack ⇒ Object (readonly)
Returns the value of attribute unpack.
34 35 36 |
# File 'lib/pdf/reader/encoding.rb', line 34
#
# Read-only accessor for the +unpack+ attribute; returns whatever is
# currently stored in @unpack.
def unpack
  @unpack
end
Instance Method Details
#to_unicode_required? ⇒ Boolean
80 81 82 |
# File 'lib/pdf/reader/encoding.rb', line 80
#
# Returns the value stored in @to_unicode_required.
def to_unicode_required?
  @to_unicode_required
end
#to_utf8(str, tounicode = nil) ⇒ Object
Converts the specified string to UTF-8.
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/pdf/reader/encoding.rb', line 110
#
# Convert the specified string to UTF-8.
#
# +str+ is unpacked into integer codes using the #unpack template.
# +tounicode+ is optional; when given, its #decode method is called with
# each code (presumably a ToUnicode CMap -- confirm against callers).
#
# Returns a new string built by packing the resulting codepoints with
# "U*", tagged as UTF-8 where String#force_encoding is available.
def to_utf8(str, tounicode = nil)
  # split the input into integer codes, then let the differences table
  # swap in glyph names where it applies
  codes = process_differences(str.unpack(unpack))

  # turn every remaining code into a unicode codepoint
  codepoints = codes.map do |code|
    if tounicode
      # an explicit map wins; codes it cannot decode become unknown
      tounicode.decode(code) || PDF::Reader::Encoding::UNKNOWN_CHAR
    elsif tounicode.nil? && to_unicode_required?
      # this encoding needs a map and none was supplied
      PDF::Reader::Encoding::UNKNOWN_CHAR
    else
      mapped = mapping[code]
      if mapped
        mapped
      elsif PDF::Reader::Encoding::CONTROL_CHARS.include?(code)
        PDF::Reader::Encoding::UNKNOWN_CHAR
      else
        code
      end
    end
  end

  # resolve any glyph names to unicode codepoints
  codepoints = process_glyphnames(codepoints)

  # anything that still failed to convert is replaced with a valid placeholder
  codepoints.map! { |cp| cp || PDF::Reader::Encoding::UNKNOWN_CHAR }

  # pack the codepoints into a UTF-8 string
  utf8 = codepoints.pack("U*")

  # tag the string's encoding correctly under ruby 1.9+
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)
  utf8
end