Class: PDF::Reader::Encoding::MacExpertEncoding

Inherits:
PDF::Reader::Encoding show all
Defined in:
lib/pdf/reader/encoding.rb

Constant Summary

Constants inherited from PDF::Reader::Encoding

UNKNOWN_CHAR

Instance Attribute Summary

Attributes inherited from PDF::Reader::Encoding

#differences

Instance Method Summary collapse

Methods inherited from PDF::Reader::Encoding

factory

Instance Method Details

#to_utf8(str, tounicode = nil) ⇒ Object

convert a MacExpertEncoding string into UTF-8



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/pdf/reader/encoding.rb', line 140

# Convert a MacExpertEncoding string into UTF-8.
#
# Each byte of +str+ is read as an unsigned 8-bit value, passed through
# process_differences, translated via the table below, passed through
# process_glyphnames and finally packed into a UTF-8 string. Values that
# have no entry in the table are passed through unchanged.
#
# @param str [String] the bytes to convert, one character per byte
# @param tounicode [Object, nil] accepted for interface compatibility;
#   this implementation does not read it
# @return [String] the converted text, tagged as UTF-8 where the running
#   Ruby supports String#force_encoding
def to_utf8(str, tounicode = nil)
  # process_differences is defined outside this method; it receives the raw
  # byte values and its result is what gets translated below.
  expert_codes = process_differences(str.unpack('C*'))

  # change necessary characters to equivalent Unicode codepoints
  codepoints = expert_codes.map do |num|
    case num
    when 0x21 then 0xF721
    when 0x22 then 0xF6F8 # Hungarumlautsmall
    when 0x23 then 0xF7A2
    when 0x24 then 0xF724
    when 0x25 then 0xF6E4
    when 0x26 then 0xF726
    when 0x27 then 0xF7B4
    when 0x28 then 0x207D # parenleftsuperior
    when 0x29 then 0x207E # parenrightsuperior
    when 0x2A then 0x2025
    when 0x2B then 0x2024
    when 0x2F then 0x2044
    when 0x30 then 0xF730
    when 0x31 then 0xF731
    when 0x32 then 0xF732
    when 0x33 then 0xF733
    when 0x34 then 0xF734
    when 0x35 then 0xF735
    when 0x36 then 0xF736
    when 0x37 then 0xF737
    when 0x38 then 0xF738
    when 0x39 then 0xF739
    when 0x3D then 0xF6DE
    when 0x3F then 0xF73F
    when 0x44 then 0xF7F0
    when 0x47 then 0x00BC
    when 0x48 then 0x00BD
    when 0x49 then 0x00BE
    when 0x4A then 0x215B
    when 0x4B then 0x215C
    when 0x4C then 0x215D
    when 0x4D then 0x215E
    when 0x4E then 0x2153
    when 0x4F then 0x2154
    when 0x56 then 0xFB00
    when 0x57 then 0xFB01
    when 0x58 then 0xFB02
    when 0x59 then 0xFB03
    when 0x5A then 0xFB04
    when 0x5B then 0x208D
    when 0x5D then 0x208E
    when 0x5E then 0xF6F6
    when 0x5F then 0xF6E5
    when 0x60 then 0xF760
    when 0x61 then 0xF761
    when 0x62 then 0xF762
    when 0x63 then 0xF763
    when 0x64 then 0xF764
    when 0x65 then 0xF765
    when 0x66 then 0xF766
    when 0x67 then 0xF767
    when 0x68 then 0xF768
    when 0x69 then 0xF769
    when 0x6A then 0xF76A
    when 0x6B then 0xF76B
    when 0x6C then 0xF76C
    when 0x6D then 0xF76D
    when 0x6E then 0xF76E
    when 0x6F then 0xF76F
    when 0x70 then 0xF770
    when 0x71 then 0xF771
    when 0x72 then 0xF772
    when 0x73 then 0xF773
    when 0x74 then 0xF774
    when 0x75 then 0xF775
    when 0x76 then 0xF776
    when 0x77 then 0xF777
    when 0x78 then 0xF778
    when 0x79 then 0xF779
    when 0x7A then 0xF77A
    when 0x7B then 0x20A1
    when 0x7C then 0xF6DC
    when 0x7D then 0xF6DD
    when 0x7E then 0xF6FE
    when 0x81 then 0xF6E9
    when 0x82 then 0xF6E0
    when 0x87 then 0xF7E1 # Aacutesmall
    when 0x88 then 0xF7E0
    when 0x89 then 0xF7E2 # Acircumflexsmall
    when 0x8A then 0xF7E4 # Adieresissmall
    when 0x8B then 0xF7E3
    when 0x8C then 0xF7E5
    when 0x8D then 0xF7E7
    when 0x8E then 0xF7E9
    when 0x8F then 0xF7E8
    when 0x90 then 0xF7EA # Ecircumflexsmall
    when 0x91 then 0xF7EB
    when 0x92 then 0xF7ED
    when 0x93 then 0xF7EC
    when 0x94 then 0xF7EE
    when 0x95 then 0xF7EF
    when 0x96 then 0xF7F1
    when 0x97 then 0xF7F3
    when 0x98 then 0xF7F2
    when 0x99 then 0xF7F4
    when 0x9A then 0xF7F6
    when 0x9B then 0xF7F5
    when 0x9C then 0xF7FA
    when 0x9D then 0xF7F9
    when 0x9E then 0xF7FB
    when 0x9F then 0xF7FC
    when 0xA1 then 0x2078
    when 0xA2 then 0x2084
    when 0xA3 then 0x2083
    when 0xA4 then 0x2086
    when 0xA5 then 0x2088
    when 0xA6 then 0x2087
    when 0xA7 then 0xF6FD
    when 0xA9 then 0xF6DF
    when 0xAA then 0x2082
    when 0xAC then 0xF7A8
    when 0xAE then 0xF6F5
    when 0xAF then 0xF6F0
    when 0xB0 then 0x2085
    when 0xB2 then 0xF6E1
    when 0xB3 then 0xF6E7
    when 0xB4 then 0xF7FD
    when 0xB6 then 0xF6E3
    when 0xB9 then 0xF7FE
    when 0xBB then 0x2089
    when 0xBC then 0x2080
    when 0xBD then 0xF6FF
    when 0xBE then 0xF7E6 # AEsmall
    when 0xBF then 0xF7F8
    when 0xC0 then 0xF7BF
    when 0xC1 then 0x2081
    when 0xC2 then 0xF6F9
    when 0xC9 then 0xF7B8
    when 0xCF then 0xF6FA
    when 0xD0 then 0x2012
    when 0xD1 then 0xF6E6
    when 0xD6 then 0xF7A1
    when 0xD8 then 0xF7FF
    when 0xDA then 0x00B9
    when 0xDB then 0x00B2
    when 0xDC then 0x00B3
    when 0xDD then 0x2074
    when 0xDE then 0x2075
    when 0xDF then 0x2076
    when 0xE0 then 0x2077
    when 0xE1 then 0x2079
    when 0xE2 then 0x2070
    when 0xE4 then 0xF6EC
    when 0xE5 then 0xF6F1
    when 0xE6 then 0xF6F3
    when 0xE9 then 0xF6ED
    when 0xEA then 0xF6F2
    when 0xEB then 0xF6EB
    when 0xF1 then 0xF6EE
    when 0xF2 then 0xF6FB
    when 0xF3 then 0xF6F4
    when 0xF4 then 0xF7AF
    when 0xF5 then 0xF6EA # bsuperior
    when 0xF6 then 0x207F
    when 0xF7 then 0xF6EF # msuperior
    when 0xF8 then 0xF6E2
    when 0xF9 then 0xF6E8
    when 0xFA then 0xF6F7
    when 0xFB then 0xF6FC
    else num
    end
  end

  # convert any glyph names to unicode codepoints
  codepoints = process_glyphnames(codepoints)

  # replace characters that didn't convert to unicode nicely with something valid
  codepoints = codepoints.map { |c| c || PDF::Reader::Encoding::UNKNOWN_CHAR }

  # pack all our Unicode codepoints into a UTF-8 string
  utf8 = codepoints.pack("U*")

  # set the string's encoding correctly under ruby 1.9+
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)

  utf8
end