Class: PDF::Reader::Encoding::MacRomanEncoding
- Inherits:
-
PDF::Reader::Encoding
- Object
- PDF::Reader::Encoding
- PDF::Reader::Encoding::MacRomanEncoding
- Defined in:
- lib/pdf/reader/encoding.rb
Overview
The default encoding for classic Mac OS (version 9 and earlier). See: en.wikipedia.org/wiki/Mac_OS_Roman
Constant Summary
Constants inherited from PDF::Reader::Encoding
Instance Attribute Summary
Attributes inherited from PDF::Reader::Encoding
Instance Method Summary collapse
-
#to_utf8(str, tounicode = nil) ⇒ Object
convert a MacRomanEncoding string into UTF-8.
Methods inherited from PDF::Reader::Encoding
Instance Method Details
#to_utf8(str, tounicode = nil) ⇒ Object
convert a MacRomanEncoding string into UTF-8
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 |
# File 'lib/pdf/reader/encoding.rb', line 331

# Unicode codepoints for the MacRoman bytes 0x80-0xFF. Values that are not
# keys of this table (e.g. bytes below 0x80) are passed through unchanged
# by to_utf8.
MAC_ROMAN_TO_UNICODE = {
  0x80 => 0x00C4, 0x81 => 0x00C5, 0x82 => 0x00C7, 0x83 => 0x00C9,
  0x84 => 0x00D1, 0x85 => 0x00D6, 0x86 => 0x00DC, 0x87 => 0x00E1,
  0x88 => 0x00E0, 0x89 => 0x00E2, 0x8A => 0x00E4, 0x8B => 0x00E3,
  0x8C => 0x00E5, 0x8D => 0x00E7, 0x8E => 0x00E9, 0x8F => 0x00E8,
  0x90 => 0x00EA, 0x91 => 0x00EB, 0x92 => 0x00ED, 0x93 => 0x00EC,
  0x94 => 0x00EE, 0x95 => 0x00EF, 0x96 => 0x00F1, 0x97 => 0x00F3,
  0x98 => 0x00F2, 0x99 => 0x00F4, 0x9A => 0x00F6, 0x9B => 0x00F5,
  0x9C => 0x00FA, 0x9D => 0x00F9, 0x9E => 0x00FB, 0x9F => 0x00FC,
  0xA0 => 0x2020, 0xA1 => 0x00B0, 0xA2 => 0x00A2, 0xA3 => 0x00A3,
  0xA4 => 0x00A7, 0xA5 => 0x2022, 0xA6 => 0x00B6, 0xA7 => 0x00DF,
  0xA8 => 0x00AE, 0xA9 => 0x00A9, 0xAA => 0x2122, 0xAB => 0x00B4,
  0xAC => 0x00A8, 0xAD => 0x2260, 0xAE => 0x00C6, 0xAF => 0x00D8,
  0xB0 => 0x221E, 0xB1 => 0x00B1, 0xB2 => 0x2264, 0xB3 => 0x2265,
  0xB4 => 0x00A5, 0xB5 => 0x00B5, 0xB6 => 0x2202, 0xB7 => 0x2211,
  0xB8 => 0x220F, 0xB9 => 0x03C0, 0xBA => 0x222B, 0xBB => 0x00AA,
  0xBC => 0x00BA, 0xBD => 0x03A9, 0xBE => 0x00E6, 0xBF => 0x00F8,
  0xC0 => 0x00BF, 0xC1 => 0x00A1, 0xC2 => 0x00AC, 0xC3 => 0x221A,
  0xC4 => 0x0192, 0xC5 => 0x2248, 0xC6 => 0x2206, 0xC7 => 0x00AB,
  0xC8 => 0x00BB, 0xC9 => 0x2026, 0xCA => 0x00A0, 0xCB => 0x00C0,
  0xCC => 0x00C3, 0xCD => 0x00D5, 0xCE => 0x0152, 0xCF => 0x0153,
  0xD0 => 0x2013, 0xD1 => 0x2014, 0xD2 => 0x201C, 0xD3 => 0x201D,
  0xD4 => 0x2018, 0xD5 => 0x2019, 0xD6 => 0x00F7, 0xD7 => 0x25CA,
  0xD8 => 0x00FF, 0xD9 => 0x0178, 0xDA => 0x2044, 0xDB => 0x20AC,
  0xDC => 0x2039, 0xDD => 0x203A, 0xDE => 0xFB01, 0xDF => 0xFB02,
  0xE0 => 0x2021, 0xE1 => 0x00B7, 0xE2 => 0x201A, 0xE3 => 0x201E,
  0xE4 => 0x2030, 0xE5 => 0x00C2, 0xE6 => 0x00CA, 0xE7 => 0x00C1,
  0xE8 => 0x00CB, 0xE9 => 0x00C8, 0xEA => 0x00CD, 0xEB => 0x00CE,
  0xEC => 0x00CF, 0xED => 0x00CC, 0xEE => 0x00D3, 0xEF => 0x00D4,
  # 0xF3 is U+00DB (U with circumflex); it was previously mapped to
  # U+00D8, which duplicated the entry for 0xAF.
  0xF0 => 0xF8FF, 0xF1 => 0x00D2, 0xF2 => 0x00DA, 0xF3 => 0x00DB,
  0xF4 => 0x00D9, 0xF5 => 0x0131, 0xF6 => 0x02C6, 0xF7 => 0x02DC,
  0xF8 => 0x00AF, 0xF9 => 0x02D8, 0xFA => 0x02D9, 0xFB => 0x02DA,
  0xFC => 0x00B8, 0xFD => 0x02DD, 0xFE => 0x02DB, 0xFF => 0x02C7
}.freeze

# Convert a MacRomanEncoding string into UTF-8.
#
# str       - the string to convert; it is read byte by byte via unpack('C*').
# tounicode - accepted as part of the method signature; this implementation
#             does not use it.
#
# Returns a new string built by packing the resulting codepoints with "U*".
def to_utf8(str, tounicode = nil)
  # content of this method borrowed from REXML::Encoding.decode_cp1252
  bytes = process_differences(str.unpack('C*'))

  # change necessary characters to equivalent Unicode codepoints; anything
  # not present in the table is kept as-is
  codepoints = bytes.map { |byte| MAC_ROMAN_TO_UNICODE.fetch(byte, byte) }

  # convert any glyph names to unicode codepoints
  codepoints = process_glyphnames(codepoints)

  # replace characters that didn't convert to unicode nicely with something valid
  codepoints.map! { |codepoint| codepoint || PDF::Reader::Encoding::UNKNOWN_CHAR }

  # pack all our Unicode codepoints into a UTF-8 string
  ret = codepoints.pack("U*")

  # set the string's encoding correctly under ruby 1.9+
  ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
  ret
end