Class: PDF::Reader::Encoding::SymbolEncoding

Inherits:
PDF::Reader::Encoding show all
Defined in:
lib/pdf/reader/encoding.rb

Constant Summary

Constants inherited from PDF::Reader::Encoding

UNKNOWN_CHAR

Instance Attribute Summary

Attributes inherited from PDF::Reader::Encoding

#differences

Instance Method Summary collapse

Methods inherited from PDF::Reader::Encoding

factory

Instance Method Details

#to_utf8(str, tounicode = nil) ⇒ Object

Convert a SymbolEncoding string into a UTF-8 string.



568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
# File 'lib/pdf/reader/encoding.rb', line 568

# Convert a SymbolEncoding string into UTF-8.
#
# The input is read byte by byte. Bytes listed in the table below are
# swapped for the Unicode codepoint they stand for; every other byte is
# carried over unchanged.
#
# str       - the string to convert; it is unpacked as unsigned bytes.
# tounicode - accepted as part of the signature but not used by this method.
#
# Returns the converted text as a UTF-8 string.
def to_utf8(str, tounicode = nil)
  # apply the differences table before translating individual bytes
  symbol_bytes = process_differences(str.unpack('C*'))

  codepoints = symbol_bytes.map do |byte|
    codepoint =
      case byte
      when 0x22 then 0x2200
      when 0x24 then 0x2203
      when 0x27 then 0x220B
      when 0x2A then 0x2217
      when 0x2D then 0x2212
      when 0x40 then 0x2245
      when 0x41 then 0x0391
      when 0x42 then 0x0392
      when 0x43 then 0x03A7
      when 0x44 then 0x0394
      when 0x45 then 0x0395
      when 0x46 then 0x03A6
      when 0x47 then 0x0393
      when 0x48 then 0x0397
      when 0x49 then 0x0399
      when 0x4A then 0x03D1
      when 0x4B then 0x039A
      when 0x4C then 0x039B
      when 0x4D then 0x039C
      when 0x4E then 0x039D
      when 0x4F then 0x039F
      when 0x50 then 0x03A0
      when 0x51 then 0x0398
      when 0x52 then 0x03A1
      when 0x53 then 0x03A3
      when 0x54 then 0x03A4
      when 0x55 then 0x03A5
      when 0x56 then 0x03C2
      when 0x57 then 0x03A9
      when 0x58 then 0x039E
      when 0x59 then 0x03A8
      when 0x5A then 0x0396
      when 0x5C then 0x2234
      when 0x5E then 0x22A5
      when 0x60 then 0xF8E5
      when 0x61 then 0x03B1
      when 0x62 then 0x03B2
      when 0x63 then 0x03C7
      when 0x64 then 0x03B4
      when 0x65 then 0x03B5
      when 0x66 then 0x03C6
      when 0x67 then 0x03B3
      when 0x68 then 0x03B7
      when 0x69 then 0x03B9
      when 0x6A then 0x03D5
      when 0x6B then 0x03BA
      when 0x6C then 0x03BB
      when 0x6D then 0x03BC
      when 0x6E then 0x03BD
      when 0x6F then 0x03BF
      when 0x70 then 0x03C0
      when 0x71 then 0x03B8
      when 0x72 then 0x03C1
      when 0x73 then 0x03C3
      when 0x74 then 0x03C4
      when 0x75 then 0x03C5
      when 0x76 then 0x03D6
      when 0x77 then 0x03C9
      when 0x78 then 0x03BE
      when 0x79 then 0x03C8
      when 0x7A then 0x03B6
      when 0x7E then 0x223C
      when 0xA0 then 0x20AC
      when 0xA1 then 0x03D2
      when 0xA2 then 0x2032
      when 0xA3 then 0x2264
      when 0xA4 then 0x2215
      when 0xA5 then 0x221E
      when 0xA6 then 0x0192
      when 0xA7 then 0x2663
      when 0xA8 then 0x2666
      when 0xA9 then 0x2665
      when 0xAA then 0x2660
      when 0xAB then 0x2194
      when 0xAC then 0x2190
      when 0xAD then 0x2191
      when 0xAE then 0x2192
      when 0xAF then 0x2193
      when 0xB2 then 0x2033
      when 0xB3 then 0x2265
      when 0xB4 then 0x00D7
      when 0xB5 then 0x221D
      when 0xB6 then 0x2202
      when 0xB7 then 0x2022
      when 0xB8 then 0x00F7
      when 0xB9 then 0x2260
      when 0xBA then 0x2261
      when 0xBB then 0x2248
      when 0xBC then 0x2026
      when 0xBD then 0xF8E6
      when 0xBE then 0xF8E7
      when 0xBF then 0x21B5
      when 0xC0 then 0x2135
      when 0xC1 then 0x2111
      when 0xC2 then 0x211C
      when 0xC3 then 0x2118
      when 0xC4 then 0x2297
      when 0xC5 then 0x2295
      when 0xC6 then 0x2205
      when 0xC7 then 0x2229
      when 0xC8 then 0x222A
      when 0xC9 then 0x2283
      when 0xCA then 0x2287
      when 0xCB then 0x2284
      when 0xCC then 0x2282
      when 0xCD then 0x2286
      when 0xCE then 0x2208
      when 0xCF then 0x2209
      when 0xD0 then 0x2220
      when 0xD1 then 0x2207
      when 0xD2 then 0xF6DA
      when 0xD3 then 0xF6D9
      when 0xD4 then 0xF6DB
      when 0xD5 then 0x220F
      when 0xD6 then 0x221A
      when 0xD7 then 0x22C5
      when 0xD8 then 0x00AC
      when 0xD9 then 0x2227
      when 0xDA then 0x2228
      when 0xDB then 0x21D4
      when 0xDC then 0x21D0
      when 0xDD then 0x21D1
      when 0xDE then 0x21D2
      when 0xDF then 0x21D3
      when 0xE0 then 0x25CA
      when 0xE1 then 0x2329
      when 0xE2 then 0xF8E8
      when 0xE3 then 0xF8E9
      when 0xE4 then 0xF8EA
      when 0xE5 then 0x2211
      when 0xE6 then 0xF8EB
      when 0xE7 then 0xF8EC
      when 0xE8 then 0xF8ED
      when 0xE9 then 0xF8EE
      when 0xEA then 0xF8EF
      when 0xEB then 0xF8F0
      when 0xEC then 0xF8F1
      when 0xED then 0xF8F2
      when 0xEE then 0xF8F3
      when 0xEF then 0xF8F4
      when 0xF1 then 0x232A
      when 0xF2 then 0x222B
      when 0xF3 then 0x2320
      when 0xF4 then 0xF8F5
      when 0xF5 then 0x2321
      when 0xF6 then 0xF8F6
      when 0xF7 then 0xF8F7
      when 0xF8 then 0xF8F8
      when 0xF9 then 0xF8F9
      when 0xFA then 0xF8FA
      when 0xFB then 0xF8FB
      when 0xFC then 0xF8FC
      when 0xFD then 0xF8FD
      when 0xFE then 0xF8FE
      else byte
      end

    # replace characters that didn't convert to unicode nicely with something valid
    codepoint || PDF::Reader::Encoding::UNKNOWN_CHAR
  end

  # convert any glyph names to unicode codepoints
  codepoints = process_glyphnames(codepoints)

  # pack all our Unicode codepoints into a UTF-8 string
  utf8 = codepoints.pack("U*")

  # set the string's encoding correctly under ruby 1.9+
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)

  utf8
end