Class: PDF::Reader::Encoding::SymbolEncoding

Inherits:
PDF::Reader::Encoding show all
Defined in:
lib/pdf/reader/encoding.rb

Constant Summary

Constants inherited from PDF::Reader::Encoding

UNKNOWN_CHAR

Instance Attribute Summary

Attributes inherited from PDF::Reader::Encoding

#differences

Instance Method Summary collapse

Methods inherited from PDF::Reader::Encoding

factory

Instance Method Details

#to_utf8(str, tounicode = nil) ⇒ Object

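Converts a SymbolEncoding string into UTF-8. When a tounicode map is supplied, each byte is decoded through it instead of the built-in Symbol table.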



587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
# File 'lib/pdf/reader/encoding.rb', line 587

# Symbol-encoding byte value => Unicode codepoint. Any value that has no entry
# here is passed through unchanged by #to_utf8.
SYMBOL_TO_UNICODE = {
  0x22 => 0x2200, 0x24 => 0x2203, 0x27 => 0x220B, 0x2A => 0x2217,
  0x2D => 0x2212, 0x40 => 0x2245, 0x41 => 0x0391, 0x42 => 0x0392,
  0x43 => 0x03A7, 0x44 => 0x0394, 0x45 => 0x0395, 0x46 => 0x03A6,
  0x47 => 0x0393, 0x48 => 0x0397, 0x49 => 0x0399, 0x4A => 0x03D1,
  0x4B => 0x039A, 0x4C => 0x039B, 0x4D => 0x039C, 0x4E => 0x039D,
  0x4F => 0x039F, 0x50 => 0x03A0, 0x51 => 0x0398, 0x52 => 0x03A1,
  0x53 => 0x03A3, 0x54 => 0x03A4, 0x55 => 0x03A5, 0x56 => 0x03C2,
  0x57 => 0x03A9, 0x58 => 0x039E, 0x59 => 0x03A8, 0x5A => 0x0396,
  0x5C => 0x2234, 0x5E => 0x22A5, 0x60 => 0xF8E5, 0x61 => 0x03B1,
  0x62 => 0x03B2, 0x63 => 0x03C7, 0x64 => 0x03B4, 0x65 => 0x03B5,
  0x66 => 0x03C6, 0x67 => 0x03B3, 0x68 => 0x03B7, 0x69 => 0x03B9,
  0x6A => 0x03D5, 0x6B => 0x03BA, 0x6C => 0x03BB, 0x6D => 0x03BC,
  0x6E => 0x03BD, 0x6F => 0x03BF, 0x70 => 0x03C0, 0x71 => 0x03B8,
  0x72 => 0x03C1, 0x73 => 0x03C3, 0x74 => 0x03C4, 0x75 => 0x03C5,
  0x76 => 0x03D6, 0x77 => 0x03C9, 0x78 => 0x03BE, 0x79 => 0x03C8,
  0x7A => 0x03B6, 0x7E => 0x223C,
  0xA0 => 0x20AC, 0xA1 => 0x03D2, 0xA2 => 0x2032, 0xA3 => 0x2264,
  0xA4 => 0x2215, 0xA5 => 0x221E, 0xA6 => 0x0192, 0xA7 => 0x2663,
  0xA8 => 0x2666, 0xA9 => 0x2665, 0xAA => 0x2660, 0xAB => 0x2194,
  0xAC => 0x2190, 0xAD => 0x2191, 0xAE => 0x2192, 0xAF => 0x2193,
  0xB2 => 0x2033, 0xB3 => 0x2265, 0xB4 => 0x00D7, 0xB5 => 0x221D,
  0xB6 => 0x2202, 0xB7 => 0x2022, 0xB8 => 0x00F7, 0xB9 => 0x2260,
  0xBA => 0x2261, 0xBB => 0x2248, 0xBC => 0x2026, 0xBD => 0xF8E6,
  0xBE => 0xF8E7, 0xBF => 0x21B5,
  0xC0 => 0x2135, 0xC1 => 0x2111, 0xC2 => 0x211C, 0xC3 => 0x2118,
  0xC4 => 0x2297, 0xC5 => 0x2295, 0xC6 => 0x2205, 0xC7 => 0x2229,
  0xC8 => 0x222A, 0xC9 => 0x2283, 0xCA => 0x2287, 0xCB => 0x2284,
  0xCC => 0x2282, 0xCD => 0x2286, 0xCE => 0x2208, 0xCF => 0x2209,
  0xD0 => 0x2220, 0xD1 => 0x2207, 0xD2 => 0xF6DA, 0xD3 => 0xF6D9,
  0xD4 => 0xF6DB, 0xD5 => 0x220F, 0xD6 => 0x221A, 0xD7 => 0x22C5,
  0xD8 => 0x00AC, 0xD9 => 0x2227, 0xDA => 0x2228, 0xDB => 0x21D4,
  0xDC => 0x21D0, 0xDD => 0x21D1, 0xDE => 0x21D2, 0xDF => 0x21D3,
  0xE0 => 0x25CA, 0xE1 => 0x2329, 0xE2 => 0xF8E8, 0xE3 => 0xF8E9,
  0xE4 => 0xF8EA, 0xE5 => 0x2211, 0xE6 => 0xF8EB, 0xE7 => 0xF8EC,
  0xE8 => 0xF8ED, 0xE9 => 0xF8EE, 0xEA => 0xF8EF, 0xEB => 0xF8F0,
  0xEC => 0xF8F1, 0xED => 0xF8F2, 0xEE => 0xF8F3, 0xEF => 0xF8F4,
  0xF1 => 0x232A, 0xF2 => 0x222B, 0xF3 => 0x2320, 0xF4 => 0xF8F5,
  0xF5 => 0x2321, 0xF6 => 0xF8F6, 0xF7 => 0xF8F7, 0xF8 => 0xF8F8,
  0xF9 => 0xF8F9, 0xFA => 0xF8FA, 0xFB => 0xF8FB, 0xFC => 0xF8FC,
  0xFD => 0xF8FD, 0xFE => 0xF8FE
}.freeze

# Convert a SymbolEncoding string into UTF-8.
#
# str       - the string to convert; it is unpacked into unsigned byte values.
# tounicode - optional object responding to #decode(value). When given, every
#             value is translated through it and a falsy result becomes
#             UNKNOWN_CHAR; the built-in Symbol table is not consulted.
#
# Returns the converted String, tagged as UTF-8 where the runtime supports
# string encodings.
def to_utf8(str, tounicode = nil)
  # raw byte values, passed through process_differences before translation
  symbol_values = process_differences(str.unpack('C*'))

  codepoints = symbol_values.map do |value|
    if tounicode
      tounicode.decode(value) || PDF::Reader::Encoding::UNKNOWN_CHAR
    else
      # values without a table entry are kept exactly as they are
      SYMBOL_TO_UNICODE.fetch(value, value)
    end
  end

  # replace characters that didn't convert to unicode nicely with something valid
  codepoints.map! { |cp| cp || PDF::Reader::Encoding::UNKNOWN_CHAR }

  # convert any glyph names to unicode codepoints
  codepoints = process_glyphnames(codepoints)

  # pack all our Unicode codepoints into a UTF-8 string
  utf8 = codepoints.pack("U*")

  # set the string's encoding correctly under ruby 1.9+
  utf8.force_encoding("UTF-8") if utf8.respond_to?(:force_encoding)

  utf8
end