Class: PDF::Reader::Encoding

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/encoding.rb

Overview

:nodoc:

Constant Summary collapse

# The integers 0-31 except 9, 10 and 13 (the codes for tab, line feed and
# carriage return). Frozen so the shared list cannot be mutated by accident.
CONTROL_CHARS = [
  0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
  24, 25, 26, 27, 28, 29, 30, 31
].freeze

# Codepoint U+25AF. #to_utf8 substitutes it for any value that did not
# resolve to an integer codepoint.
UNKNOWN_CHAR = 0x25AF

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(enc) ⇒ Encoding

Returns a new instance of Encoding.



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/pdf/reader/encoding.rb', line 34

# Build an encoding from a name or from an encoding dictionary.
#
# enc - one of:
#       * a Hash: its :Differences entry (when present) is installed via
#         #differences=, and its :Encoding entry (falling back to
#         :BaseEncoding) becomes the encoding name;
#       * nil: no encoding name;
#       * anything else: converted to a name with #to_sym.
#
# The resolved name is then handed to unicode_required?, get_unpack and
# get_mapping_file; a mapping file is loaded only when one is returned.
def initialize(enc)
  name =
    case enc
    when Hash
      table = enc[:Differences]
      self.differences = table if table
      enc[:Encoding] || enc[:BaseEncoding]
    else
      enc.nil? ? nil : enc.to_sym
    end

  @to_unicode_required = unicode_required?(name)
  @unpack              = get_unpack(name)
  @map_file            = get_mapping_file(name)

  return unless @map_file

  load_mapping(@map_file)
end

Instance Attribute Details

#unpack ⇒ Object (readonly)

Returns the value of attribute unpack.



32
33
34
# File 'lib/pdf/reader/encoding.rb', line 32

# Reader for @unpack, which #initialize fills from get_unpack(enc) and
# #to_utf8 passes to String#unpack as the format directive.
def unpack
  @unpack
end

Instance Method Details

#differences ⇒ Object



79
80
81
# File 'lib/pdf/reader/encoding.rb', line 79

def differences
  @differences ||= {}
end

#differences=(diff) ⇒ Object

Sets the differences table for this encoding. The argument should be an array in the following format:

[25, :A, 26, :B]

The array alternates between a decimal byte number and a glyph name to map to that byte

To save space, the following array is also valid and equivalent to the previous one (each glyph name that follows another glyph name is mapped to the next consecutive byte):

[25, :A, :B]

Raises:

  • (ArgumentError)


63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/pdf/reader/encoding.rb', line 63

# Replace the differences table with one built from a flat array.
#
# diff - an Array mixing byte numbers and glyph names. A Numeric entry
#        sets the current byte position (via to_i); every other entry is
#        stored at the current position, which then advances by one.
#        Positions start at 0 until a Numeric entry is seen.
#
# Returns the new table (a Hash of byte number => glyph name).
# Raises ArgumentError if diff is not an Array.
def differences=(diff)
  unless diff.is_a?(Array)
    raise ArgumentError, "diff must be an array"
  end

  # Install the fresh table up front, then fill it through a local alias.
  table = @differences = {}
  position = 0

  diff.each do |entry|
    case entry
    when Numeric
      position = entry.to_i
    else
      table[position] = entry
      position += 1
    end
  end

  @differences
end

#to_unicode_required? ⇒ Boolean

Returns:

  • (Boolean)


50
51
52
# File 'lib/pdf/reader/encoding.rb', line 50

# Reports the flag computed once in #initialize via unicode_required?(enc).
# NOTE(review): presumably true when text in this encoding can only be
# decoded with a ToUnicode map — confirm against unicode_required?.
def to_unicode_required?
  @to_unicode_required
end

#to_utf8(str, tounicode = nil) ⇒ Object

Converts the specified string to UTF-8:

  • unpack raw bytes into codepoints

  • replace any that have entries in the differences table with a glyph name

  • convert codepoints from source encoding to Unicode codepoints

  • convert any glyph names to Unicode codepoints

  • replace characters that didn’t convert to Unicode nicely with something valid

  • pack the final array of Unicode codepoints into a utf-8 string

  • mark the string as UTF-8 if we’re running on an M17N-aware VM



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/pdf/reader/encoding.rb', line 94

# Convert a string of raw bytes in this encoding into a UTF-8 string.
#
# Each value produced by str.unpack(unpack) is, in order:
#   1. swapped for its entry in the differences table, if there is one;
#   2. passed through original_codepoint_to_unicode together with tounicode;
#   3. swapped for its entry in glyphnames, if there is one;
#   4. replaced with UNKNOWN_CHAR unless the result is an Integer.
#
# str       - the String to convert.
# tounicode - optional object handed unchanged to
#             original_codepoint_to_unicode (defaults to nil).
#
# Returns a new String packed with "U*" and tagged as UTF-8.
def to_utf8(str, tounicode = nil)
  codepoints = str.unpack(unpack).map do |raw|
    value = differences[raw] || raw
    value = original_codepoint_to_unicode(value, tounicode)
    value = glyphnames[value] || value

    # Integer, not Fixnum: Fixnum was deprecated in Ruby 2.4 and removed in
    # 3.2, where referencing it raises NameError. nil and unresolved glyph
    # names both fail this check and become the placeholder character.
    value.is_a?(Integer) ? value : PDF::Reader::Encoding::UNKNOWN_CHAR
  end

  ret = codepoints.pack("U*")

  # Guarded so the method still works on VMs without String#force_encoding.
  ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)

  ret
end