Class: PDF::Reader::Font

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/font.rb

Overview

Represents a single font PDF object and provides some useful methods for extracting info. Mainly used for converting text to UTF-8.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ohash, obj) ⇒ Font

Returns a new instance of Font.



41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/pdf/reader/font.rb', line 41

# Builds a Font wrapper around a single PDF font object.
#
# @param ohash the object hash this font belongs to; stored in @ohash
# @param obj the raw font object, handed to each extract_* helper in turn
def initialize(ohash, obj)
  @tounicode = nil
  @ohash = ohash

  # The helpers below are defined elsewhere in the class; presumably they
  # populate instance state from obj. Their order is kept as-is.
  extract_base_info(obj)
  extract_type3_info(obj)
  extract_descriptor(obj)
  extract_descendants(obj)

  # Built after extraction so the calculator can see the extracted state.
  @width_calc = build_width_calculator

  # Only applies when nothing above has assigned @encoding.
  @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
end

Instance Attribute Details

#basefont ⇒ Object (readonly)

Returns the value of attribute basefont.



38
39
40
# File 'lib/pdf/reader/font.rb', line 38

# Reader for the read-only +basefont+ attribute; returns @basefont unchanged.
def basefont
  @basefont
end

#cid_default_width ⇒ Object (readonly)

Returns the value of attribute cid_default_width.



38
39
40
# File 'lib/pdf/reader/font.rb', line 38

# Reader for the read-only +cid_default_width+ attribute; returns
# @cid_default_width unchanged.
def cid_default_width
  @cid_default_width
end

#cid_widths ⇒ Object (readonly)

Returns the value of attribute cid_widths.



38
39
40
# File 'lib/pdf/reader/font.rb', line 38

# Reader for the read-only +cid_widths+ attribute; returns @cid_widths unchanged.
def cid_widths
  @cid_widths
end

#descendantfonts ⇒ Object

Returns the value of attribute descendantfonts.



37
38
39
# File 'lib/pdf/reader/font.rb', line 37

# Reader for the +descendantfonts+ attribute; returns @descendantfonts unchanged.
def descendantfonts
  @descendantfonts
end

#encoding ⇒ Object

Returns the value of attribute encoding.



37
38
39
# File 'lib/pdf/reader/font.rb', line 37

# Reader for the +encoding+ attribute; returns @encoding unchanged.
# #unpack and #glyph_width call +unpack+ on the returned object to obtain
# the directive they pass to String#unpack.
def encoding
  @encoding
end

#first_char ⇒ Object (readonly)

Returns the value of attribute first_char.



38
39
40
# File 'lib/pdf/reader/font.rb', line 38

# Reader for the read-only +first_char+ attribute; returns @first_char unchanged.
def first_char
  @first_char
end

#font_descriptor ⇒ Object (readonly)

Returns the value of attribute font_descriptor.



38
39
40
# File 'lib/pdf/reader/font.rb', line 38

# Reader for the read-only +font_descriptor+ attribute; returns
# @font_descriptor unchanged.
def font_descriptor
  @font_descriptor
end

#last_char ⇒ Object (readonly)

Returns the value of attribute last_char.



38
39
40
# File 'lib/pdf/reader/font.rb', line 38

# Reader for the read-only +last_char+ attribute; returns @last_char unchanged.
def last_char
  @last_char
end

#subtype ⇒ Object

Returns the value of attribute subtype.



37
38
39
# File 'lib/pdf/reader/font.rb', line 37

# Reader for the +subtype+ attribute; returns @subtype unchanged.
# #glyph_width_in_text_space compares @subtype against :Type3 to choose
# its conversion path.
def subtype
  @subtype
end

#tounicode ⇒ Object

Returns the value of attribute tounicode.



37
38
39
# File 'lib/pdf/reader/font.rb', line 37

# Reader for the +tounicode+ attribute; returns @tounicode unchanged.
# The constructor starts it at nil; #to_utf8 takes the CMap path only
# when it is truthy.
def tounicode
  @tounicode
end

#widths ⇒ Object (readonly)

Returns the value of attribute widths.



38
39
40
# File 'lib/pdf/reader/font.rb', line 38

# Reader for the read-only +widths+ attribute; returns @widths unchanged.
def widths
  @widths
end

Instance Method Details

#glyph_width(code_point) ⇒ Object

Looks up the specified code point and returns its width in (PDF) glyph space, where 1000 glyph units equal 1 text space unit.



68
69
70
71
72
73
74
75
# File 'lib/pdf/reader/font.rb', line 68

# Returns the width of a glyph as reported by the font's width calculator.
#
# @param code_point a numeric code point, or a String; a String is decoded
#   with the encoding's unpack directive and only the first decoded value
#   is used
# @return the calculator's result for that code point
def glyph_width(code_point)
  lookup_key =
    if code_point.is_a?(String)
      code_point.unpack(encoding.unpack).first
    else
      code_point
    end

  # Memoise per code point; falsy results are not retained by ||= and are
  # therefore recomputed on the next call.
  width_cache = (@cached_widths ||= {})
  width_cache[lookup_key] ||= @width_calc.glyph_width(lookup_key)
end

#glyph_width_in_text_space(code_point) ⇒ Object

In most cases glyph width is converted into text space with a simple divide by 1000.

However, Type3 fonts provide their own FontMatrix that’s used for the transformation.



81
82
83
84
85
86
87
88
89
90
91
# File 'lib/pdf/reader/font.rb', line 81

# Converts a glyph's width from glyph space into text space.
#
# @param code_point passed straight through to #glyph_width
# @return the width in text space: glyph width / 1000.0 for every subtype
#   except :Type3, where the width is pushed through font_matrix_transform
#   and the absolute x-distance is rounded to two decimal places
def glyph_width_in_text_space(code_point)
  glyph_units = glyph_width(code_point)

  # Common case: a plain divide by 1000.
  return glyph_units / 1000.0 unless @subtype == :Type3

  # Type3: measure the x-distance between the transformed origin and the
  # transformed advance point.
  origin_x, _origin_y = font_matrix_transform(0, 0)
  advance_x, _advance_y = font_matrix_transform(glyph_units, 0)
  (advance_x - origin_x).abs.round(2)
end

#to_utf8(params) ⇒ Object



54
55
56
57
58
59
60
# File 'lib/pdf/reader/font.rb', line 54

# Converts +params+ to UTF-8, choosing the conversion path from @tounicode.
#
# @param params forwarded unchanged to the chosen converter
# @return whatever the chosen converter returns
def to_utf8(params)
  # Use the CMap-based conversion whenever @tounicode is truthy.
  return to_utf8_via_cmap(params) if @tounicode

  to_utf8_via_encoding(params)
end

#unpack(data) ⇒ Object



62
63
64
# File 'lib/pdf/reader/font.rb', line 62

# Decodes +data+ using the unpack directive supplied by the font's encoding.
#
# @param data an object responding to #unpack (a String in #glyph_width)
# @return the result of data.unpack with that directive
def unpack(data)
  directive = encoding.unpack
  data.unpack(directive)
end