Class: Prawn::Fonts::ToUnicodeCMap

Inherits:

Object

Object
Prawn::Fonts::ToUnicodeCMap

show all

Defined in:: lib/prawn/fonts/to_unicode_cmap.rb

Overview

This class generates ToUnicode CMap for embedde TrueType/OpenType fonts. It’s a separate format and is somewhat complicated so it has its own place.

Instance Method Summary collapse

#generate ⇒ String

Generate CMap.
#initialize(mapping, code_space_size = nil) ⇒ ToUnicodeCMap constructor

mapping is expected to be a hash with keys being character codes (in broad sense, as used in the showing operation strings) and values being Unicode code points.

Constructor Details

#initialize(mapping, code_space_size = nil) ⇒ `ToUnicodeCMap`

mapping is expected to be a hash with keys being character codes (in broad sense, as used in the showing operation strings) and values being Unicode code points.

# File 'lib/prawn/fonts/to_unicode_cmap.rb', line 14

def initialize(mapping, code_space_size = nil)
  @mapping = mapping
  @code_space_size = code_space_size
end

Instance Method Details

#generate ⇒ `String`

Generate CMap.

Returns:

(String)

# File 'lib/prawn/fonts/to_unicode_cmap.rb', line 22

def generate
  chunks = []

  # Header
  chunks << <<~HEADER.chomp
    /CIDInit /ProcSet findresource begin
    12 dict begin
    begincmap
    /CIDSystemInfo 3 dict dup begin
      /Registry (Adobe) def
      /Ordering (UCS) def
      /Supplement 0 def
    end def
    /CMapName /Adobe-Identity-UCS def
    /CMapType 2 def
  HEADER

  max_glyph_index = mapping.keys.max
  # Range
  code_space_size = (max_glyph_index.bit_length / 8.0).ceil

  used_code_space_size = @code_space_size || code_space_size

  # In CMap codespaces are not sequentional, they're ranges in
  # a multi-dimentional space. Each byte is considered separately. So we
  # have to maximally extend the lower bytes in order to allow for
  # continuos mapping.
  # We only keep the highest byte because usually it's lower than
  # maximally allowed and we don't want to cover that unused space.
  code_space_max = max_glyph_index | ('ff' * (code_space_size - 1)).to_i(16)

  chunks << '1 begincodespacerange'
  chunks << format("<%0#{used_code_space_size * 2}X><%0#{used_code_space_size * 2}X>", 0, code_space_max)
  chunks << 'endcodespacerange'

  # Mapping
  all_spans = mapping_spans(mapping.reject { |gid, cid| gid.zero? || (0xd800..0xdfff).cover?(cid) })

  short_spans, long_spans = all_spans.partition { |span| span[0] == :short }

  long_spans
    .each_slice(100) do |spans|
      chunks << "#{spans.length} beginbfrange"

      spans.each do |type, span|
        # rubocop: disable Lint/FormatParameterMismatch # false positive
        case type
        when :fully_sorted
          chunks << format(
            "<%0#{code_space_size * 2}X><%0#{code_space_size * 2}X><%s>",
            span.first[0],
            span.last[0],
            span.first[1].chr(::Encoding::UTF_16BE).unpack1('H*'),
          )
        when :index_sorted
          chunks << format(
            "<%0#{code_space_size * 2}X><%0#{code_space_size * 2}X>[%s]",
            span.first[0],
            span.last[0],
            span.map { |_, cid| "<#{cid.chr(::Encoding::UTF_16BE).unpack1('H*')}>" }.join(''),
          )
        end
        # rubocop: enable Lint/FormatParameterMismatch
      end

      chunks << 'endbfrange'
    end

  short_spans
    .map { |_type, slice| slice.flatten(1) }
    .each_slice(100) do |mapping|
      chunks << "#{mapping.length} beginbfchar"
      chunks.concat(
        mapping.map { |(gid, cid)|
          # rubocop: disable Lint/FormatParameterMismatch # false positive
          format(
            "<%0#{code_space_size * 2}X><%s>",
            gid,
            cid.chr(::Encoding::UTF_16BE).unpack1('H*'),
          )
          # rubocop: enable Lint/FormatParameterMismatch
        },
      )
      chunks << 'endbfchar'
    end

  # Footer
  chunks << <<~FOOTER.chomp
    endcmap
    CMapName currentdict /CMap defineresource pop
    end
    end
  FOOTER

  chunks.join("\n")
end