prawnpdf · pointlessone · Jan 15, 2024 · Nov 3, 2023 · koffeinfrei · Apr 11, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,13 @@
 
 ## Unreleased
 
+### Full font embedding
+
+Fonts can be embedded in their original form without subsetting or any other
+modification.
+
+(Alexander Mankuta, [#1322](https://github.com/prawnpdf/prawn/pull/1322))
+
 ## Fixed keyword arguments in Prawn::View
 
 (Kim Burgestrand, [1284](https://github.com/prawnpdf/prawn/pull/1284))

diff --git a/lib/prawn/font.rb b/lib/prawn/font.rb
@@ -145,19 +145,23 @@ def width_of(string, options = {})
       end
     end
 
-    # Hash that maps font family names to their styled individual font names.
+    # Hash that maps font family names to their styled individual font
+    # definitions.
     #
     # To add support for another font family, append to this hash, e.g:
     #
     #   pdf.font_families.update(
-    #    "MyTrueTypeFamily" => { :bold        => "foo-bold.ttf",
-    #                            :italic      => "foo-italic.ttf",
-    #                            :bold_italic => "foo-bold-italic.ttf",
-    #                            :normal      => "foo.ttf" })
+    #     "MyTrueTypeFamily" => {
+    #       bold: "foo-bold.ttf",
+    #       italic: "foo-italic.ttf",
+    #       bold_italic: "foo-bold-italic.ttf",
+    #       normal: "foo.ttf"
+    #     }
+    #   )
     #
     # This will then allow you to use the fonts like so:
     #
-    #   pdf.font("MyTrueTypeFamily", :style => :bold)
+    #   pdf.font("MyTrueTypeFamily", style: :bold)
     #   pdf.text "Some bold text"
     #   pdf.font("MyTrueTypeFamily")
     #   pdf.text "Some normal text"
@@ -170,6 +174,17 @@ def width_of(string, options = {})
     # defining your own font families, you can map any or all of these
     # styles to whatever font files you'd like.
     #
+    # A font definition can be either a hash or just a string.
+    #
+    # A hash font definition can specify a number of options:
+    #
+    # - :file -- path to the font file (required)
+    # - :subset -- whether to subset the font (default true). Only
+    #   applicable to TrueType and OpenType fonts (including DFont and TTC).
+    #
+    # A string font definition is equivalent to a hash definition with only
+    # :file being specified.
+    #
     def font_families
       @font_families ||= {}.merge!(
         'Courier' => {
@@ -339,6 +354,8 @@ def initialize(document, name, options = {}) # :nodoc:
 
       @references = {}
       @subset_name_cache = {}
+
+      @full_font_embedding = options.key?(:subset) && !options[:subset]
     end
 
     # The size of the font ascender in PDF points
@@ -401,7 +418,12 @@ def add_to_current_page(subset)
     end
 
     def identifier_for(subset) # :nodoc:
-      @subset_name_cache[subset] ||= "#{@identifier}.#{subset}".to_sym
+      @subset_name_cache[subset] ||= # computed once per subset, then cached
+        if full_font_embedding
+          @identifier.to_sym # full embedding: same identifier for any subset
+        else
+          "#{@identifier}.#{subset}".to_sym # subsetting: subset is appended
+        end
     end
 
     def inspect # :nodoc:
@@ -426,6 +448,8 @@ def eql?(other) # :nodoc:
 
     private
 
+    attr_reader :full_font_embedding
+
     # generate a font identifier that hasn't been used on the current page yet
     #
     def generate_unique_id

diff --git a/lib/prawn/fonts/to_unicode_cmap.rb b/lib/prawn/fonts/to_unicode_cmap.rb
@@ -0,0 +1,168 @@
+# frozen_string_literal: true
+
+module Prawn
+  module Fonts
+    # Builds the text of a ToUnicode CMap from a mapping of character codes to
+    # Unicode code points.
+    #
+    # @private
+    class ToUnicodeCMap
+      # Largest number of entries written into a single bfchar/bfrange section.
+      MAX_SECTION_ENTRIES = 100
+
+      # UTF-16 surrogates; entries mapped to them are left out of the CMap.
+      SURROGATES = (0xd800..0xdfff).freeze
+
+      CMAP_HEADER = <<~HEADER.chomp.freeze
+        /CIDInit /ProcSet findresource begin
+        12 dict begin
+        begincmap
+        /CIDSystemInfo 3 dict dup begin
+          /Registry (Adobe) def
+          /Ordering (UCS) def
+          /Supplement 0 def
+        end def
+        /CMapName /Adobe-Identity-UCS def
+        /CMapType 2 def
+      HEADER
+
+      CMAP_FOOTER = <<~FOOTER.chomp.freeze
+        endcmap
+        CMapName currentdict /CMap defineresource pop
+        end
+        end
+      FOOTER
+
+      # mapping is expected to be a hash with keys being character codes (in
+      # broad sense, as used in the showing operation strings) and values being
+      # Unicode code points.
+      #
+      # code_space_size is the width of a character code in bytes. When it is
+      # omitted, the smallest width that fits the largest code is used.
+      def initialize(mapping, code_space_size = nil)
+        @mapping = mapping
+        @code_space_size = code_space_size
+      end
+
+      # Returns the CMap as a string. Entries with code 0 or with a surrogate
+      # code point are not written.
+      def generate
+        # An empty mapping still produces a well-formed CMap.
+        max_code = mapping.keys.max || 0
+        # At least one byte is needed even when the largest code is 0.
+        natural_size = [(max_code.bit_length / 8.0).ceil, 1].max
+        # Codes are written as wide as the code space range they belong to.
+        code_digits = (@code_space_size || natural_size) * 2
+
+        short_spans, long_spans =
+          mapping_spans(mappable_entries).partition { |type, _span| type == :short }
+
+        [
+          CMAP_HEADER,
+          *codespace_section(max_code, natural_size, code_digits),
+          *bfrange_sections(long_spans, code_digits),
+          *bfchar_sections(short_spans, code_digits),
+          CMAP_FOOTER
+        ].join("\n")
+      end
+
+      private
+
+      attr_reader :mapping
+
+      # Lines of the codespacerange section.
+      def codespace_section(max_code, natural_size, code_digits)
+        # In CMap codespaces are not sequential, they're ranges in
+        # a multi-dimensional space. Each byte is considered separately. So we
+        # have to maximally extend the lower bytes in order to allow for
+        # continuous mapping.
+        # We only keep the highest byte because usually it's lower than
+        # maximally allowed and we don't want to cover that unused space.
+        low_bytes = (1 << (8 * (natural_size - 1))) - 1
+
+        [
+          '1 begincodespacerange',
+          "<#{hex_code(0, code_digits)}><#{hex_code(max_code | low_bytes, code_digits)}>",
+          'endcodespacerange'
+        ]
+      end
+
+      # Lines of the bfrange sections, one section per MAX_SECTION_ENTRIES spans.
+      def bfrange_sections(spans, code_digits)
+        spans.each_slice(MAX_SECTION_ENTRIES).flat_map do |section|
+          entries = section.map { |type, span| bfrange_entry(type, span, code_digits) }
+          ["#{section.length} beginbfrange", *entries, 'endbfrange']
+        end
+      end
+
+      def bfrange_entry(type, span, code_digits)
+        first_code, first_code_point = span.first
+        codes = "<#{hex_code(first_code, code_digits)}><#{hex_code(span.last[0], code_digits)}>"
+
+        if type == :fully_sorted
+          # Code points are consecutive too, the first one is enough.
+          "#{codes}<#{hex_utf16(first_code_point)}>"
+        else
+          # :index_sorted -- every code point has to be listed.
+          code_points = span.map { |_code, code_point| "<#{hex_utf16(code_point)}>" }
+          "#{codes}[#{code_points.join}]"
+        end
+      end
+
+      # Lines of the bfchar sections, one section per MAX_SECTION_ENTRIES entries.
+      def bfchar_sections(spans, code_digits)
+        spans.each_slice(MAX_SECTION_ENTRIES).flat_map do |section|
+          entries =
+            section.map do |_type, span|
+              # A short span holds exactly one entry.
+              code, code_point = span.first
+              "<#{hex_code(code, code_digits)}><#{hex_utf16(code_point)}>"
+            end
+          ["#{section.length} beginbfchar", *entries, 'endbfchar']
+        end
+      end
+
+      def mappable_entries
+        mapping.reject { |code, code_point| code.zero? || SURROGATES.cover?(code_point) }
+      end
+
+      def hex_code(code, digits)
+        format("%0#{digits}X", code)
+      end
+
+      def hex_utf16(code_point)
+        code_point.chr(::Encoding::UTF_16BE).unpack1('H*')
+      end
+
+      # Groups entries into spans of consecutive codes and tags every span:
+      #
+      # - :fully_sorted -- both codes and code points increase by one
+      # - :index_sorted -- only codes increase by one
+      # - :short -- a single entry
+      #
+      # Spans are returned ordered by their first code.
+      def mapping_spans(entries)
+        entries
+          .sort
+          .slice_when { |a, b| (b[0] - a[0]) != 1 } # Slice at key discontinuity
+          .flat_map { |run| classify_run(run) }
+          .sort_by { |_type, span| span.first.first } # Sort by span start key
+      end
+
+      def classify_run(run)
+        return [[:short, run]] if run.length == 1
+
+        continuous_slices, discontinuous_slices =
+          run
+            .slice_when { |a, b| b[1] - a[1] != 1 } # Slice at value discontinuity
+            .partition { |subslice| subslice.length > 1 }
+
+        discontinuous_slices
+          .flatten(1) # Join together
+          .slice_when { |a, b| (b[0] - a[0]) != 1 } # Slice at key discontinuity, again
+          .map { |span| span.length > 1 ? [:index_sorted, span] : [:short, span] } +
+          continuous_slices.map { |span| [:fully_sorted, span] }
+      end
+    end
+  end
+end