123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333 |
- """
- A class representing a Type 1 font.
- This version reads pfa and pfb files and splits them for embedding in
- pdf files. It also supports SlantFont and ExtendFont transformations,
- similarly to pdfTeX and friends. There is no support yet for subsetting.
- Usage::
- >>> font = Type1Font(filename)
- >>> clear_part, encrypted_part, finale = font.parts
- >>> slanted_font = font.transform({'slant': 0.167})
- >>> extended_font = font.transform({'extend': 1.2})
- Sources:
- * Adobe Technical Note #5040, Supporting Downloadable PostScript
- Language Fonts.
- * Adobe Type 1 Font Format, Adobe Systems Incorporated, third printing,
- v1.1, 1993. ISBN 0-201-57044-0.
- """
- import binascii
- import enum
- import itertools
- import re
- import struct
- import numpy as np
- # token types
- _TokenType = enum.Enum('_TokenType',
- 'whitespace name string delimiter number')
- class Type1Font:
- """
- A class representing a Type-1 font, for use by backends.
- Attributes
- ----------
- parts : tuple
- A 3-tuple of the cleartext part, the encrypted part, and the finale of
- zeros.
- prop : Dict[str, Any]
- A dictionary of font properties.
- """
- __slots__ = ('parts', 'prop')
- def __init__(self, input):
- """
- Initialize a Type-1 font.
- Parameters
- ----------
- input : str or 3-tuple
- Either a pfb file name, or a 3-tuple of already-decoded Type-1
- font `~.Type1Font.parts`.
- """
- if isinstance(input, tuple) and len(input) == 3:
- self.parts = input
- else:
- with open(input, 'rb') as file:
- data = self._read(file)
- self.parts = self._split(data)
- self._parse()
- def _read(self, file):
- """Read the font from a file, decoding into usable parts."""
- rawdata = file.read()
- if not rawdata.startswith(b'\x80'):
- return rawdata
- data = b''
- while rawdata:
- if not rawdata.startswith(b'\x80'):
- raise RuntimeError('Broken pfb file (expected byte 128, '
- 'got %d)' % rawdata[0])
- type = rawdata[1]
- if type in (1, 2):
- length, = struct.unpack('<i', rawdata[2:6])
- segment = rawdata[6:6 + length]
- rawdata = rawdata[6 + length:]
- if type == 1: # ASCII text: include verbatim
- data += segment
- elif type == 2: # binary data: encode in hexadecimal
- data += binascii.hexlify(segment)
- elif type == 3: # end of file
- break
- else:
- raise RuntimeError('Unknown segment type %d in pfb file' %
- type)
- return data
- def _split(self, data):
- """
- Split the Type 1 font into its three main parts.
- The three parts are: (1) the cleartext part, which ends in a
- eexec operator; (2) the encrypted part; (3) the fixed part,
- which contains 512 ASCII zeros possibly divided on various
- lines, a cleartomark operator, and possibly something else.
- """
- # Cleartext part: just find the eexec and skip whitespace
- idx = data.index(b'eexec')
- idx += len(b'eexec')
- while data[idx] in b' \t\r\n':
- idx += 1
- len1 = idx
- # Encrypted part: find the cleartomark operator and count
- # zeros backward
- idx = data.rindex(b'cleartomark') - 1
- zeros = 512
- while zeros and data[idx] in b'0' or data[idx] in b'\r\n':
- if data[idx] in b'0':
- zeros -= 1
- idx -= 1
- if zeros:
- raise RuntimeError('Insufficiently many zeros in Type 1 font')
- # Convert encrypted part to binary (if we read a pfb file, we may end
- # up converting binary to hexadecimal to binary again; but if we read
- # a pfa file, this part is already in hex, and I am not quite sure if
- # even the pfb format guarantees that it will be in binary).
- binary = binascii.unhexlify(data[len1:idx+1])
- return data[:len1], binary, data[idx+1:]
- _whitespace_re = re.compile(br'[\0\t\r\014\n ]+')
- _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
- _comment_re = re.compile(br'%[^\r\n\v]*')
- _instring_re = re.compile(br'[()\\]')
- @classmethod
- def _tokens(cls, text):
- """
- A PostScript tokenizer. Yield (token, value) pairs such as
- (_TokenType.whitespace, ' ') or (_TokenType.name, '/Foobar').
- """
- pos = 0
- while pos < len(text):
- match = (cls._comment_re.match(text[pos:]) or
- cls._whitespace_re.match(text[pos:]))
- if match:
- yield (_TokenType.whitespace, match.group())
- pos += match.end()
- elif text[pos] == b'(':
- start = pos
- pos += 1
- depth = 1
- while depth:
- match = cls._instring_re.search(text[pos:])
- if match is None:
- return
- pos += match.end()
- if match.group() == b'(':
- depth += 1
- elif match.group() == b')':
- depth -= 1
- else: # a backslash - skip the next character
- pos += 1
- yield (_TokenType.string, text[start:pos])
- elif text[pos:pos + 2] in (b'<<', b'>>'):
- yield (_TokenType.delimiter, text[pos:pos + 2])
- pos += 2
- elif text[pos] == b'<':
- start = pos
- pos += text[pos:].index(b'>')
- yield (_TokenType.string, text[start:pos])
- else:
- match = cls._token_re.match(text[pos:])
- if match:
- try:
- float(match.group())
- yield (_TokenType.number, match.group())
- except ValueError:
- yield (_TokenType.name, match.group())
- pos += match.end()
- else:
- yield (_TokenType.delimiter, text[pos:pos + 1])
- pos += 1
- def _parse(self):
- """
- Find the values of various font properties. This limited kind
- of parsing is described in Chapter 10 "Adobe Type Manager
- Compatibility" of the Type-1 spec.
- """
- # Start with reasonable defaults
- prop = {'weight': 'Regular', 'ItalicAngle': 0.0, 'isFixedPitch': False,
- 'UnderlinePosition': -100, 'UnderlineThickness': 50}
- filtered = ((token, value)
- for token, value in self._tokens(self.parts[0])
- if token is not _TokenType.whitespace)
- # The spec calls this an ASCII format; in Python 2.x we could
- # just treat the strings and names as opaque bytes but let's
- # turn them into proper Unicode, and be lenient in case of high bytes.
- def convert(x): return x.decode('ascii', 'replace')
- for token, value in filtered:
- if token is _TokenType.name and value.startswith(b'/'):
- key = convert(value[1:])
- token, value = next(filtered)
- if token is _TokenType.name:
- if value in (b'true', b'false'):
- value = value == b'true'
- else:
- value = convert(value.lstrip(b'/'))
- elif token is _TokenType.string:
- value = convert(value.lstrip(b'(').rstrip(b')'))
- elif token is _TokenType.number:
- if b'.' in value:
- value = float(value)
- else:
- value = int(value)
- else: # more complicated value such as an array
- value = None
- if key != 'FontInfo' and value is not None:
- prop[key] = value
- # Fill in the various *Name properties
- if 'FontName' not in prop:
- prop['FontName'] = (prop.get('FullName') or
- prop.get('FamilyName') or
- 'Unknown')
- if 'FullName' not in prop:
- prop['FullName'] = prop['FontName']
- if 'FamilyName' not in prop:
- extras = ('(?i)([ -](regular|plain|italic|oblique|(semi)?bold|'
- '(ultra)?light|extra|condensed))+$')
- prop['FamilyName'] = re.sub(extras, '', prop['FullName'])
- self.prop = prop
- @classmethod
- def _transformer(cls, tokens, slant, extend):
- def fontname(name):
- result = name
- if slant:
- result += b'_Slant_%d' % int(1000 * slant)
- if extend != 1.0:
- result += b'_Extend_%d' % int(1000 * extend)
- return result
- def italicangle(angle):
- return b'%a' % (float(angle) - np.arctan(slant) / np.pi * 180)
- def fontmatrix(array):
- array = array.lstrip(b'[').rstrip(b']').split()
- array = [float(x) for x in array]
- oldmatrix = np.eye(3, 3)
- oldmatrix[0:3, 0] = array[::2]
- oldmatrix[0:3, 1] = array[1::2]
- modifier = np.array([[extend, 0, 0],
- [slant, 1, 0],
- [0, 0, 1]])
- newmatrix = np.dot(modifier, oldmatrix)
- array[::2] = newmatrix[0:3, 0]
- array[1::2] = newmatrix[0:3, 1]
- # Not directly using `b'%a' % x for x in array` for now as that
- # produces longer reprs on numpy<1.14, causing test failures.
- as_string = '[' + ' '.join(str(x) for x in array) + ']'
- return as_string.encode('latin-1')
- def replace(fun):
- def replacer(tokens):
- token, value = next(tokens) # name, e.g., /FontMatrix
- yield value
- token, value = next(tokens) # possible whitespace
- while token is _TokenType.whitespace:
- yield value
- token, value = next(tokens)
- if value != b'[': # name/number/etc.
- yield fun(value)
- else: # array, e.g., [1 2 3]
- result = b''
- while value != b']':
- result += value
- token, value = next(tokens)
- result += value
- yield fun(result)
- return replacer
- def suppress(tokens):
- for _ in itertools.takewhile(lambda x: x[1] != b'def', tokens):
- pass
- yield b''
- table = {b'/FontName': replace(fontname),
- b'/ItalicAngle': replace(italicangle),
- b'/FontMatrix': replace(fontmatrix),
- b'/UniqueID': suppress}
- for token, value in tokens:
- if token is _TokenType.name and value in table:
- yield from table[value](
- itertools.chain([(token, value)], tokens))
- else:
- yield value
- def transform(self, effects):
- """
- Return a new font that is slanted and/or extended.
- Parameters
- ----------
- effects : dict
- A dict with optional entries:
- - 'slant' : float, default: 0
- Tangent of the angle that the font is to be slanted to the
- right. Negative values slant to the left.
- - 'extend' : float, default: 1
- Scaling factor for the font width. Values less than 1 condense
- the glyphs.
- Returns
- -------
- `Type1Font`
- """
- tokenizer = self._tokens(self.parts[0])
- transformed = self._transformer(tokenizer,
- slant=effects.get('slant', 0.0),
- extend=effects.get('extend', 1.0))
- return Type1Font((b"".join(transformed), self.parts[1], self.parts[2]))
|