1076 lines
40 KiB
Python
1076 lines
40 KiB
Python
#******************************************************************************
|
|
# TTFontFile class
|
|
#
|
|
# This class is based on The ReportLab Open Source PDF library
|
|
# written in Python - http://www.reportlab.com/software/opensource/
|
|
# together with ideas from the OpenOffice source code and others.
|
|
#
|
|
# Version: 1.04
|
|
# Date: 2011-09-18
|
|
# Author: Ian Back <ianb@bpm1.com>
|
|
# License: LGPL
|
|
# Copyright (c) Ian Back, 2010
|
|
# Ported to Python 2.7 by Mariano Reingart (reingart@gmail.com) on 2012
|
|
# This header must be retained in any redistribution or
|
|
# modification of the file.
|
|
#
|
|
#******************************************************************************
|
|
|
|
from __future__ import with_statement
|
|
|
|
from struct import pack, unpack, unpack_from
|
|
import re
|
|
import warnings
|
|
from .php import die, substr, str_repeat, str_pad, strlen, count
|
|
from .py3k import b, ord
|
|
|
|
|
|
# Selects the sfnt version tag written at the start of a generated TTF file
# (see TTFontFile.endTTFile):
#   True  -> 0x74727565 ("true", Mac)
#   False -> 0x00010000 (Windows)
# Either seems to work for a font embedded in a PDF file
# when read by Adobe Reader on a Windows PC(!)
_TTF_MAC_HEADER = False

# TrueType Font Glyph operators: bit flags of a composite-glyph component
# record, as tested by TTFontFile.getGlyphs and TTFontFile.makeSubset.
GF_WORDS = 0x0001     # component arguments occupy 4 bytes instead of 2
GF_SCALE = 0x0008     # 2 extra bytes follow the arguments
GF_MORE = 0x0020      # another component record follows this one
GF_XYSCALE = 0x0040   # 4 extra bytes follow the arguments
GF_TWOBYTWO = 0x0080  # 8 extra bytes follow the arguments
|
|
|
|
|
|
def sub32(x, y):
    """Subtract two 32-bit values given as (high word, low word) pairs.

    Both x and y are indexable pairs of 16-bit words. The result is
    (x - y) modulo 2**32, returned as a (high, low) tuple.
    """
    x_hi, x_lo = x[0], x[1]
    y_hi, y_lo = y[0], y[1]
    if y_lo > x_lo:
        # borrow from the high word
        x_lo += 0x10000
        y_hi += 1
    if y_hi > x_hi:
        # wrap around instead of going negative
        x_hi += 0x10000
    return ((x_hi - y_hi) & 0xFFFF, x_lo - y_lo)
|
|
|
|
def calcChecksum(data):
    """Return the 32-bit table checksum of data as a (high, low) word pair.

    data is a byte string; it is zero-padded to a multiple of four bytes
    and summed as big-endian 32-bit words, modulo 2**32. The sum is kept
    as two 16-bit halves to match sub32().
    """
    if not data:
        return (0, 0)
    remainder = len(data) % 4
    if remainder:
        data += b"\0" * (4 - remainder)
    # Decode every 16-bit word in one call; even positions are the high
    # halves of each 32-bit word, odd positions the low halves.
    words = unpack(">%dH" % (len(data) // 2), data)
    hi = sum(words[0::2])
    lo = sum(words[1::2])
    hi += lo >> 16  # carry out of the low half
    return (hi & 0xFFFF, lo & 0xFFFF)
|
|
|
|
|
|
class TTFontFile:
|
|
|
|
def __init__(self):
    """Create a parser with the default in-memory read threshold."""
    # Maximum size of glyf table to read in as string
    # (otherwise reads each glyph from file)
    self.maxStrLenRead = 200000
|
|
|
|
def getMetrics(self, file):
    """Open a TrueType file and load its metrics into this object.

    file is the path of the font. The parsing state is reset, the version
    tag is checked, then the table directory is read and extractInfo() is
    run while the file is open. Unsupported version tags are reported
    through die().
    """
    self.filename = file
    with open(file, 'rb') as self.fh:
        # reset the per-font parsing state
        self._pos = 0
        self.charWidths = []
        self.glyphPos = {}
        self.charToGlyph = {}
        self.tables = {}
        self.otables = {}
        self.ascent = 0
        self.descent = 0
        self.TTCFonts = {}
        version = self.read_ulong()
        self.version = version
        if version == 0x4F54544F:
            die("Postscript outlines are not supported")
        if version == 0x74746366:
            die("ERROR - TrueType Fonts Collections not supported")
        if version != 0x00010000 and version != 0x74727565:
            die("Not a TrueType font: version=" + str(version))
        self.readTableDirectory()
        self.extractInfo()
|
|
|
|
def readTableDirectory(self):
    """Read the table directory at the current position into self.tables.

    Stores numTables, searchRange, entrySelector and rangeShift, then one
    record per table keyed by its tag. Each record holds 'tag', 'checksum'
    (a pair of 16-bit words), 'offset' and 'length'.
    """
    self.numTables = self.read_ushort()
    self.searchRange = self.read_ushort()
    self.entrySelector = self.read_ushort()
    self.rangeShift = self.read_ushort()
    self.tables = {}
    for _ in range(self.numTables):
        tag = self.read_tag()
        checksum_hi = self.read_ushort()
        checksum_lo = self.read_ushort()
        offset = self.read_ulong()
        length = self.read_ulong()
        self.tables[tag] = {
            'tag': tag,
            'checksum': (checksum_hi, checksum_lo),
            'offset': offset,
            'length': length,
        }
|
|
|
|
def get_table_pos(self, tag):
    """Return the (offset, length) pair recorded for table tag."""
    record = self.tables[tag]
    return (record['offset'], record['length'])
|
|
|
|
def seek(self, pos):
    """Move both the tracked position and the file handle to pos."""
    self._pos = pos
    self.fh.seek(pos)
|
|
|
|
def skip(self, delta):
    """Advance the tracked position by delta bytes and seek the file there."""
    self._pos += delta
    self.fh.seek(self._pos)
|
|
|
|
def seek_table(self, tag, offset_in_table=0):
    """Seek to offset_in_table bytes into table tag.

    Returns the resulting absolute file position.
    """
    target = self.get_table_pos(tag)[0] + offset_in_table
    self._pos = target
    self.fh.seek(target)
    return target
|
|
|
|
def read_tag(self):
    """Read a 4-byte tag at the current position and return it as text."""
    self._pos += 4
    raw = self.fh.read(4)
    return raw.decode("latin1")
|
|
|
|
def read_short(self):
    """Read a signed big-endian 16-bit integer and advance the position.

    Raises struct.error if fewer than two bytes are available.
    """
    self._pos += 2
    # struct does the sign extension; no compatibility shim is needed
    return unpack(">h", self.fh.read(2))[0]
|
|
|
|
def unpack_short(self, s):
    """Decode the first two bytes of s as a signed big-endian 16-bit integer.

    Extra bytes after the first two are ignored. Raises struct.error if s
    holds fewer than two bytes.
    """
    return unpack_from(">h", s)[0]
|
|
|
|
def read_ushort(self):
    """Read an unsigned big-endian 16-bit integer and advance the position.

    Raises struct.error if fewer than two bytes are available.
    """
    self._pos += 2
    return unpack(">H", self.fh.read(2))[0]
|
|
|
|
def read_ulong(self):
    """Read an unsigned big-endian 32-bit integer and advance the position.

    The result is always non-negative. Raises struct.error if fewer than
    four bytes are available.
    """
    self._pos += 4
    return unpack(">L", self.fh.read(4))[0]
|
|
|
|
def get_ushort(self, pos):
    """Return the unsigned big-endian 16-bit integer stored at file offset pos.

    Moves the file handle but leaves the tracked position (self._pos)
    untouched, so callers re-seek before the next sequential read.
    Raises struct.error if fewer than two bytes are available.
    """
    self.fh.seek(pos)
    return unpack(">H", self.fh.read(2))[0]
|
|
|
|
def get_ulong(self, pos):
    """Return the unsigned big-endian 32-bit integer stored at file offset pos.

    Moves the file handle but leaves the tracked position (self._pos)
    untouched. The result is always non-negative. Raises struct.error if
    fewer than four bytes are available.
    """
    self.fh.seek(pos)
    return unpack(">L", self.fh.read(4))[0]
|
|
|
|
def pack_short(self, val):
    """Pack val as a big-endian 16-bit integer in two's complement.

    Accepts -32768..65535. Negative values are mapped to their unsigned
    two's-complement form before packing; pack(">H", ...) rejects a
    negative number, so packing the raw value would always fail.
    Values outside the accepted range raise struct.error.
    """
    if val < 0:
        val += 0x10000
    return pack(">H", val)
|
|
|
|
def splice(self, stream, offset, value):
    """Return a copy of stream with value written over it at offset.

    The bytes from offset to offset + len(value) are replaced; the rest of
    stream is kept.
    """
    head = substr(stream, 0, offset)
    tail = substr(stream, offset + strlen(value))
    return head + value + tail
|
|
|
|
def _set_ushort(self, stream, offset, value):
    """Return stream with an unsigned big-endian 16-bit value written at offset."""
    return self.splice(stream, offset, pack(">H", value))
|
|
|
|
def _set_short(self, stream, offset, val):
    """Return stream with a signed big-endian 16-bit value written at offset.

    Negative values are converted to their unsigned two's-complement form
    first; pack(">H", ...) rejects a negative number, so packing the raw
    value would always fail. Values outside -32768..65535 raise
    struct.error.
    """
    if val < 0:
        val += 0x10000
    return self.splice(stream, offset, pack(">H", val))
|
|
|
|
def get_chunk(self, pos, length):
    """Return length bytes read from file offset pos.

    The file handle is moved to pos even when nothing is read; the tracked
    position (self._pos) is not updated. A length below 1 yields an empty
    byte string, so callers can always decode or unpack the result.
    """
    self.fh.seek(pos)
    if length < 1:
        return b''
    return self.fh.read(length)
|
|
|
|
def get_table(self, tag):
    """Return the raw bytes of table tag.

    A table recorded with length 0 is reported through die().
    """
    pos, length = self.get_table_pos(tag)
    if length == 0:
        die('Truetype font (' + self.filename + '): error reading table: ' + tag)
    self.fh.seek(pos)
    table_data = self.fh.read(length)
    return table_data
|
|
|
|
def add(self, tag, data):
    """Register data as the output table tag.

    For the 'head' table the four bytes at offset 8 are zeroed first;
    endTTFile() later writes the whole-file checksum there.
    """
    if tag == 'head':
        data = self.splice(data, 8, b("\0\0\0\0"))
    self.otables[tag] = data
|
|
|
|
############################################/
|
|
############################################/
|
|
|
|
############################################/
|
|
|
|
def extractInfo(self):
    """Parse the font tables and populate the metric attributes.

    Expects the file to be open (self.fh) and self.tables to be filled by
    readTableDirectory(). Reads the name, head, hhea, OS/2, post, maxp and
    cmap tables and sets: name, familyName, styleName, fullName,
    uniqueFontID, unitsPerEm, bbox, ascent, descent, capHeight,
    sFamilyClass, sFamilySubClass, stemV, italicAngle, underlinePosition,
    underlineThickness and flags. Character widths are then loaded through
    getHMTX(). Metrics are scaled to 1000 units per em.

    Malformed or unsupported data is reported through die().
    """
    #################/
    # name - Naming table
    #################/
    self.sFamilyClass = 0
    self.sFamilySubClass = 0

    name_offset = self.seek_table("name")
    name_format = self.read_ushort()
    if name_format != 0:
        die("Unknown name table format " + str(name_format))
    numRecords = self.read_ushort()
    string_data_offset = name_offset + self.read_ushort()
    # nameId -> string: 1 family, 2 style, 3 unique ID, 4 full name,
    # 6 PostScript name (see how they are used below)
    names = {1: '', 2: '', 3: '', 4: '', 6: ''}
    nameCount = len(names)
    for i in range(numRecords):
        platformId = self.read_ushort()
        encodingId = self.read_ushort()
        languageId = self.read_ushort()
        nameId = self.read_ushort()
        length = self.read_ushort()
        offset = self.read_ushort()
        if nameId not in names:
            continue
        N = ''
        if platformId == 3 and encodingId == 1 and languageId == 0x409:
            # Microsoft, Unicode, US English, PS Name
            opos = self._pos
            self.seek(string_data_offset + offset)
            if length % 2 != 0:
                die("PostScript name is UTF-16BE string of odd length")
            length //= 2
            while length > 0:
                N += chr(self.read_ushort())
                length -= 1
            self.seek(opos)
        elif platformId == 1 and encodingId == 0 and languageId == 0:
            # Macintosh, Roman, English, PS Name
            opos = self._pos
            raw = self.get_chunk(string_data_offset + offset, length)
            # a zero-length record yields an empty chunk; do not decode it
            N = raw.decode("latin1") if raw else ''
            self.seek(opos)

        if N and names[nameId] == '':
            names[nameId] = N
            nameCount -= 1
            if nameCount == 0:
                break

    if names[6]:
        psName = names[6]
    elif names[4]:
        psName = re.sub(' ', '-', names[4])
    elif names[1]:
        psName = re.sub(' ', '-', names[1])
    else:
        psName = ''
    if not psName:
        die("Could not find PostScript font name")
    self.name = psName
    self.familyName = names[1] if names[1] else psName
    self.styleName = names[2] if names[2] else 'Regular'
    self.fullName = names[4] if names[4] else psName
    self.uniqueFontID = names[3] if names[3] else psName
    if names[6]:
        self.fullName = names[6]

    #################/
    # head - Font header table
    #################/
    self.seek_table("head")
    self.skip(18)
    self.unitsPerEm = unitsPerEm = self.read_ushort()
    scale = 1000 / float(unitsPerEm)
    self.skip(16)
    xMin = self.read_short()
    yMin = self.read_short()
    xMax = self.read_short()
    yMax = self.read_short()
    self.bbox = [(xMin * scale), (yMin * scale), (xMax * scale), (yMax * scale)]
    self.skip(3 * 2)
    self.skip(2)  # indexToLocFormat, not needed for metrics
    glyphDataFormat = self.read_ushort()
    if glyphDataFormat != 0:
        die('Unknown glyph data format ' + str(glyphDataFormat))

    #################/
    # hhea metrics table
    #################/
    # ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
    if "hhea" in self.tables:
        self.seek_table("hhea")
        self.skip(4)
        hheaAscender = self.read_short()
        hheaDescender = self.read_short()
        self.ascent = (hheaAscender * scale)
        self.descent = (hheaDescender * scale)

    #################/
    # OS/2 - OS/2 and Windows metrics table
    #################/
    if "OS/2" in self.tables:
        self.seek_table("OS/2")
        os2_version = self.read_ushort()
        self.skip(2)
        usWeightClass = self.read_ushort()
        self.skip(2)
        fsType = self.read_ushort()
        if fsType == 0x0002 or (fsType & 0x0300) != 0:
            die('ERROR - Font file ' + self.filename + ' cannot be embedded due to copyright restrictions.')
            self.restrictedUse = True

        self.skip(20)
        sF = self.read_short()
        self.sFamilyClass = (sF >> 8)
        self.sFamilySubClass = (sF & 0xFF)
        self.skip(10)  # PANOSE = 10 byte length
        self.skip(26)
        sTypoAscender = self.read_short()
        sTypoDescender = self.read_short()
        # the hhea values, when present and non-zero, take precedence
        if not self.ascent:
            self.ascent = (sTypoAscender * scale)
        if not self.descent:
            self.descent = (sTypoDescender * scale)
        if os2_version > 1:
            self.skip(16)
            sCapHeight = self.read_short()
            self.capHeight = (sCapHeight * scale)
        else:
            self.capHeight = self.ascent
    else:
        usWeightClass = 500
        if not self.ascent:
            self.ascent = (yMax * scale)
        if not self.descent:
            self.descent = (yMin * scale)
        self.capHeight = self.ascent

    self.stemV = 50 + int(pow((usWeightClass / 65.0), 2))

    #################/
    # post - PostScript table
    #################/
    self.seek_table("post")
    self.skip(4)
    # 16.16 fixed point: signed integer part plus unsigned fraction
    self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
    self.underlinePosition = self.read_short() * scale
    self.underlineThickness = self.read_short() * scale
    isFixedPitch = self.read_ulong()

    # flag bits: 4 is always set, 64 for a non-zero italic angle,
    # 262144 for a weight class of 600 or more, 1 for fixed pitch
    self.flags = 4
    if self.italicAngle != 0:
        self.flags = self.flags | 64
    if usWeightClass >= 600:
        self.flags = self.flags | 262144
    if isFixedPitch:
        self.flags = self.flags | 1

    #################/
    # hhea - Horizontal header table
    #################/
    self.seek_table("hhea")
    self.skip(32)
    metricDataFormat = self.read_ushort()
    if metricDataFormat != 0:
        die('Unknown horizontal metric data format ' + str(metricDataFormat))
    numberOfHMetrics = self.read_ushort()
    if numberOfHMetrics == 0:
        die('Number of horizontal metrics is 0')

    #################/
    # maxp - Maximum profile table
    #################/
    self.seek_table("maxp")
    self.skip(4)
    numGlyphs = self.read_ushort()

    #################/
    # cmap - Character to glyph index mapping table
    #################/
    cmap_offset = self.seek_table("cmap")
    self.skip(2)
    cmapTableCount = self.read_ushort()
    unicode_cmap_offset = 0
    unicode_cmap_offset12 = 0

    for i in range(cmapTableCount):
        platformID = self.read_ushort()
        encodingID = self.read_ushort()
        offset = self.read_ulong()
        save_pos = self._pos
        if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
            cmap_format = self.get_ushort(cmap_offset + offset)
            if cmap_format == 12:
                if not unicode_cmap_offset12:
                    unicode_cmap_offset12 = cmap_offset + offset
                break
        if (platformID == 3 and encodingID == 1) or platformID == 0:  # Microsoft, Unicode
            cmap_format = self.get_ushort(cmap_offset + offset)
            if cmap_format == 4:
                if not unicode_cmap_offset:
                    unicode_cmap_offset = cmap_offset + offset
                break

        # get_ushort moved the file handle; return to the encoding records
        self.seek(save_pos)

    if not unicode_cmap_offset and not unicode_cmap_offset12:
        die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

    glyphToChar = {}
    charToGlyph = {}
    if unicode_cmap_offset12:
        self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
    else:
        self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

    #################/
    # hmtx - Horizontal metrics table
    #################/
    self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
|
|
|
|
|
|
############################################/
|
|
############################################/
|
|
|
|
def makeSubset(self, file, subset):
    """Build a subset TrueType font and return it as a byte string.

    file is the path of the source font. subset is an iterable of
    character codes to keep. Glyph 0 is always kept, and glyphs referenced
    by kept composite glyphs are pulled in through getGlyphs(). The cmap,
    glyf, hmtx, loca, head, hhea, maxp and post tables are rebuilt; name,
    OS/2 and (when present) cvt, fpgm, prep and gasp are copied.

    Side effects: sets self.charToGlyph, self.codeToGlyph, self.maxUni,
    self.glyphdata and the output tables in self.otables.
    A font without a usable Unicode cmap is reported through die().
    """
    self.filename = file
    with open(file, 'rb') as self.fh:
        self._pos = 0
        self.charWidths = []
        self.glyphPos = {}
        self.charToGlyph = {}
        self.tables = {}
        self.otables = {}
        self.ascent = 0
        self.descent = 0
        self.skip(4)  # version tag, not checked here
        self.maxUni = 0
        self.readTableDirectory()

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(50)
        indexToLocFormat = self.read_ushort()

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(34)  # up to and including metricDataFormat
        orignHmetrics = numberOfHMetrics = self.read_ushort()

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0
        for i in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                if self.get_ushort(cmap_offset + offset) == 12:
                    unicode_cmap_offset12 = cmap_offset + offset
                    break
            if (platformID == 3 and encodingID == 1) or platformID == 0:  # Microsoft, Unicode
                if self.get_ushort(cmap_offset + offset) == 4:
                    unicode_cmap_offset = cmap_offset + offset
                    break

            # get_ushort moved the file handle; return to the encoding records
            self.seek(save_pos)

        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

        self.charToGlyph = charToGlyph

        #################/
        # hmtx - Horizontal metrics table
        #################/
        scale = 1  # not used
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)

        #################/
        # loca - Index to location
        #################/
        self.getLOCA(indexToLocFormat, numGlyphs)

        subsetglyphs = [(0, 0)]  # special "sorted dict"!
        seen = set(subsetglyphs)  # O(1) duplicate check for the list above
        subsetCharToGlyph = {}
        for code in subset:
            if code in self.charToGlyph:
                entry = (self.charToGlyph[code], code)  # Old Glyph ID => Unicode
                if entry not in seen:
                    seen.add(entry)
                    subsetglyphs.append(entry)
                subsetCharToGlyph[code] = self.charToGlyph[code]  # Unicode to old GlyphID
            self.maxUni = max(self.maxUni, code)
        (glyf_start, dummy) = self.get_table_pos('glyf')

        subsetglyphs.sort()
        glyphSet = {}
        for newGlyphIdx, (originalGlyphIdx, uni) in enumerate(subsetglyphs):
            glyphSet[originalGlyphIdx] = newGlyphIdx  # old glyphID to new glyphID

        codeToGlyph = {}
        for uni, originalGlyphIdx in sorted(subsetCharToGlyph.items()):
            codeToGlyph[uni] = glyphSet[originalGlyphIdx]

        self.codeToGlyph = codeToGlyph

        # getGlyphs appends the components of composite glyphs to
        # subsetglyphs; the loop picks the appended entries up as well.
        nonlocals = {'start': glyf_start, 'glyphSet': glyphSet,
                     'subsetglyphs': subsetglyphs}
        for originalGlyphIdx, uni in subsetglyphs:
            self.getGlyphs(originalGlyphIdx, nonlocals)

        numGlyphs = numberOfHMetrics = len(subsetglyphs)

        # tables copied from the original
        self.add('name', self.get_table('name'))
        for tag in ['cvt ', 'fpgm', 'prep', 'gasp']:
            if tag in self.tables:
                self.add(tag, self.get_table(tag))

        # post - PostScript
        opost = self.get_table('post')
        post = b("\x00\x03\x00\x00") + substr(opost, 4, 12) + b("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00")
        self.add('post', post)

        # Sort CID2GID map into segments of contiguous codes
        if 0 in codeToGlyph:
            del codeToGlyph[0]
        rangeid = 0
        range_ = {}
        prevcid = -2
        prevglidx = -1
        # for each character
        for cid, glidx in sorted(codeToGlyph.items()):
            if cid == (prevcid + 1) and glidx == (prevglidx + 1):
                range_[rangeid].append(glidx)
            else:
                # new range
                rangeid = cid
                range_[rangeid] = [glidx]
            prevcid = cid
            prevglidx = glidx

        # cmap - Character to glyph mapping - Format 4 (MS / )
        segCount = len(range_) + 1  # + 1 Last segment has missing character 0xFFFF
        searchRange = 1
        entrySelector = 0
        while searchRange * 2 <= segCount:
            searchRange = searchRange * 2
            entrySelector = entrySelector + 1

        searchRange = searchRange * 2
        rangeShift = segCount * 2 - searchRange
        # NOTE(review): formula kept exactly as in the original port
        length = 16 + (8 * segCount) + (numGlyphs + 1)
        cmap = [0, 1,          # Index : version, number of encoding subtables
                3, 1,          # Encoding Subtable : platform (MS=3), encoding (Unicode)
                0, 12,         # Encoding Subtable : offset (hi,lo)
                4, length, 0,  # Format 4 Mapping subtable: format, length, language
                segCount * 2,
                searchRange,
                entrySelector,
                rangeShift]

        range_ = sorted(range_.items())

        # endCode(s)
        for first_code, subrange in range_:
            cmap.append(first_code + (len(subrange) - 1))
        cmap.append(0xFFFF)  # endCode of last Segment
        cmap.append(0)  # reservedPad

        # startCode(s)
        for first_code, subrange in range_:
            cmap.append(first_code)
        cmap.append(0xFFFF)  # startCode of last Segment

        # idDelta(s)
        for first_code, subrange in range_:
            cmap.append(-(first_code - subrange[0]))
        cmap.append(1)  # idDelta of last Segment

        # idRangeOffset(s): Offset in bytes to glyph indexArray, or 0
        cmap.extend([0] * len(range_))
        cmap.append(0)  # idRangeOffset of last Segment

        for first_code, subrange in range_:
            cmap.extend(subrange)
        cmap.append(0)  # Mapping for last character

        cmap_words = []
        for cm in cmap:
            if cm >= 0:
                cmap_words.append(pack(">H", cm))
            else:
                try:
                    cmap_words.append(pack(">h", cm))
                except Exception:
                    warnings.warn("cmap value too big/small: %s" % cm)
                    cmap_words.append(pack(">H", -cm))
        self.add('cmap', b('').join(cmap_words))

        # glyf - Glyph data
        (glyfOffset, glyfLength) = self.get_table_pos('glyf')
        glyf_in_memory = glyfLength < self.maxStrLenRead
        if glyf_in_memory:
            glyphData = self.get_table('glyf')

        offsets = []
        glyf_parts = []
        hmtx_parts = []
        pos = 0
        self.glyphdata = {}

        for originalGlyphIdx, uni in subsetglyphs:
            # hmtx - Horizontal Metrics
            hmtx_parts.append(self.getHMetric(orignHmetrics, originalGlyphIdx))

            offsets.append(pos)
            try:
                glyphPos = self.glyphPos[originalGlyphIdx]
                glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos
            except IndexError:
                warnings.warn("missing glyph %s" % (originalGlyphIdx))
                # emit an empty glyph; keep glyphPos bound for the read below
                glyphPos = 0
                glyphLen = 0

            if glyf_in_memory:
                data = substr(glyphData, glyphPos, glyphLen)
            elif glyphLen > 0:
                data = self.get_chunk(glyfOffset + glyphPos, glyphLen)
            else:
                data = b('')

            # If number of contours <= -1 i.e. composite glyph
            if glyphLen > 2 and (unpack(">H", substr(data, 0, 2))[0] & (1 << 15)):
                pos_in_glyph = 10
                flags = GF_MORE
                while flags & GF_MORE:
                    flags = unpack(">H", substr(data, pos_in_glyph, 2))[0]
                    glyphIdx = unpack(">H", substr(data, pos_in_glyph + 2, 2))[0]
                    self.glyphdata.setdefault(originalGlyphIdx, {}).setdefault('compGlyphs', []).append(glyphIdx)
                    if glyphIdx in glyphSet:
                        newGlyphIdx = glyphSet[glyphIdx]
                    else:
                        # Unknown component: point it at glyph 0 so the
                        # glyph data stays a valid byte string.
                        warnings.warn("missing glyph data %s" % glyphIdx)
                        newGlyphIdx = 0
                    data = self._set_ushort(data, pos_in_glyph + 2, newGlyphIdx)
                    pos_in_glyph += 4
                    if flags & GF_WORDS:
                        pos_in_glyph += 4
                    else:
                        pos_in_glyph += 2
                    if flags & GF_SCALE:
                        pos_in_glyph += 2
                    elif flags & GF_XYSCALE:
                        pos_in_glyph += 4
                    elif flags & GF_TWOBYTWO:
                        pos_in_glyph += 8

            glyf_parts.append(data)
            pos += glyphLen
            if pos % 4 != 0:
                # keep every glyph aligned on a 4-byte boundary
                padding = 4 - (pos % 4)
                glyf_parts.append(str_repeat(b("\0"), padding))
                pos += padding

        offsets.append(pos)
        self.add('glyf', b('').join(glyf_parts))

        # hmtx - Horizontal Metrics
        self.add('hmtx', b('').join(hmtx_parts))

        # loca - Index to location
        if ((pos + 1) >> 1) > 0xFFFF:
            indexToLocFormat = 1  # long format
            locastr = b('').join([pack(">L", offset) for offset in offsets])
        else:
            indexToLocFormat = 0  # short format
            locastr = b('').join([pack(">H", offset // 2) for offset in offsets])
        self.add('loca', locastr)

        # head - Font header
        head = self.get_table('head')
        head = self._set_ushort(head, 50, indexToLocFormat)
        self.add('head', head)

        # hhea - Horizontal Header
        hhea = self.get_table('hhea')
        hhea = self._set_ushort(hhea, 34, numberOfHMetrics)
        self.add('hhea', hhea)

        # maxp - Maximum Profile
        maxp = self.get_table('maxp')
        maxp = self._set_ushort(maxp, 4, numGlyphs)
        self.add('maxp', maxp)

        # OS/2 - OS/2
        self.add('OS/2', self.get_table('OS/2'))

    # Put the TTF file together
    stm = self.endTTFile('')
    return stm
|
|
|
|
|
|
#########################################
|
|
# Recursively get composite glyph data
|
|
def getGlyphData(self, originalGlyphIdx, nonlocals):
    """Accumulate contour and point counts for a glyph, recursing into components.

    nonlocals is a dict carrying the running state across the recursion
    (&maxdepth, &depth, &points, &contours in the PHP original). The keys
    'depth', 'maxdepth', 'points' and 'contours' must be present.
    'maxdepth' ends up as the deepest recursion level reached; 'depth' is
    restored before returning.

    A glyph with no entry in self.glyphdata, or without the 'compGlyphs',
    'nContours' or 'nPoints' keys, contributes nothing.
    """
    nonlocals['depth'] += 1
    nonlocals['maxdepth'] = max(nonlocals['maxdepth'], nonlocals['depth'])
    entry = self.glyphdata.get(originalGlyphIdx, {})
    components = entry.get('compGlyphs')
    if components:
        for glyphIdx in components:
            self.getGlyphData(glyphIdx, nonlocals)
    elif entry.get('nContours', 0) > 0 and nonlocals['depth'] > 0:  # simple
        # the counters live in nonlocals; bare local names would be unbound
        nonlocals['contours'] += entry['nContours']
        nonlocals['points'] += entry.get('nPoints', 0)

    nonlocals['depth'] -= 1
|
|
|
|
|
|
#########################################
|
|
# Recursively get composite glyphs
|
|
def getGlyphs(self, originalGlyphIdx, nonlocals):
    """Add the components of a composite glyph to the subset, recursively.

    nonlocals is a dict with 'start' (file offset of the glyf table),
    'glyphSet' (old glyph ID -> new glyph ID) and 'subsetglyphs' (list of
    (old glyph ID, code) pairs); the last two are updated in place
    (&start, &glyphSet, &subsetglyphs in the PHP original).

    A glyph that cannot be located produces a warning and is skipped.
    Empty and non-composite glyphs are left alone.
    """
    try:
        glyphPos = self.glyphPos[originalGlyphIdx]
        glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos
    except IndexError:
        warnings.warn("missing glyph %s" % (originalGlyphIdx))
        return

    if not glyphLen:
        return

    self.seek(nonlocals['start'] + glyphPos)
    if self.read_short() >= 0:
        # non-negative contour count: not a composite glyph
        return

    self.skip(8)
    glyph_ids = nonlocals['glyphSet']
    glyph_list = nonlocals['subsetglyphs']
    more = True
    while more:
        flags = self.read_ushort()
        glyphIdx = self.read_ushort()
        if glyphIdx not in glyph_ids:
            glyph_ids[glyphIdx] = len(glyph_list)  # old glyphID to new glyphID
            glyph_list.append((glyphIdx, 1))

        # the recursion moves the file position; come back afterwards
        savepos = self.fh.tell()
        self.getGlyphs(glyphIdx, nonlocals)
        self.seek(savepos)

        self.skip(4 if (flags & GF_WORDS) else 2)
        if flags & GF_SCALE:
            self.skip(2)
        elif flags & GF_XYSCALE:
            self.skip(4)
        elif flags & GF_TWOBYTWO:
            self.skip(8)
        more = flags & GF_MORE
|
|
|
|
#########################################
|
|
|
|
def getHMTX(self, numberOfHMetrics, numGlyphs, glyphToChar, scale):
    """Load advance widths from the hmtx table into self.charWidths.

    numberOfHMetrics is the number of (advance, lsb) pairs in the table and
    numGlyphs the total glyph count; glyphs past numberOfHMetrics reuse the
    last advance read. glyphToChar maps a glyph ID to the list of character
    codes using it. Widths are multiplied by scale and rounded.

    Sets self.defaultWidth from glyph 0 and fills self.charWidths, a list
    indexed by character code that grows in 1024-entry steps. A width of 0
    is stored as 65535, and slot 0 holds the number of widths stored.
    Codes 0 and 65535 are skipped, as are codes of 196608 and above.
    """
    start = self.seek_table("hmtx")
    aw = 0
    self.charWidths = []

    def resize_cw(size, default):
        # grow to the next multiple of 1024 above size
        size = (((size + 1) // 1024) + 1) * 1024
        delta = size - len(self.charWidths)
        if delta > 0:
            self.charWidths += [default] * delta

    def store_widths(glyph, advance):
        # Record the scaled advance for each code mapped to glyph and
        # return how many widths were stored.
        stored = 0
        for char in glyphToChar[glyph]:
            if char == 0 or char == 65535:
                continue
            w = int(round(scale * advance + 0.001))  # ROUND_HALF_UP in PY3K (like php)
            if w == 0:
                w = 65535
            if char < 196608:
                if char >= len(self.charWidths):
                    resize_cw(char, self.defaultWidth)
                self.charWidths[char] = w
                stored += 1
        return stored

    nCharWidths = 0
    in_memory = (numberOfHMetrics * 4) < self.maxStrLenRead
    if in_memory:
        data = self.get_chunk(start, (numberOfHMetrics * 4))
        arr = unpack(">%dH" % (len(data) // 2), data)
    else:
        self.seek(start)
    for glyph in range(numberOfHMetrics):
        if in_memory:
            aw = arr[glyph * 2]
        else:
            aw = self.read_ushort()
            self.read_ushort()  # lsb, not used

        if glyph in glyphToChar or glyph == 0:
            if aw >= (1 << 15):
                # 1.03 Some (arabic) fonts have -ve values for width
                # although should be unsigned value - comes out as e.g. 65108 (intended -50)
                aw = 0
            if glyph == 0:
                self.defaultWidth = scale * aw
                continue
            nCharWidths += store_widths(glyph, aw)

    # Glyphs beyond the metric pairs share the last advance width, so the
    # trailing lsb values never need to be read.
    for glyph in range(numberOfHMetrics, numGlyphs):
        if glyph in glyphToChar:
            nCharWidths += store_widths(glyph, aw)

    # NB 65535 is a set width of 0
    # First bytes define number of chars in font
    if not self.charWidths:
        self.charWidths = [0]  # no character mapped: keep slot 0 addressable
    self.charWidths[0] = nCharWidths
|
|
|
|
|
|
def getHMetric(self, numberOfHMetrics, gid):
    """Return the 4-byte hmtx entry (advance width + lsb) for glyph gid.

    For a glyph at or beyond numberOfHMetrics the advance is taken from the
    last metric pair and the lsb from the array that follows the pairs.
    """
    start = self.seek_table("hmtx")
    if gid < numberOfHMetrics:
        self.seek(start + (gid * 4))
        return self.fh.read(4)

    self.seek(start + ((numberOfHMetrics - 1) * 4))
    advance = self.fh.read(2)
    self.seek(start + (numberOfHMetrics * 2) + (gid * 2))
    lsb = self.fh.read(2)
    return advance + lsb
|
|
|
|
|
|
def getLOCA(self, indexToLocFormat, numGlyphs):
    """Load the glyph offsets from the loca table into self.glyphPos.

    indexToLocFormat 0 means 16-bit entries holding offset / 2; 1 means
    32-bit entries holding the offset itself. Any other value is reported
    through die().

    self.glyphPos receives numGlyphs + 1 offsets. The extra trailing entry
    is needed so that glyphPos[n + 1] - glyphPos[n] gives the length of
    every glyph, including the last one.
    Raises IndexError if the table holds fewer entries than that.
    """
    start = self.seek_table('loca')
    self.glyphPos = []
    entries = numGlyphs + 1
    if indexToLocFormat == 0:
        data = self.get_chunk(start, entries * 2)
        arr = unpack(">%dH" % (len(data) // 2), data)
        self.glyphPos = [arr[n] * 2 for n in range(entries)]
    elif indexToLocFormat == 1:
        data = self.get_chunk(start, entries * 4)
        arr = unpack(">%dL" % (len(data) // 4), data)
        self.glyphPos = [arr[n] for n in range(entries)]
    else:
        die('Unknown location table format ' + str(indexToLocFormat))
|
|
|
|
# CMAP Format 4
|
|
def getCMAP4(self, unicode_cmap_offset, glyphToChar, charToGlyph):
    """Decode a format 4 cmap subtable located at unicode_cmap_offset.

    Fills the two dicts passed in: charToGlyph maps a character code to a
    glyph ID, glyphToChar maps a glyph ID to the list of codes using it.
    Sets self.maxUniChar to the highest code seen below 196608.
    """
    self.maxUniChar = 0
    self.seek(unicode_cmap_offset + 2)
    length = self.read_ushort()
    limit = unicode_cmap_offset + length
    self.skip(2)

    segCount = self.read_ushort() // 2
    self.skip(6)
    endCount = [self.read_ushort() for _ in range(segCount)]
    self.skip(2)
    startCount = [self.read_ushort() for _ in range(segCount)]
    idDelta = [self.read_short() for _ in range(segCount)]  # ???? was unsigned short
    idRangeOffset_start = self._pos
    idRangeOffset = [self.read_ushort() for _ in range(segCount)]

    for seg in range(segCount):
        first = startCount[seg]
        delta = idDelta[seg]
        range_offset = idRangeOffset[seg]
        for unichar in range(first, endCount[seg] + 1):
            if range_offset == 0:
                glyph = (unichar + delta) & 0xFFFF
            else:
                # glyph ID is stored in the array after idRangeOffset
                offset = idRangeOffset_start + 2 * seg + (unichar - first) * 2 + range_offset
                if offset >= limit:
                    glyph = 0
                else:
                    glyph = self.get_ushort(offset)
                    if glyph != 0:
                        glyph = (glyph + delta) & 0xFFFF

            charToGlyph[unichar] = glyph
            if unichar < 196608:
                self.maxUniChar = max(unichar, self.maxUniChar)
            glyphToChar.setdefault(glyph, []).append(unichar)
|
|
|
|
# CMAP Format 12
|
|
def getCMAP12(self, unicode_cmap_offset, glyphToChar, charToGlyph):
    """Decode a format 12 cmap subtable located at unicode_cmap_offset.

    Fills the two dicts passed in: charToGlyph maps a character code to a
    glyph ID, glyphToChar maps a glyph ID to the list of codes using it.
    Sets self.maxUniChar to the highest code seen below 196608. A table
    too short for its group count is reported through die().
    """
    self.maxUniChar = 0
    # table (skip format version, should be 12)
    self.seek(unicode_cmap_offset + 2)
    # reserved
    self.skip(2)
    # table length
    length = self.read_ulong()
    # language (should be 0)
    self.skip(4)
    # groups count
    grpCount = self.read_ulong()

    # 16 header bytes plus three 4-byte fields per group
    if 2 + 2 + 4 + 4 + 4 + grpCount * 3 * 4 > length:
        die("TTF format 12 cmap table too small")
    for _ in range(grpCount):
        startCharCode = self.read_ulong()
        endCharCode = self.read_ulong()
        startGlyph = self.read_ulong()
        for unichar in range(startCharCode, endCharCode + 1):
            # glyph IDs run consecutively within a group
            glyph = startGlyph + (unichar - startCharCode)
            charToGlyph[unichar] = glyph
            if unichar < 196608:
                self.maxUniChar = max(unichar, self.maxUniChar)
            glyphToChar.setdefault(glyph, []).append(unichar)
|
|
|
|
|
|
|
|
# Put the TTF file together
|
|
def endTTFile(self, stm):
    """Assemble the tables in self.otables into a TTF byte string.

    The stm argument is ignored; the output always starts from an empty
    buffer. Tables are written sorted by tag, each padded to a multiple of
    four bytes. The whole-file checksum adjustment is then written 8 bytes
    into the 'head' table, so a 'head' table must have been added.
    """
    numTables = count(self.otables)
    searchRange = 1
    entrySelector = 0
    while searchRange * 2 <= numTables:
        searchRange *= 2
        entrySelector += 1

    searchRange *= 16
    rangeShift = numTables * 16 - searchRange

    # Header: Mac or Windows version tag
    version_tag = 0x74727565 if _TTF_MAC_HEADER else 0x00010000
    chunks = [pack(">LHHHH", version_tag, numTables, searchRange, entrySelector, rangeShift)]

    # Table directory
    sorted_tables = sorted(self.otables.items())
    offset = 12 + numTables * 16
    for tag, data in sorted_tables:
        if tag == 'head':
            head_start = offset
        chunks.append(tag.encode("latin1"))
        checksum = calcChecksum(data)
        chunks.append(pack(">HH", checksum[0], checksum[1]))
        size = strlen(data)
        chunks.append(pack(">LL", offset, size))
        offset += (size + 3) & ~3  # next table starts on a 4-byte boundary

    # Table data
    for tag, data in sorted_tables:
        padded = data + b("\0\0\0")
        chunks.append(substr(padded, 0, (strlen(padded) & ~3)))

    stm = b('').join(chunks)
    checksum = sub32((0xB1B0, 0xAFBA), calcChecksum(stm))
    chk = pack(">HH", checksum[0], checksum[1])
    return self.splice(stm, (head_start + 8), chk)
|
|
|