mos_fonts/embedded.rs
1//! Embedded TrueType faces + shaping.
2//!
3//! [`EmbeddedFont`] holds a bundled TTF's bytes plus a pre-parsed
4//! `rustybuzz::Face` and the FontDescriptor-relevant metrics the PDF
5//! emit path needs. [`shape`] runs `rustybuzz` over a UTF-8 string and
6//! returns a [`ShapedGlyph`] stream with GPOS advances and offsets
7//! preserved. [`subset`] reduces a face to just the glyph IDs used in
8//! one document and returns the trimmed bytes suitable for a
9//! `/FontFile2` stream.
10
11use std::collections::HashMap;
12use std::sync::{Arc, PoisonError, RwLock};
13
14use rustybuzz::{Direction, Face, Language, Script, ShapePlan, UnicodeBuffer};
15
/// A single positioned glyph produced by shaping a run of text.
///
/// `cluster` ties the glyph back to the source: it is a byte offset into
/// the UTF-8 string that was shaped.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ShapedGlyph {
    /// Glyph ID within the source font. The emitted PDF uses
    /// `/CIDToGIDMap /Identity`, so this value doubles as the CID.
    pub gid: u16,
    /// Horizontal advance, in font units, after OpenType positioning
    /// has been applied.
    pub advance_units: i32,
    /// Horizontal displacement, in font units, to apply before the
    /// glyph is drawn.
    pub x_offset_units: i32,
    /// Vertical displacement, in font units, to apply before the glyph
    /// is drawn.
    pub y_offset_units: i32,
    /// Byte offset, in the source string, of the grapheme cluster this
    /// glyph belongs to. Within an LTR run the values never decrease.
    pub cluster: u32,
}
33
34/// Per-face cache of compiled `rustybuzz` shape plans, keyed by the
35/// `(script, language)` a buffer resolves to after segment-property guessing.
36/// `Arc` so the hit path can clone the handle out and shape with the lock
37/// released.
38type ShapePlanCache = RwLock<HashMap<(Script, Option<Language>), Arc<ShapePlan>>>;
39
40/// A bundled `TrueType` face: the raw bytes plus the metrics and
41/// parsed `rustybuzz::Face` needed to shape text and emit a PDF
42/// `FontDescriptor`.
43///
44/// Constructed internally from a `&'static [u8]` (the bundled
45/// `include_bytes!`-loaded TTF). The crate's user-facing surface is
46/// the [`crate::EmbeddedFontId`] enum; this struct is the per-cut
47/// data block those ids resolve through.
48pub struct EmbeddedFont {
49 /// Raw TTF bytes. Held statically so the parsed `Face<'static>`
50 /// can borrow them.
51 pub bytes: &'static [u8],
52 /// `HarfBuzz`/`rustybuzz` face. Borrows `bytes`.
53 pub face: Face<'static>,
54 /// Pre-parsed `ttf-parser` face. The PDF backend reads
55 /// `FontDescriptor` fields (italic angle, bbox, …) through this;
56 /// `rustybuzz` wraps it but doesn't re-expose every getter.
57 pub ttf: ttf_parser::Face<'static>,
58 /// `PostScript` name (from the `name` table, ID 6). Becomes the
59 /// `/BaseFont` entry's suffix after the six-letter subset tag.
60 pub postscript_name: &'static str,
61 /// `head.unitsPerEm`. Typically 1000 (CFF) or a power of two for
62 /// `TrueType` outlines.
63 pub units_per_em: u16,
64 /// `hhea.ascender` (font units).
65 pub ascender: i16,
66 /// `hhea.descender` (font units, typically negative).
67 pub descender: i16,
68 /// `OS/2.sCapHeight` if present, else `ascender * 7 / 10` as a
69 /// PDF-conventional fallback.
70 pub cap_height: i16,
71 /// `OS/2.sxHeight` if present, else `ascender * 1 / 2` as a
72 /// fallback.
73 pub x_height: i16,
74 /// `post.italicAngle` in degrees. OpenType and PDF `/ItalicAngle`
75 /// share the same convention (counter-clockwise from vertical,
76 /// negative for italic slanted right per PDF 1.7 §9.8.2), so the
77 /// value passes through unchanged.
78 pub italic_angle: f32,
79 /// `head` font bounding box (xMin, yMin, xMax, yMax). Becomes
80 /// `FontDescriptor` `/FontBBox`.
81 pub bbox: (i16, i16, i16, i16),
82 /// Heuristic stem-vertical width for `/StemV`: 80 for regular,
83 /// 120 for bold. `ttf-parser` doesn't surface a reliable `StemV`;
84 /// most fonts don't ship it in `OS/2`. PDF validators accept the
85 /// heuristic.
86 pub stem_v: i16,
87 /// PDF `FontDescriptor` `/Flags`. Nonsymbolic (bit 6, value 32) for
88 /// Latin/Cyrillic/Greek fonts; the italic bit (bit 7, value 64)
89 /// is OR'd in for italic cuts.
90 pub flags: u32,
91 /// Compiled `rustybuzz` shape plans, keyed by the buffer's
92 /// `(script, language)` after segment-property guessing. A plan is the
93 /// compiled GSUB/GPOS feature program for a
94 /// `(face, script, language, LTR, no user features)` tuple; it is
95 /// invariant across the thousands of per-run shaping calls, so building
96 /// it once per script (Latin dominates real text) replaces the plan
97 /// recompilation `rustybuzz::shape` does on *every* call — ~22% of build
98 /// time in profiling. `RwLock` because the font is shared `&'static`;
99 /// the hit path (the common case) only needs a read lock. Plans are
100 /// `Arc`-wrapped so the hit path can clone the handle out and release
101 /// the lock before shaping.
102 plan_cache: ShapePlanCache,
103}
104
105impl std::fmt::Debug for EmbeddedFont {
106 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107 f.debug_struct("EmbeddedFont")
108 .field("postscript_name", &self.postscript_name)
109 .field("units_per_em", &self.units_per_em)
110 .field("ascender", &self.ascender)
111 .field("descender", &self.descender)
112 .field("italic_angle", &self.italic_angle)
113 .field("bbox", &self.bbox)
114 .finish()
115 }
116}
117
118impl EmbeddedFont {
119 /// Parse a bundled TTF blob into an [`EmbeddedFont`]. The blob
120 /// must outlive the program (which it does: bundled cuts come
121 /// from `include_bytes!` and are baked into the binary).
122 ///
123 /// `postscript_name`, `is_bold`, and `is_italic` are provided by
124 /// the caller rather than read from the `name` table because the
125 /// bundled cuts are known statics and parse-time string ownership
126 /// would require allocating; the `name` table also ships
127 /// platform-specific encodings we don't want to navigate.
128 ///
129 /// # Panics
130 ///
131 /// Panics if the bytes don't parse as a `TrueType` font. The four
132 /// bundled cuts have been parse-verified at vendor time and are
133 /// re-verified by `tests/parse_bundled.rs` on every CI run, so
134 /// reaching this panic requires post-build corruption (e.g. a
135 /// failed LFS pull or a truncated binary). Threading
136 /// `Result`/`Option` through the dozens of downstream call sites
137 /// to handle a case the compile-time `include_bytes!` already
138 /// rules out would make the code materially worse; the lint
139 /// suppression is the explicit CLAUDE.md exception, paired with
140 /// the CI test that catches the only realistic failure mode.
141 #[must_use]
142 #[allow(
143 clippy::expect_used,
144 reason = "bundled bytes are include_bytes!-baked and CI-verified by \
145 tests/parse_bundled.rs; propagating Option would force every \
146 downstream caller fallible for an unreachable path"
147 )]
148 pub(crate) fn from_static(
149 bytes: &'static [u8],
150 postscript_name: &'static str,
151 is_bold: bool,
152 is_italic: bool,
153 ) -> Self {
154 let ttf = ttf_parser::Face::parse(bytes, 0)
155 .expect("bundled font bytes failed to parse as TrueType: repository corruption?");
156 let face = Face::from_face(ttf.clone());
157
158 let units_per_em = ttf.units_per_em();
159 let ascender = ttf.ascender();
160 let descender = ttf.descender();
161 let cap_height = ttf.capital_height().map_or(ascender * 7 / 10, i16::from);
162 let x_height = ttf.x_height().map_or(ascender / 2, i16::from);
163 let italic_angle = ttf.italic_angle();
164 let global_bbox = ttf.global_bounding_box();
165 let bbox = (
166 global_bbox.x_min,
167 global_bbox.y_min,
168 global_bbox.x_max,
169 global_bbox.y_max,
170 );
171
172 // PDF FontDescriptor flag bits (PDF 1.7 §9.8.2 Table 123):
173 // bit 6 (value 32) Nonsymbolic: character set is standard
174 // Adobe-Latin (covers extended Latin and
175 // anything Unicode-addressable that doesn't
176 // deliberately use a symbol encoding).
177 // bit 7 (value 64) Italic.
178 // The Symbolic bit (bit 3, value 4) is mutually exclusive with
179 // Nonsymbolic and only applies to faces like Symbol/Dingbats.
180 let mut flags: u32 = 0x20;
181 if is_italic {
182 flags |= 0x40;
183 }
184
185 let stem_v: i16 = if is_bold { 120 } else { 80 };
186
187 Self {
188 bytes,
189 face,
190 ttf,
191 postscript_name,
192 units_per_em,
193 ascender,
194 descender,
195 cap_height,
196 x_height,
197 italic_angle,
198 bbox,
199 stem_v,
200 flags,
201 plan_cache: RwLock::new(HashMap::new()),
202 }
203 }
204
205 /// Look up the GID for a Unicode codepoint, if the face covers it.
206 /// Used by the layout engine's `glyph_width` shortcut when shaping
207 /// a single codepoint would be wasteful.
208 #[must_use]
209 pub fn glyph_index(&self, ch: char) -> Option<u16> {
210 self.ttf.glyph_index(ch).map(|g| g.0)
211 }
212
213 /// Horizontal advance for `gid` in font units, sourced from the
214 /// `hmtx` table.
215 #[must_use]
216 pub fn advance_units(&self, gid: u16) -> u16 {
217 self.ttf
218 .glyph_hor_advance(ttf_parser::GlyphId(gid))
219 .unwrap_or(0)
220 }
221}
222
223/// Shape `text` against `font` using `rustybuzz`. Returns the glyph
224/// stream in visual order (LTR for this slice). Glyph IDs, advances,
225/// and offsets come from `rustybuzz`, so substitutions, kerning, and
226/// combining-mark positioning are preserved. An empty `text` returns
227/// an empty `Vec` without invoking the shaper.
228#[must_use]
229pub fn shape(font: &EmbeddedFont, text: &str) -> Vec<ShapedGlyph> {
230 if text.is_empty() {
231 return Vec::new();
232 }
233 let mut buffer = UnicodeBuffer::new();
234 buffer.push_str(text);
235 // We let rustybuzz infer script and language from the buffer's
236 // codepoints; the slice is LTR-only so we force horizontal LTR
237 // explicitly to avoid the inference picking RTL for an Arabic
238 // word the user typed.
239 buffer.set_direction(Direction::LeftToRight);
240 buffer.guess_segment_properties();
241 // Force LTR back: guess_segment_properties may flip direction
242 // based on script. This slice is LTR-only by scope.
243 buffer.set_direction(Direction::LeftToRight);
244 // `rustybuzz::shape` recompiles the OpenType shape plan on every call;
245 // that recompilation dominated build time (~22% in profiling). The plan
246 // depends only on `(face, direction, script, language, user features)` —
247 // direction is always LTR here and features are empty — so cache it by
248 // `(script, language)`. Building the plan with these exact arguments
249 // reproduces `rustybuzz::shape`'s plan, so shaped output is byte-identical.
250 let key = (buffer.script(), buffer.language());
251 // Clone the plan handle out under the lock, then shape with the lock
252 // released. A poisoned lock is recovered rather than panicked on, since a
253 // stale plan cache is harmless.
254 let cached = font
255 .plan_cache
256 .read()
257 .unwrap_or_else(PoisonError::into_inner)
258 .get(&key)
259 .map(Arc::clone);
260 let plan = cached.unwrap_or_else(|| {
261 let plan = Arc::new(ShapePlan::new(
262 &font.face,
263 Direction::LeftToRight,
264 Some(key.0),
265 key.1.as_ref(),
266 &[],
267 ));
268 font.plan_cache
269 .write()
270 .unwrap_or_else(PoisonError::into_inner)
271 .entry(key)
272 .or_insert_with(|| Arc::clone(&plan));
273 plan
274 });
275 let glyph_buffer = rustybuzz::shape_with_plan(&font.face, &plan, buffer);
276 let infos = glyph_buffer.glyph_infos();
277 let positions = glyph_buffer.glyph_positions();
278 let mut out = Vec::with_capacity(infos.len());
279 for (info, pos) in infos.iter().zip(positions.iter()) {
280 // rustybuzz documents `glyph_id` as `<= u16::MAX`; the cast is
281 // truncation-safe per that contract. Guard with a `try_from`
282 // anyway so a future rustybuzz drift surfaces as gid 0
283 // (rendered as `.notdef`) rather than silent wrap.
284 let gid = u16::try_from(info.glyph_id).unwrap_or(0);
285 out.push(ShapedGlyph {
286 gid,
287 advance_units: pos.x_advance,
288 x_offset_units: pos.x_offset,
289 y_offset_units: pos.y_offset,
290 cluster: info.cluster,
291 });
292 }
293 out
294}
295
296/// Subset `font` to just the glyph IDs in `gids` (always include GID 0,
297/// `.notdef`, which the PDF spec mandates). Returns the trimmed TTF
298/// bytes suitable for embedding as a `/FontFile2` stream.
299///
300/// # Errors
301///
302/// Returns an error if the font's tables are malformed or use features
303/// the underlying [`subsetter`] crate doesn't support (CFF2). The
304/// bundled Noto Sans cuts are TrueType-flavoured and exercise the
305/// well-supported path.
306pub fn subset(font: &EmbeddedFont, gids: &[u16]) -> Result<Vec<u8>, SubsetError> {
307 let mut all = Vec::with_capacity(gids.len() + 1);
308 all.push(0_u16);
309 all.extend_from_slice(gids);
310 let remapper = subsetter::GlyphRemapper::new_from_glyphs(&all);
311 subsetter::subset(font.bytes, 0, &remapper).map_err(SubsetError)
312}
313
314/// Wraps [`subsetter::Error`] without exposing the dependency in the
315/// public API. The PDF emit path bails on this error with a
316/// `Diagnostic`. The inner variant is private: callers debug via
317/// the `Display`/`Debug` impls, not pattern matching on
318/// `subsetter::Error` directly.
319#[derive(Debug)]
320pub struct SubsetError(subsetter::Error);
321
322impl std::fmt::Display for SubsetError {
323 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
324 write!(f, "font subsetting failed: {:?}", self.0)
325 }
326}
327
328impl std::error::Error for SubsetError {}