Skip to main content

mos_fonts/
embedded.rs

1//! Embedded TrueType faces + shaping.
2//!
3//! [`EmbeddedFont`] holds a bundled TTF's bytes plus a pre-parsed
4//! `rustybuzz::Face` and the FontDescriptor-relevant metrics the PDF
5//! emit path needs. [`shape`] runs `rustybuzz` over a UTF-8 string and
6//! returns a [`ShapedGlyph`] stream with GPOS advances and offsets
7//! preserved. [`subset`] reduces a face to just the glyph IDs used in
8//! one document and returns the trimmed bytes suitable for a
9//! `/FontFile2` stream.
10
11use std::collections::HashMap;
12use std::sync::{Arc, PoisonError, RwLock};
13
14use rustybuzz::{Direction, Face, Language, Script, ShapePlan, UnicodeBuffer};
15
/// A single positioned glyph produced by shaping a run of text.
///
/// Every distance is expressed in font units; `cluster` is a byte offset
/// into the UTF-8 string that was shaped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ShapedGlyph {
    /// Glyph ID in the source font. The emitted PDF uses this value as
    /// the CID (the font is written with `/CIDToGIDMap /Identity`).
    pub gid: u16,
    /// Horizontal advance, in font units, once OpenType positioning has
    /// been applied.
    pub advance_units: i32,
    /// Horizontal shift, in font units, applied before the glyph is drawn.
    pub x_offset_units: i32,
    /// Vertical shift, in font units, applied before the glyph is drawn.
    pub y_offset_units: i32,
    /// Byte offset, within the source string, of the grapheme cluster
    /// this glyph belongs to. Never decreases across a LTR run.
    pub cluster: u32,
}
33
/// Per-face cache of compiled `rustybuzz` shape plans.
///
/// Keys are the `(script, language)` pair a buffer resolves to after
/// segment-property guessing. Values are `Arc`-wrapped so the hit path can
/// clone the handle out and shape with the lock released.
type ShapePlanCache = RwLock<HashMap<(Script, Option<Language>), Arc<ShapePlan>>>;
39
/// A bundled `TrueType` face: the raw bytes plus the metrics and
/// parsed `rustybuzz::Face` needed to shape text and emit a PDF
/// `FontDescriptor`.
///
/// Constructed internally from a `&'static [u8]` (the bundled
/// `include_bytes!`-loaded TTF). The crate's user-facing surface is
/// the [`crate::EmbeddedFontId`] enum; this struct is the per-cut
/// data block those ids resolve through.
///
/// Every metric field is captured once at construction time, so readers
/// never have to go back to the font tables for them.
pub struct EmbeddedFont {
    /// Raw TTF bytes. Held statically so the parsed `Face<'static>`
    /// can borrow them.
    pub bytes: &'static [u8],
    /// `HarfBuzz`/`rustybuzz` face used for shaping. Borrows `bytes`.
    pub face: Face<'static>,
    /// Pre-parsed `ttf-parser` face. The PDF backend reads
    /// `FontDescriptor` fields (italic angle, bbox, …) through this;
    /// `rustybuzz` wraps it but doesn't re-expose every getter.
    pub ttf: ttf_parser::Face<'static>,
    /// `PostScript` name (from the `name` table, ID 6), supplied by the
    /// caller at construction. Becomes the `/BaseFont` entry's suffix
    /// after the six-letter subset tag.
    pub postscript_name: &'static str,
    /// `head.unitsPerEm`. Typically 1000 (CFF) or a power of two for
    /// `TrueType` outlines.
    pub units_per_em: u16,
    /// `hhea.ascender` (font units).
    pub ascender: i16,
    /// `hhea.descender` (font units, typically negative).
    pub descender: i16,
    /// `OS/2.sCapHeight` if present, else `ascender * 7 / 10` as a
    /// PDF-conventional fallback.
    pub cap_height: i16,
    /// `OS/2.sxHeight` if present, else `ascender / 2` as a fallback.
    pub x_height: i16,
    /// `post.italicAngle` in degrees. OpenType and PDF `/ItalicAngle`
    /// share the same convention (counter-clockwise from vertical,
    /// negative for italic slanted right per PDF 1.7 §9.8.2), so the
    /// value passes through unchanged.
    pub italic_angle: f32,
    /// `head` font bounding box (xMin, yMin, xMax, yMax). Becomes
    /// `FontDescriptor` `/FontBBox`.
    pub bbox: (i16, i16, i16, i16),
    /// Heuristic stem-vertical width for `/StemV`: 80 for regular,
    /// 120 for bold. `ttf-parser` doesn't surface a reliable `StemV`;
    /// most fonts don't ship it in `OS/2`. PDF validators accept the
    /// heuristic.
    pub stem_v: i16,
    /// PDF `FontDescriptor` `/Flags`. Nonsymbolic (bit 6, value 32) for
    /// Latin/Cyrillic/Greek fonts; the italic bit (bit 7, value 64)
    /// is OR'd in for italic cuts.
    pub flags: u32,
    /// Compiled `rustybuzz` shape plans, keyed by the buffer's
    /// `(script, language)` after segment-property guessing. A plan is the
    /// compiled GSUB/GPOS feature program for a
    /// `(face, script, language, LTR, no user features)` tuple; it is
    /// invariant across the thousands of per-run shaping calls, so building
    /// it once per script (Latin dominates real text) replaces the plan
    /// recompilation `rustybuzz::shape` does on *every* call — ~22% of build
    /// time in profiling. `RwLock` because the font is shared `&'static`;
    /// the hit path (the common case) only needs a read lock. Plans are
    /// `Arc`-wrapped so the hit path can clone the handle out and release
    /// the lock before shaping.
    ///
    /// Private: the cache starts empty and is only filled lazily by
    /// [`shape`].
    plan_cache: ShapePlanCache,
}
104
105impl std::fmt::Debug for EmbeddedFont {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        f.debug_struct("EmbeddedFont")
108            .field("postscript_name", &self.postscript_name)
109            .field("units_per_em", &self.units_per_em)
110            .field("ascender", &self.ascender)
111            .field("descender", &self.descender)
112            .field("italic_angle", &self.italic_angle)
113            .field("bbox", &self.bbox)
114            .finish()
115    }
116}
117
118impl EmbeddedFont {
119    /// Parse a bundled TTF blob into an [`EmbeddedFont`]. The blob
120    /// must outlive the program (which it does: bundled cuts come
121    /// from `include_bytes!` and are baked into the binary).
122    ///
123    /// `postscript_name`, `is_bold`, and `is_italic` are provided by
124    /// the caller rather than read from the `name` table because the
125    /// bundled cuts are known statics and parse-time string ownership
126    /// would require allocating; the `name` table also ships
127    /// platform-specific encodings we don't want to navigate.
128    ///
129    /// # Panics
130    ///
131    /// Panics if the bytes don't parse as a `TrueType` font. The four
132    /// bundled cuts have been parse-verified at vendor time and are
133    /// re-verified by `tests/parse_bundled.rs` on every CI run, so
134    /// reaching this panic requires post-build corruption (e.g. a
135    /// failed LFS pull or a truncated binary). Threading
136    /// `Result`/`Option` through the dozens of downstream call sites
137    /// to handle a case the compile-time `include_bytes!` already
138    /// rules out would make the code materially worse; the lint
139    /// suppression is the explicit CLAUDE.md exception, paired with
140    /// the CI test that catches the only realistic failure mode.
141    #[must_use]
142    #[allow(
143        clippy::expect_used,
144        reason = "bundled bytes are include_bytes!-baked and CI-verified by \
145                  tests/parse_bundled.rs; propagating Option would force every \
146                  downstream caller fallible for an unreachable path"
147    )]
148    pub(crate) fn from_static(
149        bytes: &'static [u8],
150        postscript_name: &'static str,
151        is_bold: bool,
152        is_italic: bool,
153    ) -> Self {
154        let ttf = ttf_parser::Face::parse(bytes, 0)
155            .expect("bundled font bytes failed to parse as TrueType: repository corruption?");
156        let face = Face::from_face(ttf.clone());
157
158        let units_per_em = ttf.units_per_em();
159        let ascender = ttf.ascender();
160        let descender = ttf.descender();
161        let cap_height = ttf.capital_height().map_or(ascender * 7 / 10, i16::from);
162        let x_height = ttf.x_height().map_or(ascender / 2, i16::from);
163        let italic_angle = ttf.italic_angle();
164        let global_bbox = ttf.global_bounding_box();
165        let bbox = (
166            global_bbox.x_min,
167            global_bbox.y_min,
168            global_bbox.x_max,
169            global_bbox.y_max,
170        );
171
172        // PDF FontDescriptor flag bits (PDF 1.7 §9.8.2 Table 123):
173        //   bit 6 (value 32)  Nonsymbolic: character set is standard
174        //                     Adobe-Latin (covers extended Latin and
175        //                     anything Unicode-addressable that doesn't
176        //                     deliberately use a symbol encoding).
177        //   bit 7 (value 64)  Italic.
178        // The Symbolic bit (bit 3, value 4) is mutually exclusive with
179        // Nonsymbolic and only applies to faces like Symbol/Dingbats.
180        let mut flags: u32 = 0x20;
181        if is_italic {
182            flags |= 0x40;
183        }
184
185        let stem_v: i16 = if is_bold { 120 } else { 80 };
186
187        Self {
188            bytes,
189            face,
190            ttf,
191            postscript_name,
192            units_per_em,
193            ascender,
194            descender,
195            cap_height,
196            x_height,
197            italic_angle,
198            bbox,
199            stem_v,
200            flags,
201            plan_cache: RwLock::new(HashMap::new()),
202        }
203    }
204
205    /// Look up the GID for a Unicode codepoint, if the face covers it.
206    /// Used by the layout engine's `glyph_width` shortcut when shaping
207    /// a single codepoint would be wasteful.
208    #[must_use]
209    pub fn glyph_index(&self, ch: char) -> Option<u16> {
210        self.ttf.glyph_index(ch).map(|g| g.0)
211    }
212
213    /// Horizontal advance for `gid` in font units, sourced from the
214    /// `hmtx` table.
215    #[must_use]
216    pub fn advance_units(&self, gid: u16) -> u16 {
217        self.ttf
218            .glyph_hor_advance(ttf_parser::GlyphId(gid))
219            .unwrap_or(0)
220    }
221}
222
223/// Shape `text` against `font` using `rustybuzz`. Returns the glyph
224/// stream in visual order (LTR for this slice). Glyph IDs, advances,
225/// and offsets come from `rustybuzz`, so substitutions, kerning, and
226/// combining-mark positioning are preserved. An empty `text` returns
227/// an empty `Vec` without invoking the shaper.
228#[must_use]
229pub fn shape(font: &EmbeddedFont, text: &str) -> Vec<ShapedGlyph> {
230    if text.is_empty() {
231        return Vec::new();
232    }
233    let mut buffer = UnicodeBuffer::new();
234    buffer.push_str(text);
235    // We let rustybuzz infer script and language from the buffer's
236    // codepoints; the slice is LTR-only so we force horizontal LTR
237    // explicitly to avoid the inference picking RTL for an Arabic
238    // word the user typed.
239    buffer.set_direction(Direction::LeftToRight);
240    buffer.guess_segment_properties();
241    // Force LTR back: guess_segment_properties may flip direction
242    // based on script. This slice is LTR-only by scope.
243    buffer.set_direction(Direction::LeftToRight);
244    // `rustybuzz::shape` recompiles the OpenType shape plan on every call;
245    // that recompilation dominated build time (~22% in profiling). The plan
246    // depends only on `(face, direction, script, language, user features)` —
247    // direction is always LTR here and features are empty — so cache it by
248    // `(script, language)`. Building the plan with these exact arguments
249    // reproduces `rustybuzz::shape`'s plan, so shaped output is byte-identical.
250    let key = (buffer.script(), buffer.language());
251    // Clone the plan handle out under the lock, then shape with the lock
252    // released. A poisoned lock is recovered rather than panicked on, since a
253    // stale plan cache is harmless.
254    let cached = font
255        .plan_cache
256        .read()
257        .unwrap_or_else(PoisonError::into_inner)
258        .get(&key)
259        .map(Arc::clone);
260    let plan = cached.unwrap_or_else(|| {
261        let plan = Arc::new(ShapePlan::new(
262            &font.face,
263            Direction::LeftToRight,
264            Some(key.0),
265            key.1.as_ref(),
266            &[],
267        ));
268        font.plan_cache
269            .write()
270            .unwrap_or_else(PoisonError::into_inner)
271            .entry(key)
272            .or_insert_with(|| Arc::clone(&plan));
273        plan
274    });
275    let glyph_buffer = rustybuzz::shape_with_plan(&font.face, &plan, buffer);
276    let infos = glyph_buffer.glyph_infos();
277    let positions = glyph_buffer.glyph_positions();
278    let mut out = Vec::with_capacity(infos.len());
279    for (info, pos) in infos.iter().zip(positions.iter()) {
280        // rustybuzz documents `glyph_id` as `<= u16::MAX`; the cast is
281        // truncation-safe per that contract. Guard with a `try_from`
282        // anyway so a future rustybuzz drift surfaces as gid 0
283        // (rendered as `.notdef`) rather than silent wrap.
284        let gid = u16::try_from(info.glyph_id).unwrap_or(0);
285        out.push(ShapedGlyph {
286            gid,
287            advance_units: pos.x_advance,
288            x_offset_units: pos.x_offset,
289            y_offset_units: pos.y_offset,
290            cluster: info.cluster,
291        });
292    }
293    out
294}
295
296/// Subset `font` to just the glyph IDs in `gids` (always include GID 0,
297/// `.notdef`, which the PDF spec mandates). Returns the trimmed TTF
298/// bytes suitable for embedding as a `/FontFile2` stream.
299///
300/// # Errors
301///
302/// Returns an error if the font's tables are malformed or use features
303/// the underlying [`subsetter`] crate doesn't support (CFF2). The
304/// bundled Noto Sans cuts are TrueType-flavoured and exercise the
305/// well-supported path.
306pub fn subset(font: &EmbeddedFont, gids: &[u16]) -> Result<Vec<u8>, SubsetError> {
307    let mut all = Vec::with_capacity(gids.len() + 1);
308    all.push(0_u16);
309    all.extend_from_slice(gids);
310    let remapper = subsetter::GlyphRemapper::new_from_glyphs(&all);
311    subsetter::subset(font.bytes, 0, &remapper).map_err(SubsetError)
312}
313
/// Error returned by [`subset`] when the font could not be subsetted.
///
/// Wraps [`subsetter::Error`] without exposing the dependency in the
/// public API. The PDF emit path bails on this error with a
/// `Diagnostic`. The inner value is private: callers debug via
/// the `Display`/`Debug` impls, not pattern matching on
/// `subsetter::Error` directly.
#[derive(Debug)]
pub struct SubsetError(subsetter::Error);
321
322impl std::fmt::Display for SubsetError {
323    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
324        write!(f, "font subsetting failed: {:?}", self.0)
325    }
326}
327
// Marker impl with no overrides: `source()` keeps its default, so the
// wrapped `subsetter::Error` is not exposed through the error chain.
impl std::error::Error for SubsetError {}