Skip to main content

pdf_base14_metrics/
lib.rs

1//! Pre-parsed Adobe Core 14 PDF font metrics.
2//!
//! The 14 PostScript faces every PDF 1.7-conformant viewer ships
//! built in (Helvetica × 4, Times × 4, Courier × 4, Symbol, and
//! `ZapfDingbats`) are exposed as `&'static FontMetrics<'static>`
//! constants that cost nothing at runtime. The AFM files are vendored from
7//! [`tecnickcom/tc-font-core14-afms`] under `data/`, parsed by the
8//! sibling [`adobe-font-metrics`] crate at build time (see `build.rs`),
9//! and baked into Rust statics in `$OUT_DIR/baked.rs`.
10//!
11
12//! [`tecnickcom/tc-font-core14-afms`]: https://github.com/tecnickcom/tc-font-core14-afms
13//! [`adobe-font-metrics`]: https://crates.io/crates/adobe-font-metrics
14//!
15//! # Quick start
16//!
17//! ```
18//! use pdf_base14_metrics::Base14Font;
19//!
20//! // Look up a glyph width by PostScript name.
21//! assert_eq!(Base14Font::Helvetica.glyph_width("A"), Some(667.0));
22//!
23//! // Or via PDF `WinAnsiEncoding` byte (Latin faces only).
24//! assert_eq!(Base14Font::Helvetica.winansi_width(b'A'), Some(667.0));
25//!
26//! // Iterate every Core 14 face in stable order.
27//! for f in Base14Font::ALL {
28//!     let m = f.metrics();
29//!     assert!(!m.character_metrics.is_empty());
30//! }
31//! ```
32//!
33//! # Encoding caveat: Symbol and `ZapfDingbats`
34//!
35//! [`Base14Font::winansi_width`] returns `None` for [`Base14Font::Symbol`]
36//! and [`Base14Font::ZapfDingbats`]: those fonts use their own
37//! PostScript encodings (Greek/math operators and named dingbats
38//! respectively), not `WinAnsi`. Querying them through a Latin-1 byte
39//! would be a category error; the byte `0x41` is `"A"` in `WinAnsi`
40//! but `"Alpha"` in Symbol. Callers must reach for the per-glyph
41//! [`Base14Font::glyph_width`] API for those two fonts.
42//!
43//! # License
44//!
45//! The crate's Rust source is MIT. The 14 vendored AFM files in
46//! `data/afm/` ship under Adobe's permissive Core 14 AFM license
47//! (`APAFML`); see `LICENSE-APAFML` in the crate root. The combined
48//! SPDX expression is `MIT AND APAFML`.
49
50#![doc(
51    html_logo_url = "https://mosaic.kjanat.dev/assets/A4.svg",
52    html_favicon_url = "https://mosaic.kjanat.dev/assets/A4.svg"
53)]
54#![deny(missing_docs)]
55
56pub use adobe_font_metrics::{BBox, CharacterMetric, FontMetrics, KerningPair};
57
58use std::borrow::Cow;
59
60mod agl_subset;
61mod winansi_char_map;
62mod winansi_table;
63
64// The generated file references `BBox`, `CharacterMetric`,
65// `FontMetrics`, `KerningPair`, and `Cow` unqualified; all are in
66// scope via the `pub use` and `use` above.
67include!(concat!(env!("OUT_DIR"), "/baked.rs"));
68
/// A member of the standard 14-font set that every conformant PDF
/// reader provides built in (PDF 1.7 §9.6.2.2).
///
/// Variants are declared family by family: the four Helvetica
/// faces, the four Times faces, the four Courier faces, and finally
/// Symbol and `ZapfDingbats`. [`Self::ALL`] iterates them in
/// this order.
///
/// # Examples
///
/// ```
/// use pdf_base14_metrics::Base14Font;
///
/// assert_eq!(Base14Font::ALL.len(), 14);
/// assert_eq!(Base14Font::Helvetica.pdf_base_name(), "Helvetica");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Base14Font {
    // --- Helvetica family -------------------------------------------
    /// Helvetica, regular weight, upright.
    Helvetica,
    /// Helvetica, bold weight, upright.
    HelveticaBold,
    /// Helvetica, regular weight, oblique (slanted).
    HelveticaOblique,
    /// Helvetica, bold weight, oblique (slanted).
    HelveticaBoldOblique,

    // --- Times family -----------------------------------------------
    /// Times Roman, regular weight, upright.
    TimesRoman,
    /// Times, bold weight, upright.
    TimesBold,
    /// Times, regular weight, italic.
    TimesItalic,
    /// Times, bold weight, italic.
    TimesBoldItalic,

    // --- Courier family (monospace) ---------------------------------
    /// Courier, regular weight, upright (monospace).
    Courier,
    /// Courier, bold weight, upright (monospace).
    CourierBold,
    /// Courier, regular weight, oblique (monospace, slanted).
    CourierOblique,
    /// Courier, bold weight, oblique (monospace, slanted).
    CourierBoldOblique,

    // --- Non-Latin pi fonts -----------------------------------------
    /// Adobe Symbol (Greek letters, math operators).
    Symbol,
    /// ITC Zapf Dingbats (decorative glyphs).
    ZapfDingbats,
}
116
117impl Base14Font {
118    /// Every Core 14 face in stable PDF order.
119    pub const ALL: [Self; 14] = [
120        Self::Helvetica,
121        Self::HelveticaBold,
122        Self::HelveticaOblique,
123        Self::HelveticaBoldOblique,
124        Self::TimesRoman,
125        Self::TimesBold,
126        Self::TimesItalic,
127        Self::TimesBoldItalic,
128        Self::Courier,
129        Self::CourierBold,
130        Self::CourierOblique,
131        Self::CourierBoldOblique,
132        Self::Symbol,
133        Self::ZapfDingbats,
134    ];
135
136    /// Borrows the pre-parsed Adobe AFM metrics for this face.
137    ///
138    /// # Examples
139    ///
140    /// ```
141    /// use pdf_base14_metrics::Base14Font;
142    ///
143    /// let metrics = Base14Font::Helvetica.metrics();
144    ///
145    /// assert_eq!(metrics.font_name, "Helvetica");
146    /// ```
147    #[must_use]
148    pub fn metrics(self) -> &'static FontMetrics<'static> {
149        match self {
150            Self::Helvetica => &HELVETICA,
151            Self::HelveticaBold => &HELVETICA_BOLD,
152            Self::HelveticaOblique => &HELVETICA_OBLIQUE,
153            Self::HelveticaBoldOblique => &HELVETICA_BOLDOBLIQUE,
154            Self::TimesRoman => &TIMES_ROMAN,
155            Self::TimesBold => &TIMES_BOLD,
156            Self::TimesItalic => &TIMES_ITALIC,
157            Self::TimesBoldItalic => &TIMES_BOLDITALIC,
158            Self::Courier => &COURIER,
159            Self::CourierBold => &COURIER_BOLD,
160            Self::CourierOblique => &COURIER_OBLIQUE,
161            Self::CourierBoldOblique => &COURIER_BOLDOBLIQUE,
162            Self::Symbol => &SYMBOL,
163            Self::ZapfDingbats => &ZAPFDINGBATS,
164        }
165    }
166
167    /// PDF `/BaseFont` name per PDF 1.7 §9.6.2.2. These are the
168    /// exact bytes a conformant PDF writer puts after `/BaseFont`
169    /// in a font resource dictionary.
170    ///
171    /// # Examples
172    ///
173    /// ```
174    /// use pdf_base14_metrics::Base14Font;
175    ///
176    /// assert_eq!(Base14Font::TimesBoldItalic.pdf_base_name(), "Times-BoldItalic");
177    /// ```
178    #[must_use]
179    pub fn pdf_base_name(self) -> &'static str {
180        match self {
181            Self::Helvetica => "Helvetica",
182            Self::HelveticaBold => "Helvetica-Bold",
183            Self::HelveticaOblique => "Helvetica-Oblique",
184            Self::HelveticaBoldOblique => "Helvetica-BoldOblique",
185            Self::TimesRoman => "Times-Roman",
186            Self::TimesBold => "Times-Bold",
187            Self::TimesItalic => "Times-Italic",
188            Self::TimesBoldItalic => "Times-BoldItalic",
189            Self::Courier => "Courier",
190            Self::CourierBold => "Courier-Bold",
191            Self::CourierOblique => "Courier-Oblique",
192            Self::CourierBoldOblique => "Courier-BoldOblique",
193            Self::Symbol => "Symbol",
194            Self::ZapfDingbats => "ZapfDingbats",
195        }
196    }
197
198    /// Width of the glyph with the given PostScript name, in 1/1000
199    /// em. Returns `None` if no such glyph exists in this font.
200    ///
201    /// This is an O(n) linear scan over the font's character metrics
202    /// (~315 entries for the Latin faces). Prefer
203    /// [`Self::winansi_width`] when querying by byte; that path
204    /// goes through a pre-baked O(1) table. For the Latin Core 12
205    /// faces, [`Self::glyph_width_by_name`] goes through a baked
206    /// sorted index instead and is O(log n).
207    ///
208    /// # Examples
209    ///
210    /// ```
211    /// use pdf_base14_metrics::Base14Font;
212    ///
213    /// assert_eq!(Base14Font::Helvetica.glyph_width("A"), Some(667.0));
214    /// ```
215    #[must_use]
216    pub fn glyph_width(self, name: &str) -> Option<f32> {
217        self.metrics()
218            .character_metrics
219            .iter()
220            .find(|c| c.name == name)
221            .map(|c| c.width_x)
222    }
223
224    /// Width of the glyph with the given PostScript name, looked up
225    /// through a baked sorted index. O(log n), allocation-free,
226    /// safe to call once per character per PDF page in tight loops.
227    ///
228    /// Returns `None` for [`Self::Symbol`] and [`Self::ZapfDingbats`]
229    ///: their AFMs are intentionally unindexed because those faces
230    /// don't participate in `/Differences`-style remapping. Callers
231    /// that need Symbol/Dingbat widths must use [`Self::glyph_width`].
232    ///
233    /// # Examples
234    ///
235    /// ```
236    /// use pdf_base14_metrics::Base14Font;
237    ///
238    /// assert_eq!(Base14Font::Helvetica.glyph_width_by_name("A"), Some(667.0));
239    /// assert_eq!(Base14Font::Symbol.glyph_width_by_name("Alpha"), None);
240    /// ```
241    #[must_use]
242    pub fn glyph_width_by_name(self, name: &str) -> Option<f32> {
243        let table = self.name_width_table()?;
244        table
245            .binary_search_by(|(n, _)| (*n).cmp(name))
246            .ok()
247            .map(|i| table[i].1)
248    }
249
250    /// Returns the baked `(name, width)` index for Latin Core 12
251    /// faces, or `None` for `Symbol`/`ZapfDingbats`.
252    fn name_width_table(self) -> Option<&'static [(&'static str, f32)]> {
253        match self {
254            Self::Symbol | Self::ZapfDingbats => None,
255            Self::Helvetica => Some(HELVETICA_NAME_WIDTHS),
256            Self::HelveticaBold => Some(HELVETICA_BOLD_NAME_WIDTHS),
257            Self::HelveticaOblique => Some(HELVETICA_OBLIQUE_NAME_WIDTHS),
258            Self::HelveticaBoldOblique => Some(HELVETICA_BOLDOBLIQUE_NAME_WIDTHS),
259            Self::TimesRoman => Some(TIMES_ROMAN_NAME_WIDTHS),
260            Self::TimesBold => Some(TIMES_BOLD_NAME_WIDTHS),
261            Self::TimesItalic => Some(TIMES_ITALIC_NAME_WIDTHS),
262            Self::TimesBoldItalic => Some(TIMES_BOLDITALIC_NAME_WIDTHS),
263            Self::Courier => Some(COURIER_NAME_WIDTHS),
264            Self::CourierBold => Some(COURIER_BOLD_NAME_WIDTHS),
265            Self::CourierOblique => Some(COURIER_OBLIQUE_NAME_WIDTHS),
266            Self::CourierBoldOblique => Some(COURIER_BOLDOBLIQUE_NAME_WIDTHS),
267        }
268    }
269
270    /// Width of the glyph at PDF `WinAnsiEncoding` byte `code`, in
271    /// 1/1000 em. Returns `None` when:
272    ///
273    /// - `code` is unmapped by PDF `WinAnsi` (control characters
274    ///   `0x00..=0x1F`, the gaps `0x7F` / `0x81` / `0x8D` / `0x8F`
275    ///   / `0x90` / `0x9D`); or
276    /// - `self` is [`Self::Symbol`] or [`Self::ZapfDingbats`].
277    ///   those fonts do not use `WinAnsi` (see the crate-level docs).
278    ///
279    /// Implemented as a single `[Option<f32>; 256]` indexed load
280    /// per call: the table is baked at build time alongside the
281    /// font metrics. Hot enough for `mos-fonts::text_width` to
282    /// call once per character per typeset paragraph.
283    ///
284    /// # Examples
285    ///
286    /// ```
287    /// use pdf_base14_metrics::Base14Font;
288    ///
289    /// assert_eq!(Base14Font::Helvetica.winansi_width(b'A'), Some(667.0));
290    /// assert_eq!(Base14Font::Symbol.winansi_width(b'A'), None);
291    /// ```
292    #[must_use]
293    pub fn winansi_width(self, code: u8) -> Option<f32> {
294        self.winansi_table().and_then(|t| t[code as usize])
295    }
296
297    /// The pre-baked `WinAnsi` width table, or `None` for fonts whose
298    /// canonical encoding isn't `WinAnsi`.
299    fn winansi_table(self) -> Option<&'static [Option<f32>; 256]> {
300        match self {
301            Self::Symbol | Self::ZapfDingbats => None,
302            Self::Helvetica => Some(&HELVETICA_WINANSI),
303            Self::HelveticaBold => Some(&HELVETICA_BOLD_WINANSI),
304            Self::HelveticaOblique => Some(&HELVETICA_OBLIQUE_WINANSI),
305            Self::HelveticaBoldOblique => Some(&HELVETICA_BOLDOBLIQUE_WINANSI),
306            Self::TimesRoman => Some(&TIMES_ROMAN_WINANSI),
307            Self::TimesBold => Some(&TIMES_BOLD_WINANSI),
308            Self::TimesItalic => Some(&TIMES_ITALIC_WINANSI),
309            Self::TimesBoldItalic => Some(&TIMES_BOLDITALIC_WINANSI),
310            Self::Courier => Some(&COURIER_WINANSI),
311            Self::CourierBold => Some(&COURIER_BOLD_WINANSI),
312            Self::CourierOblique => Some(&COURIER_OBLIQUE_WINANSI),
313            Self::CourierBoldOblique => Some(&COURIER_BOLDOBLIQUE_WINANSI),
314        }
315    }
316}
317
318/// Returns the PostScript glyph name assigned to PDF `WinAnsiEncoding`
319/// byte `code`, or `None` for unmapped codes.
320///
321/// PDF `WinAnsi` is **not** Microsoft CP1252; see PDF 1.7 Annex D.2
322/// for the canonical table. The two encodings differ at codes
323/// `0x7F`, `0x81`, `0x8D`, `0x8F`, `0x90`, and `0x9D` (gaps in PDF,
324/// assorted glyphs or DEL in CP1252).
325///
326/// This is exposed primarily so downstream crates (e.g.
327/// `mos-fonts`) can delegate to the canonical table rather than
328/// maintain their own copy.
329///
330/// # Examples
331///
332/// ```
333/// use pdf_base14_metrics::winansi_glyph_name;
334///
335/// assert_eq!(winansi_glyph_name(b'A'), Some("A"));
336/// assert_eq!(winansi_glyph_name(0x7F), None);
337/// ```
338#[must_use]
339pub fn winansi_glyph_name(code: u8) -> Option<&'static str> {
340    winansi_table::WINANSI_TABLE[code as usize]
341}
342
343/// Returns the PDF `WinAnsiEncoding` byte that encodes `ch`, or
344/// `None` if `ch` has no slot in `WinAnsi`.
345///
346/// The inverse of the byte→char mapping transcribed from
347/// PDF 1.7 Annex D.2 Table D.2 into
348/// `winansi_char_map::WINANSI_CHAR_MAP`. Returns `None` for:
349///
350/// - Characters that have no glyph in `WinAnsi` (Cyrillic, CJK,
351///   most accented Vietnamese, etc.).
352/// - The six `WinAnsi` gap bytes (`0x7F`, `0x81`, `0x8D`, `0x8F`,
353///   `0x90`, `0x9D`).
354///
355/// O(n) scan over 256 slots: fine for callers that touch it once
356/// per text run, sensible to memoize for hotter paths.
357///
358/// # Examples
359///
360/// ```
361/// use pdf_base14_metrics::winansi_byte;
362///
363/// assert_eq!(winansi_byte('A'), Some(b'A'));
364/// assert_eq!(winansi_byte('Ж'), None);
365/// ```
366#[must_use]
367pub fn winansi_byte(ch: char) -> Option<u8> {
368    winansi_char_map::WINANSI_CHAR_MAP
369        .iter()
370        .position(|&c| c == Some(ch))
371        .and_then(|i| u8::try_from(i).ok())
372}
373
// Test-only visibility shim for `tests/winansi_vendor.rs`. The const
// is `#[doc(hidden)]` so it doesn't leak into the public API surface,
// and lives here only so the integration test can re-derive the same
// map from the Adobe Glyph List at test runtime and assert
// byte-for-byte equality.
//
// It is a by-value copy of the crate-private
// `winansi_char_map::WINANSI_CHAR_MAP`: slot `i` holds the character
// assigned to byte `i`, or `None` when the byte is unmapped.
#[doc(hidden)]
pub const __WINANSI_CHAR_MAP: [Option<char>; 256] = winansi_char_map::WINANSI_CHAR_MAP;
381
/// Returns the PostScript glyph name for `ch` *if and only if* `ch`
/// is in the **extended** tier: i.e. a Core 14 AFM glyph that has
/// no `WinAnsi` byte and therefore must be reached through a custom
/// `/Encoding` `/Differences` slot. The extended tier covers:
///
/// - most of Latin Extended-A (`Ł`, `ł`, `Ě`, `ě`, `Ő`, `ő`, …,
///   excluding those that already live in `WinAnsi` like
///   `š`/`Š`/`ž`/`Ž`);
/// - the Latin Extended-B comma-below set `Ș`/`ș`/`Ț`/`ț`;
/// - the spacing diacritics `˘ˇ˙˝˛˚`;
/// - the math operators `−≤≥≠√∂∑∆◊`;
/// - the `fraction` slash `⁄` and the `fi`/`fl` ligatures.
///
/// Returns `None` for **two distinct cases that callers must
/// distinguish**:
///
/// 1. **`WinAnsi` natives**: `š` (U+0161), `ž` (U+017E), `Š`, `Ž`,
///    the accented Latin-1 alphabet, `€`, `“`, … These *do* have
///    PostScript glyph names in the AFM, but this function returns
///    `None` for them because they're reachable through
///    [`winansi_byte`] instead and don't need a `/Differences` slot.
///    Callers querying "what's the AFM glyph name for `é`?" should
///    use [`Base14Font::glyph_width_by_name`] on the result of
///    [`winansi_glyph_name`]`(`[`winansi_byte`]`(ch)?)`, or just
///    measure widths through [`Base14Font::winansi_width`].
/// 2. **Unmappable codepoints** with no glyph in any Core 14 font
///    (Cyrillic, CJK, emoji, most non-European scripts). The PDF
///    backend silently substitutes these to `?` for Base14 runs;
///    real coverage requires the bundled embedded family that
///    `mos-fonts` provides.
///
/// The name `extended_glyph_name` is deliberately chosen over the
/// shorter `glyph_name` to avoid surprising readers who reach for
/// the function expecting "AFM name for any char." For *any-tier*
/// AFM lookup the two-step (`winansi_glyph_name` ∘ `winansi_byte`)
/// then-fallback-to-`extended_glyph_name` composition is the way.
///
/// Used by the PDF backend's `/Differences`-based encoding planner
/// to allocate slots for the extended tier.
///
/// The lookup itself is delegated to the crate-private
/// `agl_subset::agl_glyph_name`; this function adds no logic of its
/// own.
///
/// # Examples
///
/// ```
/// use pdf_base14_metrics::extended_glyph_name;
///
/// assert_eq!(extended_glyph_name('Ł'), Some("Lslash"));
/// assert_eq!(extended_glyph_name('A'), None);
/// ```
#[must_use]
pub fn extended_glyph_name(ch: char) -> Option<&'static str> {
    // Thin public wrapper: the extended-tier table lives in `agl_subset`.
    agl_subset::agl_glyph_name(ch)
}
434
// Unit tests for the by-name width index and the extended-tier glyph
// name lookup.
#[cfg(test)]
mod tests {
    use super::*;

    // The sorted-index lookup must agree with the AFM character
    // metrics for every glyph Helvetica carries.
    #[test]
    fn glyph_width_by_name_matches_linear_scan_for_every_helvetica_glyph() {
        let face = Base14Font::Helvetica;
        for c in face.metrics().character_metrics.iter() {
            let by_name = face.glyph_width_by_name(c.name.as_ref());
            assert_eq!(
                by_name,
                Some(c.width_x),
                "by-name mismatch for {:?}",
                c.name
            );
        }
    }

    #[test]
    fn glyph_width_by_name_resolves_non_winansi_glyphs() {
        // Helvetica.afm:  C -1 ; WX 222 ; N lslash ; ...  (well, lslash
        // is actually encoded at C 248 in AdobeStandardEncoding, but
        // either way the width is the same.) The PDF spec lets us
        // address it through /Differences.
        let face = Base14Font::Helvetica;
        assert_eq!(face.glyph_width_by_name("lslash"), Some(222.0));
        assert_eq!(face.glyph_width_by_name("Lslash"), Some(556.0));
        assert_eq!(face.glyph_width_by_name("ecaron"), Some(556.0));
        assert_eq!(face.glyph_width_by_name("rcaron"), Some(333.0));
    }

    // Both the empty string and a made-up name must miss cleanly.
    #[test]
    fn glyph_width_by_name_returns_none_for_unknown_glyph() {
        assert_eq!(Base14Font::Helvetica.glyph_width_by_name(""), None);
        assert_eq!(
            Base14Font::Helvetica.glyph_width_by_name("notarealglyph"),
            None
        );
    }

    #[test]
    fn glyph_width_by_name_returns_none_for_symbol_and_dingbats() {
        // Documented contract: those faces don't participate in
        // /Differences-based remapping.
        assert_eq!(Base14Font::Symbol.glyph_width_by_name("A"), None);
        assert_eq!(Base14Font::ZapfDingbats.glyph_width_by_name("A"), None);
    }

    #[test]
    fn courier_carries_the_same_extended_glyph_set_as_helvetica() {
        // The 12 Latin Core 14 faces share an identical 315-name glyph
        // inventory (verified by `diff` on the AFM CharSets); the
        // planner can rely on "if Helvetica has it, Courier does too"
        // when deciding whether to remap a slot.
        for name in &["lslash", "ecaron", "tcommaaccent", "ohungarumlaut"] {
            assert!(
                Base14Font::Courier.glyph_width_by_name(name).is_some(),
                "Courier missing {name}"
            );
        }
    }

    #[test]
    fn extended_glyph_name_resolves_polish_and_czech() {
        assert_eq!(extended_glyph_name('ł'), Some("lslash"));
        assert_eq!(extended_glyph_name('Ł'), Some("Lslash"));
        assert_eq!(extended_glyph_name('ě'), Some("ecaron"));
        // ž is a WinAnsi native, not in the extended tier: by
        // contract `extended_glyph_name` returns `None` even though
        // the AFM does carry a `zcaron` glyph (reachable through
        // `winansi_byte` / `winansi_glyph_name` instead).
        assert_eq!(extended_glyph_name('ž'), None);
        // 'A' is also a WinAnsi native and returns None.
        assert_eq!(extended_glyph_name('A'), None);
    }
}