// pdf_base14_metrics/lib.rs
//! Pre-parsed Adobe Core 14 PDF font metrics.
//!
//! The 14 PostScript faces every PDF 1.7-conformant viewer ships
//! built-in (Helvetica × 4, Times × 4, Courier × 4, Symbol, and
//! `ZapfDingbats`) are exposed as `&'static FontMetrics<'static>`
//! constants that cost nothing at runtime. The AFM files are vendored
//! from [`tecnickcom/tc-font-core14-afms`] under `data/`, parsed by the
//! sibling [`adobe-font-metrics`] crate at build time (see `build.rs`),
//! and baked into Rust statics in `$OUT_DIR/baked.rs`.
//!
//! [`tecnickcom/tc-font-core14-afms`]: https://github.com/tecnickcom/tc-font-core14-afms
//! [`adobe-font-metrics`]: https://crates.io/crates/adobe-font-metrics
//!
//! # Quick start
//!
//! ```
//! use pdf_base14_metrics::Base14Font;
//!
//! // Look up a glyph width by PostScript name.
//! assert_eq!(Base14Font::Helvetica.glyph_width("A"), Some(667.0));
//!
//! // Or via PDF `WinAnsiEncoding` byte (Latin faces only).
//! assert_eq!(Base14Font::Helvetica.winansi_width(b'A'), Some(667.0));
//!
//! // Iterate every Core 14 face in stable order.
//! for f in Base14Font::ALL {
//!     let m = f.metrics();
//!     assert!(!m.character_metrics.is_empty());
//! }
//! ```
//!
//! # Encoding caveat: Symbol and `ZapfDingbats`
//!
//! [`Base14Font::winansi_width`] returns `None` for [`Base14Font::Symbol`]
//! and [`Base14Font::ZapfDingbats`]: those fonts use their own
//! PostScript encodings (Greek/math operators and named dingbats
//! respectively), not `WinAnsi`. Querying them through a Latin-1 byte
//! would be a category error; the byte `0x41` is `"A"` in `WinAnsi`
//! but `"Alpha"` in Symbol. Callers must use the per-glyph
//! [`Base14Font::glyph_width`] API for those two fonts.
//!
//! # License
//!
//! The crate's Rust source is MIT. The 14 vendored AFM files in
//! `data/afm/` ship under Adobe's permissive Core 14 AFM license
//! (`APAFML`); see `LICENSE-APAFML` in the crate root. The combined
//! SPDX expression is `MIT AND APAFML`.

50#![doc(
51 html_logo_url = "https://mosaic.kjanat.dev/assets/A4.svg",
52 html_favicon_url = "https://mosaic.kjanat.dev/assets/A4.svg"
53)]
54#![deny(missing_docs)]
55
56pub use adobe_font_metrics::{BBox, CharacterMetric, FontMetrics, KerningPair};
57
58use std::borrow::Cow;
59
60mod agl_subset;
61mod winansi_char_map;
62mod winansi_table;
63
64// The generated file references `BBox`, `CharacterMetric`,
65// `FontMetrics`, `KerningPair`, and `Cow` unqualified; all are in
66// scope via the `pub use` and `use` above.
67include!(concat!(env!("OUT_DIR"), "/baked.rs"));
68
/// One of the 14 standard PDF fonts every conformant PDF reader
/// ships built in (PDF 1.7 §9.6.2.2).
///
/// Variants are listed in the canonical PDF order: the four
/// Helvetica weights, four Times weights, four Courier weights,
/// then Symbol and `ZapfDingbats`. [`Self::ALL`] iterates them in
/// this order.
///
/// # Examples
///
/// ```
/// use pdf_base14_metrics::Base14Font;
///
/// assert_eq!(Base14Font::ALL.len(), 14);
/// assert_eq!(Base14Font::Helvetica.pdf_base_name(), "Helvetica");
/// ```
#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)]
pub enum Base14Font {
    /// Helvetica (regular).
    Helvetica,
    /// Helvetica Bold.
    HelveticaBold,
    /// Helvetica Oblique (regular weight, slanted).
    HelveticaOblique,
    /// Helvetica Bold Oblique.
    HelveticaBoldOblique,
    /// Times Roman (regular).
    TimesRoman,
    /// Times Bold.
    TimesBold,
    /// Times Italic.
    TimesItalic,
    /// Times Bold Italic.
    TimesBoldItalic,
    /// Courier (regular, monospace).
    Courier,
    /// Courier Bold (monospace).
    CourierBold,
    /// Courier Oblique (monospace, slanted).
    CourierOblique,
    /// Courier Bold Oblique (monospace).
    CourierBoldOblique,
    /// Adobe Symbol (Greek letters, math operators).
    Symbol,
    /// ITC Zapf Dingbats (decorative glyphs).
    ZapfDingbats,
}

117impl Base14Font {
118 /// Every Core 14 face in stable PDF order.
119 pub const ALL: [Self; 14] = [
120 Self::Helvetica,
121 Self::HelveticaBold,
122 Self::HelveticaOblique,
123 Self::HelveticaBoldOblique,
124 Self::TimesRoman,
125 Self::TimesBold,
126 Self::TimesItalic,
127 Self::TimesBoldItalic,
128 Self::Courier,
129 Self::CourierBold,
130 Self::CourierOblique,
131 Self::CourierBoldOblique,
132 Self::Symbol,
133 Self::ZapfDingbats,
134 ];
135
136 /// Borrows the pre-parsed Adobe AFM metrics for this face.
137 ///
138 /// # Examples
139 ///
140 /// ```
141 /// use pdf_base14_metrics::Base14Font;
142 ///
143 /// let metrics = Base14Font::Helvetica.metrics();
144 ///
145 /// assert_eq!(metrics.font_name, "Helvetica");
146 /// ```
147 #[must_use]
148 pub fn metrics(self) -> &'static FontMetrics<'static> {
149 match self {
150 Self::Helvetica => &HELVETICA,
151 Self::HelveticaBold => &HELVETICA_BOLD,
152 Self::HelveticaOblique => &HELVETICA_OBLIQUE,
153 Self::HelveticaBoldOblique => &HELVETICA_BOLDOBLIQUE,
154 Self::TimesRoman => &TIMES_ROMAN,
155 Self::TimesBold => &TIMES_BOLD,
156 Self::TimesItalic => &TIMES_ITALIC,
157 Self::TimesBoldItalic => &TIMES_BOLDITALIC,
158 Self::Courier => &COURIER,
159 Self::CourierBold => &COURIER_BOLD,
160 Self::CourierOblique => &COURIER_OBLIQUE,
161 Self::CourierBoldOblique => &COURIER_BOLDOBLIQUE,
162 Self::Symbol => &SYMBOL,
163 Self::ZapfDingbats => &ZAPFDINGBATS,
164 }
165 }
166
167 /// PDF `/BaseFont` name per PDF 1.7 §9.6.2.2. These are the
168 /// exact bytes a conformant PDF writer puts after `/BaseFont`
169 /// in a font resource dictionary.
170 ///
171 /// # Examples
172 ///
173 /// ```
174 /// use pdf_base14_metrics::Base14Font;
175 ///
176 /// assert_eq!(Base14Font::TimesBoldItalic.pdf_base_name(), "Times-BoldItalic");
177 /// ```
178 #[must_use]
179 pub fn pdf_base_name(self) -> &'static str {
180 match self {
181 Self::Helvetica => "Helvetica",
182 Self::HelveticaBold => "Helvetica-Bold",
183 Self::HelveticaOblique => "Helvetica-Oblique",
184 Self::HelveticaBoldOblique => "Helvetica-BoldOblique",
185 Self::TimesRoman => "Times-Roman",
186 Self::TimesBold => "Times-Bold",
187 Self::TimesItalic => "Times-Italic",
188 Self::TimesBoldItalic => "Times-BoldItalic",
189 Self::Courier => "Courier",
190 Self::CourierBold => "Courier-Bold",
191 Self::CourierOblique => "Courier-Oblique",
192 Self::CourierBoldOblique => "Courier-BoldOblique",
193 Self::Symbol => "Symbol",
194 Self::ZapfDingbats => "ZapfDingbats",
195 }
196 }
197
198 /// Width of the glyph with the given PostScript name, in 1/1000
199 /// em. Returns `None` if no such glyph exists in this font.
200 ///
201 /// This is an O(n) linear scan over the font's character metrics
202 /// (~315 entries for the Latin faces). Prefer
203 /// [`Self::winansi_width`] when querying by byte; that path
204 /// goes through a pre-baked O(1) table. For the Latin Core 12
205 /// faces, [`Self::glyph_width_by_name`] goes through a baked
206 /// sorted index instead and is O(log n).
207 ///
208 /// # Examples
209 ///
210 /// ```
211 /// use pdf_base14_metrics::Base14Font;
212 ///
213 /// assert_eq!(Base14Font::Helvetica.glyph_width("A"), Some(667.0));
214 /// ```
215 #[must_use]
216 pub fn glyph_width(self, name: &str) -> Option<f32> {
217 self.metrics()
218 .character_metrics
219 .iter()
220 .find(|c| c.name == name)
221 .map(|c| c.width_x)
222 }
223
224 /// Width of the glyph with the given PostScript name, looked up
225 /// through a baked sorted index. O(log n), allocation-free,
226 /// safe to call once per character per PDF page in tight loops.
227 ///
228 /// Returns `None` for [`Self::Symbol`] and [`Self::ZapfDingbats`]
229 ///: their AFMs are intentionally unindexed because those faces
230 /// don't participate in `/Differences`-style remapping. Callers
231 /// that need Symbol/Dingbat widths must use [`Self::glyph_width`].
232 ///
233 /// # Examples
234 ///
235 /// ```
236 /// use pdf_base14_metrics::Base14Font;
237 ///
238 /// assert_eq!(Base14Font::Helvetica.glyph_width_by_name("A"), Some(667.0));
239 /// assert_eq!(Base14Font::Symbol.glyph_width_by_name("Alpha"), None);
240 /// ```
241 #[must_use]
242 pub fn glyph_width_by_name(self, name: &str) -> Option<f32> {
243 let table = self.name_width_table()?;
244 table
245 .binary_search_by(|(n, _)| (*n).cmp(name))
246 .ok()
247 .map(|i| table[i].1)
248 }
249
250 /// Returns the baked `(name, width)` index for Latin Core 12
251 /// faces, or `None` for `Symbol`/`ZapfDingbats`.
252 fn name_width_table(self) -> Option<&'static [(&'static str, f32)]> {
253 match self {
254 Self::Symbol | Self::ZapfDingbats => None,
255 Self::Helvetica => Some(HELVETICA_NAME_WIDTHS),
256 Self::HelveticaBold => Some(HELVETICA_BOLD_NAME_WIDTHS),
257 Self::HelveticaOblique => Some(HELVETICA_OBLIQUE_NAME_WIDTHS),
258 Self::HelveticaBoldOblique => Some(HELVETICA_BOLDOBLIQUE_NAME_WIDTHS),
259 Self::TimesRoman => Some(TIMES_ROMAN_NAME_WIDTHS),
260 Self::TimesBold => Some(TIMES_BOLD_NAME_WIDTHS),
261 Self::TimesItalic => Some(TIMES_ITALIC_NAME_WIDTHS),
262 Self::TimesBoldItalic => Some(TIMES_BOLDITALIC_NAME_WIDTHS),
263 Self::Courier => Some(COURIER_NAME_WIDTHS),
264 Self::CourierBold => Some(COURIER_BOLD_NAME_WIDTHS),
265 Self::CourierOblique => Some(COURIER_OBLIQUE_NAME_WIDTHS),
266 Self::CourierBoldOblique => Some(COURIER_BOLDOBLIQUE_NAME_WIDTHS),
267 }
268 }
269
270 /// Width of the glyph at PDF `WinAnsiEncoding` byte `code`, in
271 /// 1/1000 em. Returns `None` when:
272 ///
273 /// - `code` is unmapped by PDF `WinAnsi` (control characters
274 /// `0x00..=0x1F`, the gaps `0x7F` / `0x81` / `0x8D` / `0x8F`
275 /// / `0x90` / `0x9D`); or
276 /// - `self` is [`Self::Symbol`] or [`Self::ZapfDingbats`].
277 /// those fonts do not use `WinAnsi` (see the crate-level docs).
278 ///
279 /// Implemented as a single `[Option<f32>; 256]` indexed load
280 /// per call: the table is baked at build time alongside the
281 /// font metrics. Hot enough for `mos-fonts::text_width` to
282 /// call once per character per typeset paragraph.
283 ///
284 /// # Examples
285 ///
286 /// ```
287 /// use pdf_base14_metrics::Base14Font;
288 ///
289 /// assert_eq!(Base14Font::Helvetica.winansi_width(b'A'), Some(667.0));
290 /// assert_eq!(Base14Font::Symbol.winansi_width(b'A'), None);
291 /// ```
292 #[must_use]
293 pub fn winansi_width(self, code: u8) -> Option<f32> {
294 self.winansi_table().and_then(|t| t[code as usize])
295 }
296
297 /// The pre-baked `WinAnsi` width table, or `None` for fonts whose
298 /// canonical encoding isn't `WinAnsi`.
299 fn winansi_table(self) -> Option<&'static [Option<f32>; 256]> {
300 match self {
301 Self::Symbol | Self::ZapfDingbats => None,
302 Self::Helvetica => Some(&HELVETICA_WINANSI),
303 Self::HelveticaBold => Some(&HELVETICA_BOLD_WINANSI),
304 Self::HelveticaOblique => Some(&HELVETICA_OBLIQUE_WINANSI),
305 Self::HelveticaBoldOblique => Some(&HELVETICA_BOLDOBLIQUE_WINANSI),
306 Self::TimesRoman => Some(&TIMES_ROMAN_WINANSI),
307 Self::TimesBold => Some(&TIMES_BOLD_WINANSI),
308 Self::TimesItalic => Some(&TIMES_ITALIC_WINANSI),
309 Self::TimesBoldItalic => Some(&TIMES_BOLDITALIC_WINANSI),
310 Self::Courier => Some(&COURIER_WINANSI),
311 Self::CourierBold => Some(&COURIER_BOLD_WINANSI),
312 Self::CourierOblique => Some(&COURIER_OBLIQUE_WINANSI),
313 Self::CourierBoldOblique => Some(&COURIER_BOLDOBLIQUE_WINANSI),
314 }
315 }
316}
317
318/// Returns the PostScript glyph name assigned to PDF `WinAnsiEncoding`
319/// byte `code`, or `None` for unmapped codes.
320///
321/// PDF `WinAnsi` is **not** Microsoft CP1252; see PDF 1.7 Annex D.2
322/// for the canonical table. The two encodings differ at codes
323/// `0x7F`, `0x81`, `0x8D`, `0x8F`, `0x90`, and `0x9D` (gaps in PDF,
324/// assorted glyphs or DEL in CP1252).
325///
326/// This is exposed primarily so downstream crates (e.g.
327/// `mos-fonts`) can delegate to the canonical table rather than
328/// maintain their own copy.
329///
330/// # Examples
331///
332/// ```
333/// use pdf_base14_metrics::winansi_glyph_name;
334///
335/// assert_eq!(winansi_glyph_name(b'A'), Some("A"));
336/// assert_eq!(winansi_glyph_name(0x7F), None);
337/// ```
338#[must_use]
339pub fn winansi_glyph_name(code: u8) -> Option<&'static str> {
340 winansi_table::WINANSI_TABLE[code as usize]
341}
342
343/// Returns the PDF `WinAnsiEncoding` byte that encodes `ch`, or
344/// `None` if `ch` has no slot in `WinAnsi`.
345///
346/// The inverse of the byte→char mapping transcribed from
347/// PDF 1.7 Annex D.2 Table D.2 into
348/// `winansi_char_map::WINANSI_CHAR_MAP`. Returns `None` for:
349///
350/// - Characters that have no glyph in `WinAnsi` (Cyrillic, CJK,
351/// most accented Vietnamese, etc.).
352/// - The six `WinAnsi` gap bytes (`0x7F`, `0x81`, `0x8D`, `0x8F`,
353/// `0x90`, `0x9D`).
354///
355/// O(n) scan over 256 slots: fine for callers that touch it once
356/// per text run, sensible to memoize for hotter paths.
357///
358/// # Examples
359///
360/// ```
361/// use pdf_base14_metrics::winansi_byte;
362///
363/// assert_eq!(winansi_byte('A'), Some(b'A'));
364/// assert_eq!(winansi_byte('Ж'), None);
365/// ```
366#[must_use]
367pub fn winansi_byte(ch: char) -> Option<u8> {
368 winansi_char_map::WINANSI_CHAR_MAP
369 .iter()
370 .position(|&c| c == Some(ch))
371 .and_then(|i| u8::try_from(i).ok())
372}
373
374// Test-only visibility shim for `tests/winansi_vendor.rs`. The const
375// is `#[doc(hidden)]` so it doesn't leak into the public API surface,
376// and lives here only so the integration test can re-derive the same
377// map from the Adobe Glyph List at test runtime and assert
378// byte-for-byte equality.
379#[doc(hidden)]
380pub const __WINANSI_CHAR_MAP: [Option<char>; 256] = winansi_char_map::WINANSI_CHAR_MAP;
381
382/// Returns the PostScript glyph name for `ch` *if and only if* `ch`
383/// is in the **extended** tier: i.e. a Core 14 AFM glyph that has
384/// no `WinAnsi` byte and therefore must be reached through a custom
385/// `/Encoding` `/Differences` slot. The extended tier covers:
386///
387/// - most of Latin Extended-A (`Ł`, `ł`, `Ě`, `ě`, `Ő`, `ő`, …,
388/// excluding those that already live in `WinAnsi` like
389/// `š`/`Š`/`ž`/`Ž`);
390/// - the Latin Extended-B comma-below set `Ș`/`ș`/`Ț`/`ț`;
391/// - the spacing diacritics `˘ˇ˙˝˛˚`;
392/// - the math operators `−≤≥≠√∂∑∆◊`;
393/// - the `fraction` slash `⁄` and the `fi`/`fl` ligatures.
394///
395/// Returns `None` for **two distinct cases that callers must
396/// distinguish**:
397///
398/// 1. **`WinAnsi` natives**: `š` (U+0161), `ž` (U+017E), `Š`, `Ž`,
399/// the accented Latin-1 alphabet, `€`, `“`, ... These *do* have
400/// PostScript glyph names in the AFM, but this function returns
401/// `None` for them because they're reachable through
402/// [`winansi_byte`] instead and don't need a `/Differences` slot.
403/// Callers querying "what's the AFM glyph name for `é`?" should
404/// use [`Base14Font::glyph_width_by_name`] on the result of
405/// [`winansi_glyph_name`]`(`[`winansi_byte`]`(ch)?)`, or just
406/// measure widths through [`Base14Font::winansi_width`].
407/// 2. **Unmappable codepoints** with no glyph in any Core 14 font
408/// (Cyrillic, CJK, emoji, most non-European scripts). The PDF
409/// backend silently substitutes these to `?` for Base14 runs;
410/// real coverage requires the bundled embedded family that
411/// `mos-fonts` provides.
412///
413/// The name `extended_glyph_name` is deliberately chosen over the
414/// shorter `glyph_name` to avoid surprising readers who reach for
415/// the function expecting "AFM name for any char." For *any-tier*
416/// AFM lookup the two-step (`winansi_glyph_name` ∘ `winansi_byte`)
417/// then-fallback-to-`extended_glyph_name` composition is the way.
418///
419/// Used by the PDF backend's `/Differences`-based encoding planner
420/// to allocate slots for the extended tier.
421///
422/// # Examples
423///
424/// ```
425/// use pdf_base14_metrics::extended_glyph_name;
426///
427/// assert_eq!(extended_glyph_name('Ł'), Some("Lslash"));
428/// assert_eq!(extended_glyph_name('A'), None);
429/// ```
430#[must_use]
431pub fn extended_glyph_name(ch: char) -> Option<&'static str> {
432 agl_subset::agl_glyph_name(ch)
433}
434
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn glyph_width_by_name_matches_linear_scan_for_every_helvetica_glyph() {
        let face = Base14Font::Helvetica;
        for c in face.metrics().character_metrics.iter() {
            let by_name = face.glyph_width_by_name(c.name.as_ref());
            assert_eq!(
                by_name,
                Some(c.width_x),
                "by-name mismatch for {:?}",
                c.name
            );
        }
    }

    #[test]
    fn glyph_width_by_name_resolves_non_winansi_glyphs() {
        // Helvetica.afm: C -1 ; WX 222 ; N lslash ; ... (well, lslash
        // is actually encoded at C 248 in AdobeStandardEncoding, but
        // either way the width is the same.) The PDF spec lets us
        // address it through /Differences.
        let face = Base14Font::Helvetica;
        assert_eq!(face.glyph_width_by_name("lslash"), Some(222.0));
        assert_eq!(face.glyph_width_by_name("Lslash"), Some(556.0));
        assert_eq!(face.glyph_width_by_name("ecaron"), Some(556.0));
        assert_eq!(face.glyph_width_by_name("rcaron"), Some(333.0));
    }

    #[test]
    fn glyph_width_by_name_returns_none_for_unknown_glyph() {
        assert_eq!(Base14Font::Helvetica.glyph_width_by_name(""), None);
        assert_eq!(
            Base14Font::Helvetica.glyph_width_by_name("notarealglyph"),
            None
        );
    }

    #[test]
    fn glyph_width_by_name_returns_none_for_symbol_and_dingbats() {
        // Documented contract: those faces don't participate in
        // /Differences-based remapping.
        assert_eq!(Base14Font::Symbol.glyph_width_by_name("A"), None);
        assert_eq!(Base14Font::ZapfDingbats.glyph_width_by_name("A"), None);
    }

    #[test]
    fn courier_carries_the_same_extended_glyph_set_as_helvetica() {
        // The 12 Latin Core 14 faces share an identical 315-name glyph
        // inventory (verified by `diff` on the AFM CharSets); the
        // planner can rely on "if Helvetica has it, Courier does too"
        // when deciding whether to remap a slot.
        for name in &["lslash", "ecaron", "tcommaaccent", "ohungarumlaut"] {
            assert!(
                Base14Font::Courier.glyph_width_by_name(name).is_some(),
                "Courier missing {name}"
            );
        }
    }

    #[test]
    fn extended_glyph_name_resolves_polish_and_czech() {
        assert_eq!(extended_glyph_name('ł'), Some("lslash"));
        assert_eq!(extended_glyph_name('Ł'), Some("Lslash"));
        assert_eq!(extended_glyph_name('ě'), Some("ecaron"));
        // ž is a WinAnsi native, not in the extended tier: by
        // contract `extended_glyph_name` returns `None` even though
        // the AFM does carry a `zcaron` glyph (reachable through
        // `winansi_byte` / `winansi_glyph_name` instead).
        assert_eq!(extended_glyph_name('ž'), None);
        // 'A' is also a WinAnsi native and returns None.
        assert_eq!(extended_glyph_name('A'), None);
    }
}