Skip to main content

mos_fonts/
metrics.rs

1use crate::{Base14Font, Font, extended_glyph_name, normalize::nfc_text, shape, winansi_byte};
2
3/// Advance width of `text` rendered in `font` at `size` points.
4///
5/// Input is normalized through [`crate::nfc_text`] before any width
6/// calculation. Decomposed sequences such as `S\u{0326}` therefore
7/// measure as their precomposed NFC form (`Ș`) in both the Base14
8/// per-character AFM path and the embedded-font shaping path.
9/// [`glyph_width`] delegates here for a one-character string, so it
10/// inherits the same normalization behavior.
11///
12/// For Base14 faces this sums per-character AFM widths (`WinAnsi`
13/// natives + extended Latin reachable via [`extended_glyph_name`]).
14/// Characters outside both tiers: Cyrillic, CJK, emoji: get the
15/// width of `?` (the substitution glyph the PDF emit path also uses
16/// for those characters in Base14 runs). No diagnostic; callers wanting
17/// real coverage should pick an embedded family.
18///
19/// For embedded faces this shapes via `rustybuzz` for glyph selection
20/// and sums the resulting PDF-emittable glyph advances. Positioning
21/// offsets are currently normalized away so layout matches PDF output.
22///
23/// # Examples
24///
25/// ```
26/// use mos_fonts::{Base14Font, Font, text_width};
27///
28/// let width = text_width(Font::Base14(Base14Font::Helvetica), 10.0, "A");
29///
30/// assert_eq!(width, 6.67);
31/// ```
32#[must_use]
33pub fn text_width(font: Font, size: f32, text: &str) -> f32 {
34    let text = nfc_text(text);
35    let text = text.as_ref();
36    match font {
37        Font::Base14(f) => {
38            let mut units: f32 = 0.0;
39            for ch in text.chars() {
40                units += base14_glyph_units(f, ch);
41            }
42            units * size / 1000.0
43        }
44        Font::Embedded(id) => {
45            let ef = id.data();
46            let glyphs = shape(ef, text);
47            let upem = f32::from(ef.units_per_em);
48            glyphs
49                .iter()
50                .map(|g| advance_units_to_pt(g.advance_units, size, upem))
51                .sum()
52        }
53    }
54}
55
/// Convert a font-unit advance to PDF user-space points at `size_pt`,
/// given the face's units-per-em (`upem`).
///
/// The advance is carried as `i32` because shapers use signed
/// advances, but current embedded output normalizes to PDF-emittable
/// `hmtx` advances in `0..=65535`. The sign is preserved anyway so
/// future positioned shaping cannot turn a negative adjustment into a
/// huge positive width.
///
/// The magnitude of `advance_units` saturates at `u16::MAX` (65535)
/// before scaling, so out-of-range inputs (including `i32::MIN`) are
/// clamped rather than wrapped.
///
/// `upem` is used as a plain divisor and is not validated: a zero
/// `upem` yields an infinite or NaN result under IEEE-754 rules.
///
/// # Examples
///
/// ```
/// use mos_fonts::advance_units_to_pt;
///
/// assert_eq!(advance_units_to_pt(500, 12.0, 1000.0), 6.0);
/// assert_eq!(advance_units_to_pt(-500, 12.0, 1000.0), -6.0);
/// ```
#[must_use]
pub fn advance_units_to_pt(advance_units: i32, size_pt: f32, upem: f32) -> f32 {
    // Going through `u16` keeps the integer-to-float conversion lossless
    // (`f32: From<u16>`) and caps the magnitude at the `hmtx` range.
    let magnitude = u16::try_from(advance_units.unsigned_abs()).unwrap_or(u16::MAX);
    let advance = f32::from(magnitude);
    let signed = if advance_units.is_negative() {
        -advance
    } else {
        advance
    };
    signed * size_pt / upem
}
80
81/// Width of a single glyph in `font` at `size` points. For Base14
82/// faces this is one AFM lookup; for embedded faces it shapes the
83/// single character. Used by the paragraph engine for character-wise
84/// hyphenation of oversized words.
85///
86/// # Examples
87///
88/// ```
89/// use mos_fonts::{Base14Font, Font, glyph_width};
90///
91/// assert_eq!(glyph_width(Font::Base14(Base14Font::Helvetica), 10.0, 'A'), 6.67);
92/// ```
93#[must_use]
94pub fn glyph_width(font: Font, size: f32, ch: char) -> f32 {
95    let mut buf = [0u8; 4];
96    let s = ch.encode_utf8(&mut buf);
97    text_width(font, size, s)
98}
99
100/// Ascender height for `font` at `size` points.
101///
102/// # Examples
103///
104/// ```
105/// use mos_fonts::{Base14Font, Font, ascent};
106///
107/// assert!(ascent(Font::Base14(Base14Font::Helvetica), 10.0) > 0.0);
108/// ```
109#[must_use]
110pub fn ascent(font: Font, size: f32) -> f32 {
111    match font {
112        Font::Base14(f) => f.metrics().ascender * size / 1000.0,
113        Font::Embedded(id) => {
114            let ef = id.data();
115            f32::from(ef.ascender) * size / f32::from(ef.units_per_em)
116        }
117    }
118}
119
120/// Descender depth for `font` at `size` points, as a **positive**
121/// number (the AFM/TTF storage convention is negative; both backends
122/// normalise on the way out).
123///
124/// # Examples
125///
126/// ```
127/// use mos_fonts::{Base14Font, Font, descent};
128///
129/// assert!(descent(Font::Base14(Base14Font::Helvetica), 10.0) > 0.0);
130/// ```
131#[must_use]
132pub fn descent(font: Font, size: f32) -> f32 {
133    match font {
134        Font::Base14(f) => -f.metrics().descender * size / 1000.0,
135        Font::Embedded(id) => {
136            let ef = id.data();
137            -f32::from(ef.descender) * size / f32::from(ef.units_per_em)
138        }
139    }
140}
141
142/// Width of a single character in a Base14 face, in 1/1000 em. `WinAnsi`
143/// natives go through the baked O(1) table; extended glyphs (Latin
144/// Extended-A, math operators, ligatures) go through the baked sorted
145/// name index. Anything else (Cyrillic, CJK, emoji) silently returns
146/// the width of `?`; the PDF emit path renders those characters as
147/// `?` too, so widths and content stream stay in sync. Embedded
148/// families exist precisely so callers wanting real coverage can opt
149/// out of this `?`-everywhere behaviour.
150fn base14_glyph_units(face: Base14Font, ch: char) -> f32 {
151    if matches!(face, Base14Font::Symbol | Base14Font::ZapfDingbats) {
152        // Symbol/Dingbats don't carry WinAnsi widths. The layout
153        // engine doesn't route runs into them today; treat as 0
154        // rather than panic.
155        return 0.0;
156    }
157    if let Some(byte) = winansi_byte(ch) {
158        return face.winansi_width(byte).unwrap_or(0.0);
159    }
160    if let Some(name) = extended_glyph_name(ch)
161        && let Some(w) = face.glyph_width_by_name(name)
162    {
163        return w;
164    }
165    // Fallback: width of `?` (WinAnsi byte 0x3F). Always present in
166    // every Latin Core 14 face.
167    face.winansi_width(b'?').unwrap_or(0.0)
168}
169
#[cfg(test)]
mod tests {
    use super::*;
    use crate::EmbeddedFontId;

    // Base14 faces shared by the tests below.
    const COURIER: Font = Font::Base14(Base14Font::Courier);
    const HELV: Font = Font::Base14(Base14Font::Helvetica);
    const HELV_BOLD: Font = Font::Base14(Base14Font::HelveticaBold);
    const HELV_OBLIQUE: Font = Font::Base14(Base14Font::HelveticaOblique);

    /// At 1000 pt the width in points equals the AFM width in 1/1000 em.
    #[test]
    fn helvetica_space_width_is_278_thou_em() {
        let w = text_width(HELV, 1000.0, " ");
        assert!((w - 278.0).abs() < 1e-6);
    }

    #[test]
    fn helvetica_apostrophe_matches_afm() {
        let w = text_width(HELV, 1000.0, "'");
        let off_by = (w - 191.0).abs();
        assert!(off_by < 1e-6, "got {w}");
    }

    /// Narrow and wide letters must measure the same in Courier.
    #[test]
    fn courier_is_monospace() {
        let narrow = text_width(COURIER, 12.0, "a");
        let wide = text_width(COURIER, 12.0, "M");
        assert_eq!(narrow, wide);
    }

    #[test]
    fn bold_is_wider_than_regular_for_caps() {
        let (r, b) = (
            text_width(HELV, 100.0, "B"),
            text_width(HELV_BOLD, 100.0, "B"),
        );
        assert!(b > r);
    }

    #[test]
    fn helvetica_capital_a_matches_adobe_core14_afm() {
        // Regular and oblique share a width; bold is wider.
        let w = text_width(HELV, 1000.0, "A");
        let regular_err = (w - 667.0).abs();
        assert!(regular_err < 1e-3, "got {w}");

        let wo = text_width(HELV_OBLIQUE, 1000.0, "A");
        let oblique_err = (wo - 667.0).abs();
        assert!(oblique_err < 1e-3, "got {wo}");

        let wb = text_width(HELV_BOLD, 1000.0, "A");
        let bold_err = (wb - 722.0).abs();
        assert!(bold_err < 1e-3, "got {wb}");
    }

    #[test]
    fn helvetica_eacute_matches_adobe_core14_afm() {
        let lower = text_width(HELV, 1000.0, "é");
        let lower_err = (lower - 556.0).abs();
        assert!(lower_err < 1e-3, "got {lower}");

        let upper = text_width(HELV, 1000.0, "É");
        let upper_err = (upper - 667.0).abs();
        assert!(upper_err < 1e-3, "got {upper}");
    }

    #[test]
    fn base14_non_winansi_falls_back_to_question_mark_silently() {
        // No Base14 face has a glyph for Cyrillic П. Measurement uses
        // the width of `?` so it agrees with the rendered output (PDF
        // emission prints `?` for the same character), and no
        // diagnostic is emitted.
        let cyrillic = text_width(HELV, 1000.0, "П");
        let q = text_width(HELV, 1000.0, "?");
        let gap = (q - cyrillic).abs();
        assert!(gap < 1e-3, "q={q} cyr={cyrillic}");
    }

    #[test]
    fn helvetica_lslash_resolves_through_extended_glyph_name_lookup() {
        let w = text_width(HELV, 1000.0, "ł");
        let single_err = (w - 222.0).abs();
        assert!(single_err < 1e-3, "got {w}");

        // Expected per-character widths for Ł, ó, d, ź.
        let expected: f32 = 556.0 + 556.0 + 556.0 + 500.0;
        let lodz = text_width(HELV, 1000.0, "Łódź");
        let word_err = (lodz - expected).abs();
        assert!(word_err < 1e-3, "got {lodz}");
    }

    #[test]
    fn embedded_text_width_is_nonzero_for_cyrillic() {
        // Scripts the Base14 fonts can't render must get real widths
        // through the embedded path.
        let w = text_width(Font::Embedded(EmbeddedFontId::Regular), 12.0, "Привет");
        assert!(w > 0.0);
    }

    #[test]
    fn embedded_text_width_normalizes_decomposed_romanian() {
        let measure = |s: &str| text_width(Font::Embedded(EmbeddedFontId::Regular), 12.0, s);
        let decomposed = measure("S\u{0326}");
        let precomposed = measure("\u{0218}");

        assert!((decomposed - precomposed).abs() < f32::EPSILON);
    }

    #[test]
    fn advance_units_to_pt_preserves_negative_sign() {
        let actual = advance_units_to_pt(-1000, 12.0, 1000.0);
        let distance_from_expected = (actual + 12.0).abs();
        assert!(distance_from_expected < f32::EPSILON, "got {actual}");
    }
}