Skip to main content

mos_pdf/
encoding.rs

1//! Per-document `/Differences`-based encoding planning for the Core 14
2//! Latin fonts.
3//!
4//! ## Problem
5//!
6//! PDF single-byte fonts address at most 256 glyphs. The Core 14
7//! `WinAnsiEncoding` only carries the ~216 Latin-1+Windows glyphs that
8//! every PDF reader ships built-in (Annex D.2). But each Core 14 AFM
9//! also lists 99 extra glyphs: Latin Extended-A (`Ł`, `ł`, `Ě`, …),
10//! the Romanian comma-below set, the spacing diacritics, the math
11//! operators, and the `fi`/`fl` ligatures, which have no `WinAnsi` byte.
12//!
13//! PDF's escape hatch is the `/Encoding` dictionary with a
14//! `/Differences` array: it lets us declare "byte 0x7F means
15//! `/lslash`, byte 0x81 means `/Lslash`, byte 0x90 means `/ecaron`"
16//! and so on, sitting on top of `WinAnsiEncoding` as the base. The
17//! glyph outlines still come from the reader's built-in Helvetica/
18//! Times/Courier; we just rearrange which byte addresses which glyph
19//! name from the AFM. No font data ships.
20//!
21//! ## Algorithm
22//!
23//! For each Latin Core 14 face actually used by some text run:
24//!
25//! 1. Walk every char of every run and partition into:
26//!    - `WinAnsi natives`: have `winansi_byte(ch) = Some(b)`. The byte
27//!      `b` is **claimed**: it can't be repurposed for a `Differences`
28//!      remap because the content stream already uses it.
29//!    - `Extended`: no `winansi_byte`, but `extended_glyph_name(ch)`
30//!      resolves to an AFM glyph name. Needs a remapped slot.
//!    - `Unmappable`: neither (Cyrillic, CJK, emoji). Won't occur in
//!      practice; the layout engine substitutes these to `?` upstream.
//!      If one slips through anyway, the planner skips it defensively
//!      and assigns it no byte.
34//!
35//! 2. Allocate slots for the extended set from a deterministic free
36//!    pool:
37//!    a. The six `WinAnsi` gap bytes `0x7F, 0x81, 0x8D, 0x8F, 0x90,
38//!    0x9D` first; these are guaranteed unmapped in `WinAnsiEncoding`
39//!    and produce stable golden output for the common case (≤ 6
40//!    extended glyphs).
41//!    b. Then unused `0x20..=0xFF` slots in descending order. Going
42//!    high-to-low keeps short ASCII-heavy paragraphs from perturbing
43//!    low-byte slots; documents with rich punctuation at `0xE0..0xFF`
44//!    still get plenty of room from `0x20..0x7E`.
45//!
46//! 3. If we exhaust the pool before placing every extended char,
47//!    emit `MOS0032` and drop the overflow (those chars render as `?`).
48//!
49//! ## Output
50//!
51//! [`DocEncoding`] carries everything the PDF emit code needs: the
52//! `/Differences` pairs (slot → AFM glyph name) for the font dict,
53//! `byte_for_char` for the content-stream encoder, and
54//! `to_unicode_entries` for the `/ToUnicode` `CMap` so copy-paste keeps
55//! working.
56
57use std::collections::{BTreeMap, BTreeSet, HashMap};
58
59use mos_core::{Diagnostic, codes};
60use mos_layout::{Base14Font, Font, TextRun};
61
// Neither `Base14Font` nor `Font` derives `Ord` (both are Hash/Eq), so
// the planner's per-face bucket (keyed by `Base14Font`) and the map
// returned by `finalize` (keyed by `Font`) are plain `HashMap`s, which
// keeps the storage trivial. Deterministic per-face output comes from
// sorting `differences` at the end of `plan_face` and from iterating
// each face's char set through a `BTreeSet`.
67
/// Per-font planning output. The PDF emit path consumes this once per
/// document.
///
/// The `Default` value is the empty plan: no remapped slots, no
/// char → byte entries, and no `/ToUnicode` entries.
#[derive(Debug, Clone, Default)]
pub(crate) struct DocEncoding {
    /// Slots remapped on top of `WinAnsiEncoding`, as
    /// `(byte, AFM glyph name)` pairs. Sorted ascending by slot so the
    /// emitted `/Differences` array is stable.
    pub differences: Vec<(u8, &'static str)>,
    /// Direct char → byte for the content-stream encoder. Covers
    /// both `WinAnsi` natives (mapping to their canonical byte) and
    /// extended chars (mapping to a remapped slot). Chars that could
    /// not be placed (pool overflow, glyph missing from the face, or
    /// unmappable) have no entry here.
    pub byte_for_char: HashMap<char, u8>,
    /// Byte → original Unicode codepoint for the `/ToUnicode` `CMap`.
    /// Ascending by byte for stable output.
    pub to_unicode_entries: Vec<(u8, char)>,
}
83
impl DocEncoding {
    /// `true` if this face needs a custom `/Encoding` dict (one or
    /// more remapped slots). When `false`, callers should emit the
    /// existing `/Encoding /WinAnsiEncoding` shortcut and skip
    /// `/ToUnicode`.
    ///
    /// A plan built from `WinAnsi`-native chars only has an empty
    /// `differences` list, so this returns `false` for it.
    pub(crate) fn has_differences(&self) -> bool {
        !self.differences.is_empty()
    }
}
93
/// Two-phase encoding planner: caller streams every `(face, ch)` in
/// through [`Self::observe`], then calls [`Self::finalize`] to get one
/// [`DocEncoding`] per Latin Core 14 face that participated.
///
/// The planner only accumulates the set of chars seen per face; all
/// slot allocation happens in `finalize`.
#[derive(Debug, Default)]
pub(crate) struct EncodingPlanner {
    /// Observed chars per face. `BTreeSet` for deterministic order
    /// during finalize, which keeps `/Differences` arrays byte-stable
    /// between runs. (`Base14Font` doesn't derive `Ord`, so the outer
    /// container is a `HashMap`: finalize sorts what matters.)
    /// `Symbol` and `ZapfDingbats` never appear as keys; `observe`
    /// drops them before insertion.
    used: HashMap<Base14Font, BTreeSet<char>>,
}
105
106impl EncodingPlanner {
107    pub(crate) fn new() -> Self {
108        Self::default()
109    }
110
111    /// Record that `face` will need to render `ch`. Idempotent.
112    /// `Symbol` and `ZapfDingbats` are silently ignored; those faces
113    /// don't participate in `/Differences` planning (their encodings
114    /// are different category entirely; see crate-level
115    /// `pdf-base14-metrics` docs).
116    pub(crate) fn observe(&mut self, face: Base14Font, ch: char) {
117        if matches!(face, Base14Font::Symbol | Base14Font::ZapfDingbats) {
118            return;
119        }
120        self.used.entry(face).or_default().insert(ch);
121    }
122
123    /// Convenience: feed every char of every text run. Embedded-font
124    /// runs are skipped; they take the Type 0 CID path and don't
125    /// participate in `/Differences` planning.
126    pub(crate) fn observe_runs(&mut self, runs: &[TextRun]) {
127        for run in runs {
128            let Some(face) = run.font.base14() else {
129                continue;
130            };
131            for ch in run.text.chars() {
132                self.observe(face, ch);
133            }
134        }
135    }
136
137    /// Compute the per-face encoding plan. Any face never observed
138    /// is absent from the returned map; callers should fall back to
139    /// the predefined `WinAnsiEncoding` shortcut for those.
140    ///
141    /// Pushes a `MOS0032` diagnostic when a face's extended-glyph budget
142    /// overflows the 256-slot single-byte ceiling.
143    pub(crate) fn finalize(self, diagnostics: &mut Vec<Diagnostic>) -> HashMap<Font, DocEncoding> {
144        let mut out = HashMap::with_capacity(self.used.len());
145        for (face, chars) in self.used {
146            out.insert(Font::Base14(face), plan_face(face, &chars, diagnostics));
147        }
148        out
149    }
150}
151
152/// Computes the encoding plan for a single face given the set of
153/// chars the document needs from it.
154fn plan_face(
155    face: Base14Font,
156    chars: &BTreeSet<char>,
157    diagnostics: &mut Vec<Diagnostic>,
158) -> DocEncoding {
159    let mut byte_for_char: HashMap<char, u8> = HashMap::with_capacity(chars.len());
160    // `byte → char` so we can build /ToUnicode at the end. Used bytes
161    // include WinAnsi natives we claim and remapped slots.
162    let mut to_unicode: BTreeMap<u8, char> = BTreeMap::new();
163    // Bytes already claimed (either by a WinAnsi native or already
164    // remapped). Indexed by u8 for O(1) probe.
165    let mut claimed = [false; 256];
166    // Extended chars that need a remapped slot.
167    let mut extended: Vec<char> = Vec::new();
168
169    for &ch in chars {
170        if let Some(byte) = mos_fonts::winansi_byte(ch) {
171            byte_for_char.insert(ch, byte);
172            to_unicode.insert(byte, ch);
173            claimed[usize::from(byte)] = true;
174        } else if mos_fonts::extended_glyph_name(ch).is_some() {
175            extended.push(ch);
176        }
177        // No `else`: layout substituted unmappable chars to `?`
178        // already; the `?` is itself a WinAnsi native handled above.
179    }
180
181    // Skip the rest if nothing extended showed up: typical for
182    // pure-ASCII or pure-Latin-1 documents. Empty differences signals
183    // "use /Encoding /WinAnsiEncoding shortcut" to the emitter.
184    if extended.is_empty() {
185        return DocEncoding {
186            differences: Vec::new(),
187            byte_for_char,
188            to_unicode_entries: to_unicode.into_iter().collect(),
189        };
190    }
191
192    // Allocate slots. We materialise the preferred order into a
193    // small `Vec` rather than chaining a filter over `claimed`,
194    // because the borrow checker rejects a long-lived closure that
195    // captures `&claimed` while we also mutate `claimed[slot] =
196    // true` mid-loop. The eager Vec is at most 230 entries (6 gaps
197    // + 0x20..=0xFF) so the cost is negligible.
198    let mut free_slots: Vec<u8> = allocation_order()
199        .filter(|&b| !claimed[usize::from(b)])
200        .collect();
201    free_slots.reverse(); // pop from the back to preserve our order
202
203    let mut differences: Vec<(u8, &'static str)> = Vec::with_capacity(extended.len());
204    let mut overflowed: usize = 0;
205
206    for ch in extended {
207        let Some(name) = mos_fonts::extended_glyph_name(ch) else {
208            continue;
209        };
210        // Defensive: confirm the face actually carries this glyph.
211        // For the 12 Latin Core 14 faces the AGL subset only points
212        // at glyphs present in every Latin AFM, so this always
213        // succeeds; the check guards against future expansion of
214        // `extended_glyph_name` past the shared 315-name inventory.
215        if face.glyph_width_by_name(name).is_none() {
216            overflowed += 1;
217            continue;
218        }
219        let Some(slot) = free_slots.pop() else {
220            overflowed += 1;
221            continue;
222        };
223        differences.push((slot, name));
224        byte_for_char.insert(ch, slot);
225        to_unicode.insert(slot, ch);
226        claimed[usize::from(slot)] = true;
227    }
228
229    if overflowed > 0 {
230        diagnostics.push(Diagnostic::simple(
231            &codes::MOS0032,
232            None,
233            format!(
234                "extended glyph budget exhausted in {face:?}: {overflowed} \
235                 character(s) could not be encoded in the 256-slot \
236                 /Differences map and rendered as `?`"
237            ),
238        ));
239    }
240
241    differences.sort_unstable_by_key(|&(b, _)| b);
242
243    DocEncoding {
244        differences,
245        byte_for_char,
246        to_unicode_entries: to_unicode.into_iter().collect(),
247    }
248}
249
/// Yields candidate remap slots in the order the planner should try
/// them.
///
/// The sequence is the six `WinAnsi` gap bytes (`0x7F`, `0x81`,
/// `0x8D`, `0x8F`, `0x90`, `0x9D`) followed by `0xFF` down to `0x20`
/// with those six bytes left out, 224 distinct bytes in total.
/// Putting the gaps first gives predictable golden output for
/// documents with ≤ 6 extended glyphs; walking the rest high-to-low
/// keeps short paragraphs from perturbing low-byte slots.
///
/// `0x00..=0x1F` is deliberately never offered: PDF readers tolerate
/// control bytes in `/Differences`, but content streams that need an
/// `Str(...)` literal can run afoul of `\0`/`\r`/`\n` escaping.
///
/// Leaving the gap bytes out of the descending tail is load-bearing.
/// The planner takes slots from this sequence without checking for
/// repeats, so a gap byte emitted a second time at its natural
/// position would be assigned to two different extended chars once
/// enough slots were consumed to reach it. See
/// `differences_have_unique_slots` for the regression guard.
fn allocation_order() -> impl Iterator<Item = u8> {
    const GAP_SLOTS: [u8; 6] = [0x7F, 0x81, 0x8D, 0x8F, 0x90, 0x9D];

    // Everything from 0xFF down to 0x20 that is not already covered by
    // the gap prefix.
    let descending_tail = (0x20_u8..=0xFF_u8)
        .rev()
        .filter(|slot| !GAP_SLOTS.contains(slot));

    IntoIterator::into_iter(GAP_SLOTS).chain(descending_tail)
}
273
#[cfg(test)]
mod tests {
    // No `#![allow]` here; every test uses `assert!`/`assert_eq!`
    // for failure reporting; nothing reaches for `unwrap`/`expect`/
    // `panic!`. Setup helpers stay infallible by routing missing-key
    // lookups through `unwrap_or_default` (an Option combinator, not
    // a panic).
    use super::*;

    /// Runs the full observe → finalize pipeline for one face over
    /// `text` and returns that face's plan plus any diagnostics. A
    /// face missing from the result yields the default (empty) plan.
    fn plan(face: Base14Font, text: &str) -> (DocEncoding, Vec<Diagnostic>) {
        let mut p = EncodingPlanner::new();
        for ch in text.chars() {
            p.observe(face, ch);
        }
        let mut diags = Vec::new();
        let mut out = p.finalize(&mut diags);
        let enc = out.remove(&Font::Base14(face)).unwrap_or_default();
        (enc, diags)
    }

    #[test]
    fn pure_ascii_needs_no_differences() {
        let (enc, diags) = plan(Base14Font::Helvetica, "Hello, world!");
        assert!(enc.differences.is_empty());
        assert!(diags.is_empty());
        // Every char round-trips through byte_for_char.
        assert_eq!(enc.byte_for_char.get(&'H'), Some(&b'H'));
        assert_eq!(enc.byte_for_char.get(&'!'), Some(&b'!'));
    }

    #[test]
    fn latin1_only_needs_no_differences() {
        // café Straße §1: every char is a WinAnsi native.
        let (enc, diags) = plan(Base14Font::Helvetica, "café Straße §1");
        assert!(enc.differences.is_empty());
        assert!(diags.is_empty());
        assert_eq!(enc.byte_for_char.get(&'é'), Some(&0xE9_u8));
        assert_eq!(enc.byte_for_char.get(&'ß'), Some(&0xDF_u8));
    }

    #[test]
    fn polish_lslash_lands_in_first_gap_slot() {
        // Łódź: Ł (U+0141 → AGL `Lslash`) and ź (U+017A → AGL `zacute`)
        // are extended. ó (U+00F3 → WinAnsi 0xF3) and d (ASCII 0x64)
        // are WinAnsi natives.
        let (enc, diags) = plan(Base14Font::Helvetica, "Łódź");
        assert!(diags.is_empty());
        // Two extended chars: Ł and ź. First two go to gap slots
        // 0x7F (Ł) and 0x81 (ź) since the BTreeSet iterates in
        // codepoint order (Ł=U+0141 < ź=U+017A).
        assert_eq!(enc.differences.len(), 2);
        assert_eq!(enc.differences[0], (0x7F, "Lslash"));
        assert_eq!(enc.differences[1], (0x81, "zacute"));
        assert_eq!(enc.byte_for_char.get(&'Ł'), Some(&0x7F_u8));
        assert_eq!(enc.byte_for_char.get(&'ź'), Some(&0x81_u8));
        // WinAnsi natives keep their canonical byte.
        assert_eq!(enc.byte_for_char.get(&'ó'), Some(&0xF3_u8));
        assert_eq!(enc.byte_for_char.get(&'d'), Some(&b'd'));
    }

    #[test]
    fn czech_uses_only_gap_slots_when_under_6() {
        // Příliš = P, ř, í, l, i, š. ř (U+0159) is extended (`rcaron`);
        // š (U+0161) is WinAnsi (0x9A) and í (U+00ED) is WinAnsi (0xED).
        // So one extended char: ř → first gap slot 0x7F.
        let (enc, _) = plan(Base14Font::Helvetica, "Příliš");
        assert_eq!(enc.differences.len(), 1);
        assert_eq!(enc.differences[0], (0x7F, "rcaron"));
    }

    #[test]
    fn to_unicode_covers_every_used_byte() {
        let (enc, _) = plan(Base14Font::Helvetica, "Łódź");
        // Every byte we emit (whether a WinAnsi native or a remap)
        // must round-trip back to its Unicode codepoint.
        let map: HashMap<u8, char> = enc.to_unicode_entries.iter().copied().collect();
        assert_eq!(map.get(&0x7F), Some(&'Ł'));
        assert_eq!(map.get(&0x81), Some(&'ź'));
        assert_eq!(map.get(&0xF3), Some(&'ó'));
        assert_eq!(map.get(&b'd'), Some(&'d'));
    }

    #[test]
    fn budget_exhaustion_emits_mos0032() {
        // Force overflow: claim every ASCII printable + every Latin-1
        // codepoint as a WinAnsi native, then ask for 62 extended
        // chars. ASCII (95) + Latin-1 0xA0..=0xFF (96) = 191 slots
        // claimed. The pool is the 224 bytes of 0x20..=0xFF (the six
        // gaps are part of that count, not on top of it), so the free
        // pool after these claims = 224 - 191 = 33 slots (0x7F plus
        // 0x80..=0x9F). Extended chars requested = 62, so the expected
        // overflow is 62 - 33 = 29, assuming every listed codepoint
        // resolves to a glyph the face carries. The assertions below
        // only require *some* overflow, not the exact figure.
        let mut all_chars: BTreeSet<char> = BTreeSet::new();
        for b in 0x20_u8..=0x7E_u8 {
            all_chars.insert(char::from(b));
        }
        for c in '\u{00A0}'..='\u{00FF}' {
            all_chars.insert(c);
        }
        // 62 extended codepoints from the AGL subset (every Latin
        // Extended-A entry minus those already in WinAnsi like š/Š/ž/Ž).
        for c in [
            '\u{0102}', '\u{0103}', '\u{0104}', '\u{0105}', '\u{0106}', '\u{0107}', '\u{010C}',
            '\u{010D}', '\u{010E}', '\u{010F}', '\u{0110}', '\u{0111}', '\u{0118}', '\u{0119}',
            '\u{011A}', '\u{011B}', '\u{011E}', '\u{011F}', '\u{0122}', '\u{0123}', '\u{0136}',
            '\u{0137}', '\u{0139}', '\u{013A}', '\u{013B}', '\u{013C}', '\u{013D}', '\u{013E}',
            '\u{0141}', '\u{0142}', '\u{0143}', '\u{0144}', '\u{0145}', '\u{0146}', '\u{0147}',
            '\u{0148}', '\u{0150}', '\u{0151}', '\u{0154}', '\u{0155}', '\u{0156}', '\u{0157}',
            '\u{0158}', '\u{0159}', '\u{015A}', '\u{015B}', '\u{015E}', '\u{015F}', '\u{0162}',
            '\u{0163}', '\u{0164}', '\u{0165}', '\u{016E}', '\u{016F}', '\u{0170}', '\u{0171}',
            '\u{0172}', '\u{0173}', '\u{0179}', '\u{017A}', '\u{017B}', '\u{017C}',
        ] {
            all_chars.insert(c);
        }
        let mut diags = Vec::new();
        let enc = plan_face(Base14Font::Helvetica, &all_chars, &mut diags);
        // The free pool after the WinAnsi claims is smaller than the
        // 62 chars requested, so strictly fewer than 62 differences
        // can be placed. (The 0xA0..=0xFF codepoints claim bytes that
        // lie inside our pool 0x20..=0xFF, which is exactly the
        // design.)
        assert!(
            enc.differences.len() < 62,
            "expected overflow, got {} differences",
            enc.differences.len()
        );
        assert_eq!(diags.len(), 1, "expected exactly one MOS0032");
        assert_eq!(diags[0].def().code(), codes::MOS0032.code());
        assert!(
            diags[0].message().contains("budget exhausted"),
            "msg = {:?}",
            diags[0].message()
        );
    }

    #[test]
    fn symbol_and_dingbats_are_ignored() {
        // Observations for the two non-Latin faces must leave the
        // planner empty: no plan and no diagnostics.
        let mut p = EncodingPlanner::new();
        p.observe(Base14Font::Symbol, 'A');
        p.observe(Base14Font::ZapfDingbats, 'A');
        let mut diags = Vec::new();
        let out = p.finalize(&mut diags);
        assert!(out.is_empty());
        assert!(diags.is_empty());
    }

    #[test]
    fn allocation_order_starts_with_gaps_then_descends_without_dups() {
        let mut order = allocation_order();
        assert_eq!(order.next(), Some(0x7F));
        assert_eq!(order.next(), Some(0x81));
        assert_eq!(order.next(), Some(0x8D));
        assert_eq!(order.next(), Some(0x8F));
        assert_eq!(order.next(), Some(0x90));
        assert_eq!(order.next(), Some(0x9D));
        assert_eq!(order.next(), Some(0xFF));
        assert_eq!(order.next(), Some(0xFE));
        // 6 gaps + (0xFF - 0x20 + 1 - 6 gaps in that range = 218) = 224.
        let all: Vec<u8> = allocation_order().collect();
        assert_eq!(all.len(), 6 + 218);
        // Every slot appears at most once.
        let unique: BTreeSet<u8> = all.iter().copied().collect();
        assert_eq!(
            unique.len(),
            all.len(),
            "allocation_order yields duplicate slots"
        );
        // The descending tail should never re-emit a gap byte.
        for &b in &all[6..] {
            assert!(
                !matches!(b, 0x7F | 0x81 | 0x8D | 0x8F | 0x90 | 0x9D),
                "gap byte 0x{b:02X} re-appears in descending tail"
            );
        }
    }

    #[test]
    fn differences_have_unique_slots() {
        // Regression guard for the latent slot-dup bug: every entry in
        // `differences` must address a distinct byte once the planner
        // has used up the six gaps and moved on to the descending
        // range. We feed 78 Latin Extended-A chars through a face that
        // has every glyph (Helvetica), claim every ASCII printable
        // byte too, then assert uniqueness.
        //
        // NOTE(review): with 78 extended chars and only ASCII claimed,
        // allocation takes the 6 gaps plus 72 descending slots and so
        // stops around 0xB8, before the descending range reaches the
        // gap positions (0x9D and below). This test therefore would
        // not by itself catch a re-emitted gap byte; the
        // `allocation_order_...without_dups` test above is the direct
        // guard for that.
        let mut all_chars: BTreeSet<char> = BTreeSet::new();
        for b in 0x20_u8..=0x7E_u8 {
            all_chars.insert(char::from(b));
        }
        for c in [
            '\u{0100}', '\u{0101}', '\u{0102}', '\u{0103}', '\u{0104}', '\u{0105}', '\u{0106}',
            '\u{0107}', '\u{010C}', '\u{010D}', '\u{010E}', '\u{010F}', '\u{0110}', '\u{0111}',
            '\u{0112}', '\u{0113}', '\u{0116}', '\u{0117}', '\u{0118}', '\u{0119}', '\u{011A}',
            '\u{011B}', '\u{011E}', '\u{011F}', '\u{0122}', '\u{0123}', '\u{012A}', '\u{012B}',
            '\u{012E}', '\u{012F}', '\u{0130}', '\u{0131}', '\u{0136}', '\u{0137}', '\u{0139}',
            '\u{013A}', '\u{013B}', '\u{013C}', '\u{013D}', '\u{013E}', '\u{0141}', '\u{0142}',
            '\u{0143}', '\u{0144}', '\u{0145}', '\u{0146}', '\u{0147}', '\u{0148}', '\u{014C}',
            '\u{014D}', '\u{0150}', '\u{0151}', '\u{0154}', '\u{0155}', '\u{0156}', '\u{0157}',
            '\u{0158}', '\u{0159}', '\u{015A}', '\u{015B}', '\u{015E}', '\u{015F}', '\u{0162}',
            '\u{0163}', '\u{0164}', '\u{0165}', '\u{016A}', '\u{016B}', '\u{016E}', '\u{016F}',
            '\u{0170}', '\u{0171}', '\u{0172}', '\u{0173}', '\u{0179}', '\u{017A}', '\u{017B}',
            '\u{017C}',
        ] {
            all_chars.insert(c);
        }
        let mut diags = Vec::new();
        let enc = plan_face(Base14Font::Helvetica, &all_chars, &mut diags);
        let slots: BTreeSet<u8> = enc.differences.iter().map(|&(b, _)| b).collect();
        assert_eq!(
            slots.len(),
            enc.differences.len(),
            "duplicate slot in /Differences: {:?}",
            enc.differences
        );
        // Same uniqueness invariant for the char → byte map: no two
        // chars may share a byte.
        let bytes: BTreeSet<u8> = enc.byte_for_char.values().copied().collect();
        assert_eq!(
            bytes.len(),
            enc.byte_for_char.len(),
            "byte_for_char maps two chars to the same byte"
        );
    }

    #[test]
    fn differences_are_sorted_by_slot() {
        // Mixed Czech + Polish + math: should still produce
        // ascending-slot differences.
        let (enc, _) = plan(Base14Font::Helvetica, "ł ě √ ≤");
        for w in enc.differences.windows(2) {
            assert!(w[0].0 < w[1].0, "out of order: {:?} vs {:?}", w[0], w[1]);
        }
    }
}