mos_layout/boundary.rs
1//! Page boundary signatures (issue #70; design note
2//! `docs/incremental-dependencies.md` §4.5, §6, §7).
3//!
4//! Reflow and fixpoint work (manifest §33) needs to detect *where* pagination
5//! changed between two layout runs without re-diffing whole pages or
6//! serializing PDF output. A [`PageBoundarySignature`] is a compact, stable
7//! digest of one laid-out [`Page`]'s break-defining content; a
8//! [`PageGraphSignature`] is the ordered per-page list, so the first index
9//! where two graphs disagree is exactly where the page breaks diverged.
10//!
11//! This is the §4.5 `PageOutputHash` ("did the laid-out page actually change?")
12//! reduced to the layout primitives that exist today (text runs and image
13//! placements). It is identity/comparison only: no `DepNode` graph, no
14//! `CacheKey` wiring, no reflow loop; those consume these signatures later.
15//!
16//! # What feeds a signature
17//!
18//! Per page, in order: the page number, the quantized page box, then each text
19//! run (quantized position + size, a backend-neutral font identity, text) and
20//! each image placement (intrinsic pixel dimensions + quantized rectangle). Run
21//! and image counts are folded too, so adding or removing one shifts the digest.
22//!
23//! Deliberately **excluded**, per the determinism rules (§5) and the §4.2/§4.3
24//! carve-outs:
25//!
26//! - **Shaped glyphs** on a run: derived from text + font + shaper, so folding
27//! them would bind the signature to a transcoder version it should not care
28//! about. The authored text and font identity stand in for them.
29//! - **The PDF resource name** of a font (`F1`..): a backend emitter slot;
30//! layout must not depend on it, so a backend-neutral font identity is folded
31//! instead (see `font_identity`).
32//! - **Decoded image pixels** (`rgb8`): an asset-content concern (§4.3),
33//! addressed by the asset's own hash, not the page boundary.
34//! - **`resolved_path`** on an image handle: an absolute filesystem path, which
35//! §5 rule 1 forbids from any hash.
36//! - **`handle.id`**: assigned in image-encounter order, so folding it would
37//! churn unrelated pages' signatures when an image is added earlier; the
38//! intrinsic pixel dimensions identify the asset's footprint instead.
39//!
40//! # Quantization
41//!
42//! Every `f32` dimension is snapped to the 1/64-pt grid (§6) before folding, so
43//! two layouts that reach the same length through slightly different arithmetic
44//! agree. The design note specifies an `i32` count of 1/64 pt; we snap to the
45//! same grid and fold the canonical bit pattern of the integral count instead
46//! (see [`quantize_units`]), which is equivalent for hashing and avoids a
47//! lint-denied float-to-int cast. The §9.5 quantization newtype can later adopt
48//! the `i32` form without changing this boundary's observable behavior.
49
50use mos_core::{ContentHash, ContentHasher};
51use mos_fonts::{Base14Font, EmbeddedFontId, Font};
52
53use crate::types::{ImagePlacement, LayoutResult, Page, PageGraph, TextRun};
54
55/// Domain tag separating this boundary from every other `H(...)` (§4).
56const PAGE_DOMAIN: &[u8] = b"mos-layout/page-boundary/v1";
57
58/// A backend-neutral, stable identity for a font face.
59///
60/// Deliberately *not* `Font::pdf_resource_name` (`F1`..): that is a PDF emitter
61/// resource slot, and a layout signature must not depend on backend layout. The
62/// returned tag is owned by this boundary and stable forever; the exhaustive
63/// match means a new bundled face fails to compile here until it is assigned a
64/// tag, so the identity can never silently alias.
65fn font_identity(font: Font) -> &'static [u8] {
66 match font {
67 Font::Base14(Base14Font::Helvetica) => b"b14/helvetica",
68 Font::Base14(Base14Font::HelveticaBold) => b"b14/helvetica-bold",
69 Font::Base14(Base14Font::HelveticaOblique) => b"b14/helvetica-oblique",
70 Font::Base14(Base14Font::HelveticaBoldOblique) => b"b14/helvetica-boldoblique",
71 Font::Base14(Base14Font::TimesRoman) => b"b14/times-roman",
72 Font::Base14(Base14Font::TimesBold) => b"b14/times-bold",
73 Font::Base14(Base14Font::TimesItalic) => b"b14/times-italic",
74 Font::Base14(Base14Font::TimesBoldItalic) => b"b14/times-bolditalic",
75 Font::Base14(Base14Font::Courier) => b"b14/courier",
76 Font::Base14(Base14Font::CourierBold) => b"b14/courier-bold",
77 Font::Base14(Base14Font::CourierOblique) => b"b14/courier-oblique",
78 Font::Base14(Base14Font::CourierBoldOblique) => b"b14/courier-boldoblique",
79 Font::Base14(Base14Font::Symbol) => b"b14/symbol",
80 Font::Base14(Base14Font::ZapfDingbats) => b"b14/zapfdingbats",
81 Font::Embedded(EmbeddedFontId::Regular) => b"emb/noto-sans-regular",
82 Font::Embedded(EmbeddedFontId::Bold) => b"emb/noto-sans-bold",
83 Font::Embedded(EmbeddedFontId::Italic) => b"emb/noto-sans-italic",
84 Font::Embedded(EmbeddedFontId::BoldItalic) => b"emb/noto-sans-bolditalic",
85 Font::Embedded(EmbeddedFontId::Mono) => b"emb/noto-sans-mono",
86 Font::Embedded(EmbeddedFontId::Math) => b"emb/noto-sans-math",
87 }
88}
89
90/// Snap a point measurement to the 1/64-pt grid (§6) and return the canonical
91/// bit pattern of that integral count.
92///
93/// Snapping first means two dimensions within one shaper unit fold equal.
94/// Dividing is avoided entirely: `(pt * 64).round()` is already the integral
95/// count, and while that count stays below `2^24` (i.e. `pt < 262144`, ~five
96/// orders of magnitude above any real page coordinate) it is represented
97/// exactly by `f32`, so its bit pattern is a canonical, collision-free encoding
98/// of that integer. Past `2^24` consecutive counts would alias: out of the
99/// domain layout produces, but the bound the `i32` form (§9.5) would lift.
100/// `-0.0`, `NaN`, and non-finite values normalize to `0` so they cannot create
101/// spurious distinctions.
102fn quantize_units(pt: f32) -> u32 {
103 let units = (pt * 64.0).round();
104 let canonical = if units.is_finite() && units != 0.0 {
105 units
106 } else {
107 0.0
108 };
109 canonical.to_bits()
110}
111
112/// The number of items folded as a count, saturating so an absurd length cannot
113/// silently wrap.
114fn fold_count(hasher: &mut ContentHasher, len: usize) {
115 hasher.u32(u32::try_from(len).unwrap_or(u32::MAX));
116}
117
118fn fold_run(hasher: &mut ContentHasher, run: &TextRun) {
119 hasher
120 .u32(quantize_units(run.x_pt))
121 .u32(quantize_units(run.baseline_from_top_pt))
122 .u32(quantize_units(run.size_pt))
123 .field(font_identity(run.font))
124 .field(run.text.as_bytes());
125 // Optional `/ActualText`: fold a presence flag so `Some("")` and `None`
126 // stay distinct, then the bytes when present.
127 match &run.actual_text {
128 Some(actual) => {
129 hasher.u32(1).field(actual.as_bytes());
130 }
131 None => {
132 hasher.u32(0);
133 }
134 }
135 // `glyphs` excluded: derived from text + font + shaper (§4.2 carve-out).
136}
137
138fn fold_image(hasher: &mut ContentHasher, image: &ImagePlacement) {
139 hasher
140 // Intrinsic pixel dimensions identify the asset's layout footprint.
141 .u32(image.handle.pixel_width)
142 .u32(image.handle.pixel_height)
143 .u32(quantize_units(image.x_pt))
144 .u32(quantize_units(image.top_from_top_pt))
145 .u32(quantize_units(image.width_pt))
146 .u32(quantize_units(image.height_pt));
147 // `handle.id` is excluded: it is assigned in image-encounter order
148 // (`intern_image` uses `image_handles.len()`), so folding it would shift
149 // later images' signatures when an unrelated image is added earlier,
150 // wrecking the page-locality `first_divergence` relies on. `handle.rgb8`
151 // (asset content, §4.3) and `handle.resolved_path` (absolute path, §5
152 // rule 1) are excluded too.
153}
154
155/// A compact, deterministic digest of one laid-out [`Page`]'s break-defining
156/// content (design note §4.5).
157///
158/// Equal for identical pagination of identical content, different when a page's
159/// content or its break changes. It is the cache slot's staleness check, not a
160/// human-readable description: use [`content_hash`](Self::content_hash) to read
161/// the underlying value.
162///
163/// # Examples
164///
165/// ```
166/// use mos_layout::{LayoutEngine, PageBoundarySignature};
167/// use mos_core::Document;
168/// use std::path::PathBuf;
169///
170/// let doc = Document::new(PathBuf::from("doc.mos"));
171/// let result = LayoutEngine::new().layout(&doc);
172/// // Re-signing the same page yields the same signature.
173/// for page in &result.graph.pages {
174/// assert_eq!(
175/// PageBoundarySignature::of_page(page),
176/// PageBoundarySignature::of_page(page),
177/// );
178/// }
179/// ```
180#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
181pub struct PageBoundarySignature(ContentHash);
182
183impl PageBoundarySignature {
184 /// Compute the boundary signature of one laid-out page.
185 #[must_use]
186 pub fn of_page(page: &Page) -> Self {
187 let mut hasher = ContentHasher::new();
188 hasher
189 .field(PAGE_DOMAIN)
190 .u32(page.number)
191 .u32(quantize_units(page.width_pt))
192 .u32(quantize_units(page.height_pt));
193 fold_count(&mut hasher, page.runs.len());
194 for run in &page.runs {
195 fold_run(&mut hasher, run);
196 }
197 fold_count(&mut hasher, page.images.len());
198 for image in &page.images {
199 fold_image(&mut hasher, image);
200 }
201 Self(hasher.finish())
202 }
203
204 /// The underlying content hash.
205 #[must_use]
206 pub const fn content_hash(self) -> ContentHash {
207 self.0
208 }
209}
210
211/// The ordered per-page boundary signatures of a whole [`PageGraph`].
212///
213/// Comparing two graph signatures answers both "did pagination change?"
214/// (inequality) and "where?" ([`first_divergence`](Self::first_divergence)).
215///
216/// # Examples
217///
218/// ```
219/// use mos_layout::{LayoutEngine, PageGraphSignature};
220/// use mos_core::Document;
221/// use std::path::PathBuf;
222///
223/// let doc = Document::new(PathBuf::from("doc.mos"));
224/// let result = LayoutEngine::new().layout(&doc);
225/// let signature = PageGraphSignature::of_graph(&result.graph);
226/// // An unchanged layout signs identically and diverges nowhere.
227/// assert_eq!(signature, PageGraphSignature::of_graph(&result.graph));
228/// assert_eq!(signature.first_divergence(&signature), None);
229/// ```
230#[derive(Clone, Eq, PartialEq, Hash, Debug, Default)]
231pub struct PageGraphSignature(Vec<PageBoundarySignature>);
232
233impl PageGraphSignature {
234 /// Sign every page of `graph`, in page order.
235 #[must_use]
236 pub fn of_graph(graph: &PageGraph) -> Self {
237 Self(
238 graph
239 .pages
240 .iter()
241 .map(PageBoundarySignature::of_page)
242 .collect(),
243 )
244 }
245
246 /// The per-page signatures, in page order.
247 #[must_use]
248 pub fn pages(&self) -> &[PageBoundarySignature] {
249 &self.0
250 }
251
252 /// The index of the first page whose signature differs from `other`, or
253 /// [`None`] if the two are identical.
254 ///
255 /// When one graph is a prefix of the other (a page was added or removed at
256 /// the end), the divergence is the length of the shorter graph; the first
257 /// page index that exists in one but not the other.
258 ///
259 /// # Examples
260 ///
261 /// ```
262 /// use mos_layout::PageGraphSignature;
263 ///
264 /// let empty = PageGraphSignature::default();
265 /// assert_eq!(empty.first_divergence(&empty), None);
266 /// ```
267 #[must_use]
268 pub fn first_divergence(&self, other: &Self) -> Option<usize> {
269 self.0
270 .iter()
271 .zip(other.0.iter())
272 .position(|(a, b)| a != b)
273 .or_else(|| (self.0.len() != other.0.len()).then_some(self.0.len().min(other.0.len())))
274 }
275}
276
277impl LayoutResult {
278 /// The page boundary signatures of this layout's [`PageGraph`] (design note
279 /// §4.5). Convenience for cache/reflow consumers.
280 #[must_use]
281 pub fn page_boundary_signatures(&self) -> PageGraphSignature {
282 PageGraphSignature::of_graph(&self.graph)
283 }
284}
285
286#[cfg(test)]
287mod tests {
288 use std::sync::Arc;
289
290 use mos_fonts::{Base14Font, Font, ShapedGlyph};
291
292 use super::{PageBoundarySignature, PageGraphSignature, quantize_units};
293 use crate::types::{ImageHandle, ImagePlacement, Page, PageGraph, TextRun};
294
295 fn run(text: &str, x_pt: f32) -> TextRun {
296 TextRun {
297 x_pt,
298 baseline_from_top_pt: 100.0,
299 size_pt: 11.0,
300 font: Font::Base14(Base14Font::Helvetica),
301 text: text.to_owned(),
302 actual_text: None,
303 glyphs: Vec::new(),
304 }
305 }
306
307 fn page(number: u32, runs: Vec<TextRun>) -> Page {
308 Page {
309 number,
310 width_pt: 595.276,
311 height_pt: 841.89,
312 runs,
313 images: Vec::new(),
314 }
315 }
316
317 fn graph(pages: Vec<Page>) -> PageGraph {
318 PageGraph {
319 pages,
320 images: Vec::new(),
321 }
322 }
323
324 #[test]
325 fn unchanged_page_signs_identically() {
326 let a = page(1, vec![run("hello", 68.0), run("world", 110.0)]);
327 let b = page(1, vec![run("hello", 68.0), run("world", 110.0)]);
328 assert_eq!(
329 PageBoundarySignature::of_page(&a),
330 PageBoundarySignature::of_page(&b),
331 );
332 }
333
334 #[test]
335 fn changed_content_changes_the_signature() {
336 let base = page(1, vec![run("hello", 68.0)]);
337 let edited_text = page(1, vec![run("hallo", 68.0)]);
338 let moved = page(1, vec![run("hello", 70.0)]);
339 let extra = page(1, vec![run("hello", 68.0), run("more", 68.0)]);
340
341 let base_sig = PageBoundarySignature::of_page(&base);
342 assert_ne!(base_sig, PageBoundarySignature::of_page(&edited_text));
343 assert_ne!(base_sig, PageBoundarySignature::of_page(&moved));
344 assert_ne!(base_sig, PageBoundarySignature::of_page(&extra));
345 }
346
347 #[test]
348 fn page_number_is_folded() {
349 // Identical content on a differently-numbered page must sign
350 // differently; the page number is part of the boundary.
351 let runs = || vec![run("same", 68.0)];
352 assert_ne!(
353 PageBoundarySignature::of_page(&page(1, runs())),
354 PageBoundarySignature::of_page(&page(2, runs())),
355 );
356 }
357
358 #[test]
359 fn sub_shaper_unit_position_changes_are_ignored() {
360 // Two positions within one 1/64-pt unit snap to the same grid cell.
361 let base = page(1, vec![run("x", 68.0)]);
362 let nudged = page(1, vec![run("x", 68.0 + 0.001)]);
363 assert_eq!(
364 PageBoundarySignature::of_page(&base),
365 PageBoundarySignature::of_page(&nudged),
366 );
367 }
368
369 #[test]
370 fn quantize_snaps_to_one_sixty_fourth_point() {
371 // Within a grid cell: equal. Across a cell boundary: distinct.
372 assert_eq!(quantize_units(10.0), quantize_units(10.001));
373 assert_ne!(quantize_units(10.0), quantize_units(10.5));
374 // -0.0, +0.0, and non-finite values all normalize to the zero cell so
375 // they never create spurious distinctions.
376 assert_eq!(quantize_units(0.0), quantize_units(-0.0));
377 assert_eq!(quantize_units(f32::NAN), quantize_units(0.0));
378 assert_eq!(quantize_units(f32::INFINITY), quantize_units(0.0));
379 assert_eq!(quantize_units(f32::NEG_INFINITY), quantize_units(0.0));
380 }
381
382 #[test]
383 fn shaped_glyphs_do_not_affect_the_signature() {
384 // Glyphs are derived from text + font, so two runs that differ only in
385 // their shaped-glyph stream must sign the same.
386 let plain = page(1, vec![run("hi", 68.0)]);
387 let mut shaped_run = run("hi", 68.0);
388 shaped_run.glyphs = vec![ShapedGlyph {
389 gid: 42,
390 advance_units: 500,
391 x_offset_units: 0,
392 y_offset_units: 0,
393 cluster: 0,
394 }];
395 let shaped = page(1, vec![shaped_run]);
396 assert_eq!(
397 PageBoundarySignature::of_page(&plain),
398 PageBoundarySignature::of_page(&shaped),
399 );
400 }
401
402 #[test]
403 fn image_content_path_and_unstable_id_are_excluded_but_footprint_is_not() {
404 let place = |handle: ImageHandle| Page {
405 number: 1,
406 width_pt: 595.276,
407 height_pt: 841.89,
408 runs: Vec::new(),
409 images: vec![ImagePlacement {
410 handle,
411 x_pt: 68.0,
412 top_from_top_pt: 100.0,
413 width_pt: 200.0,
414 height_pt: 150.0,
415 }],
416 };
417 let handle = |id: u32, path: &str, rgb8: &[u8]| ImageHandle {
418 id,
419 resolved_path: path.to_owned(),
420 pixel_width: 2,
421 pixel_height: 1,
422 rgb8: Arc::from(rgb8.to_vec()),
423 };
424 // Different encounter-order id, different absolute path, and different
425 // decoded bytes, but the same layout footprint: the signature must not
426 // change. None of those belong in a deterministic, locality-preserving
427 // page boundary.
428 let a = place(handle(7, "/home/alice/fig.png", &[1, 2, 3, 4, 5, 6]));
429 let b = place(handle(8, "/home/bob/fig.png", &[9, 9, 9, 9, 9, 9]));
430 assert_eq!(
431 PageBoundarySignature::of_page(&a),
432 PageBoundarySignature::of_page(&b),
433 );
434 // The intrinsic pixel size *is* part of the footprint (the signature
435 // reads dimensions, not the pixel buffer), so a differently-sized asset
436 // signs differently.
437 let mut resized = handle(7, "/home/alice/fig.png", &[1, 2, 3, 4, 5, 6]);
438 resized.pixel_width = 4;
439 assert_ne!(
440 PageBoundarySignature::of_page(&a),
441 PageBoundarySignature::of_page(&place(resized)),
442 );
443 }
444
445 #[test]
446 fn graph_signature_localizes_a_pagination_change() {
447 // Page 1 unchanged; page 2 gains a run (a break moved). The graph
448 // signatures must diverge first at index 1.
449 let before = graph(vec![
450 page(1, vec![run("a", 68.0)]),
451 page(2, vec![run("b", 68.0)]),
452 ]);
453 let after = graph(vec![
454 page(1, vec![run("a", 68.0)]),
455 page(2, vec![run("b", 68.0), run("c", 110.0)]),
456 ]);
457 let before_sig = PageGraphSignature::of_graph(&before);
458 let after_sig = PageGraphSignature::of_graph(&after);
459
460 assert_ne!(before_sig, after_sig);
461 assert_eq!(before_sig.first_divergence(&after_sig), Some(1));
462 assert_eq!(before_sig.first_divergence(&before_sig), None);
463 }
464
465 #[test]
466 fn graph_signature_flags_an_added_trailing_page() {
467 let short = graph(vec![page(1, vec![run("a", 68.0)])]);
468 let long = graph(vec![
469 page(1, vec![run("a", 68.0)]),
470 page(2, vec![run("b", 68.0)]),
471 ]);
472 let short_sig = PageGraphSignature::of_graph(&short);
473 let long_sig = PageGraphSignature::of_graph(&long);
474 // The shared page 0 matches; divergence is the new trailing index.
475 assert_eq!(short_sig.first_divergence(&long_sig), Some(1));
476 assert_eq!(short_sig.pages().len(), 1);
477 assert_eq!(long_sig.pages().len(), 2);
478 }
479}