Skip to main content

mos_layout/
lib.rs

1//! Layout engine for Mosaic.
2//!
3//! MVP 0 implements the smallest end-to-end slice that gets ink on a
4//! page: greedy line-breaking against fixed A4 metrics, walking a
5//! lowered [`Document`] into a [`PageGraph`]. Real shaping
6//! (`HarfBuzz`/`rustybuzz`), Knuth-Plass, hyphenation, and font
7//! embedding are deferred per the manifest's MVP roadmap (§30,
8//! §22.1, §22.2). Boundary-state reuse for incremental builds
9//! (§22.3, §33) is also out of scope here.
10
11#![doc(
12    html_logo_url = "https://mosaic.kjanat.dev/assets/A4.svg",
13    html_favicon_url = "https://mosaic.kjanat.dev/assets/A4.svg"
14)]
15
16pub use boundary::{PageBoundarySignature, PageGraphSignature};
17use mos_fonts::nfc_text;
18pub use mos_fonts::{
19    Base14Font, EmbeddedFontId, Font, FontFamily, ShapedGlyph, WordSubRun, ascent, descent,
20    glyph_width, shape_with_fallback, text_width,
21};
22pub use style::paper_size_pt;
23pub use types::{
24    A4_HEIGHT_PT, A4_WIDTH_PT, ImageHandle, ImagePlacement, LayoutResult, MARGIN_PT, Page,
25    PageGraph, PageStyle, TextRun, TextStyle,
26};
27
28use std::collections::BTreeMap;
29
30use mos_core::{AttrValue, Diagnostic, Document, Node, NodeKind};
31use style::resolve_styles;
32use support::{blank_page, expand_tabs, read_level, read_str_attr};
33use types::BODY_LEADING;
34use word::{ShyBreak, Word, WordItem, split_soft_hyphens, try_shy_break, word_clusters};
35
36mod boundary;
37mod image;
38mod list;
39mod style;
40mod support;
41mod types;
42mod word;
43
/// Heading font sizes in points, indexed by `level - 1`. Only three
/// levels are tabulated: `layout_heading` clamps the section level to
/// `1..=3`, so headings deeper than level 3 reuse the level-3 entry
/// (they do *not* fall back to the body size). Counters and section
/// numbering land in MVP 1.
const HEADING_SIZES_PT: [f32; 3] = [20.0, 16.0, 13.0];
/// Space above each heading level, indexed by `level - 1` (skipped
/// for the first block on a page).
const HEADING_SPACE_BEFORE_PT: [f32; 3] = [16.0, 12.0, 10.0];
/// Space below each heading level, indexed by `level - 1`.
const HEADING_SPACE_AFTER_PT: [f32; 3] = [10.0, 8.0, 6.0];
/// Vertical gap between consecutive paragraphs.
const PARA_SPACE_AFTER_PT: f32 = 4.0;
/// Horizontal gutter reserved for the list marker (`•` for unordered,
/// `1.` for ordered) on each nesting level. Doubles as the per-level
/// indent step: nested items shift right by this many points before
/// their own gutter is added. Sized to comfortably hold a one- or
/// two-digit ordered marker at the default body size; lists with
/// three-digit numbering will overflow the gutter visually until
/// per-list gutter tuning lands.
const LIST_MARKER_GUTTER_PT: f32 = 18.0;
/// Number of columns represented by one tab in raw code/pre blocks.
const RAW_BLOCK_TAB_WIDTH: usize = 4;
64
/// Entry point for MVP 0 layout.
///
/// The engine is a field-less unit struct; all per-run state lives in
/// a private accumulator created inside [`LayoutEngine::layout`].
///
/// # Examples
///
/// ```
/// use mos_layout::LayoutEngine;
///
/// let engine = LayoutEngine::new();
///
/// assert_eq!(format!("{engine:?}"), "LayoutEngine");
/// ```
#[derive(Debug, Default)]
pub struct LayoutEngine;
78
79impl LayoutEngine {
80    /// Construct a layout engine.
81    ///
82    /// # Examples
83    ///
84    /// ```
85    /// use mos_layout::LayoutEngine;
86    ///
87    /// let engine = LayoutEngine::new();
88    ///
89    /// assert_eq!(format!("{engine:?}"), "LayoutEngine");
90    /// ```
91    #[must_use]
92    pub fn new() -> Self {
93        Self
94    }
95
96    /// Lay out `document` into a [`PageGraph`]. Never returns an
97    /// error in MVP 0: invalid blocks are skipped and surfaced as
98    /// diagnostics on `LayoutResult` instead.
99    ///
100    /// # Examples
101    ///
102    /// ```
103    /// use std::path::PathBuf;
104    ///
105    /// use mos_core::Document;
106    /// use mos_layout::LayoutEngine;
107    ///
108    /// let doc = Document::new(PathBuf::from("main.mos"));
109    /// let result = LayoutEngine::new().layout(&doc);
110    ///
111    /// assert_eq!(result.graph.pages.len(), 1);
112    /// ```
113    pub fn layout(&mut self, document: &Document) -> LayoutResult {
114        let (page_style, text_style, mut diagnostics) = resolve_styles(document);
115        let mut state = LayoutState::new(page_style, text_style);
116        state.diagnostics.append(&mut diagnostics);
117        let Some(root) = document.get(document.root) else {
118            return state.finish();
119        };
120        for child_id in &root.children {
121            let Some(node) = document.get(*child_id) else {
122                continue;
123            };
124            // Queue this block's label (if any) so it binds to the page its
125            // first content actually lands on (issue #72), not the page the
126            // cursor happens to sit on before a break.
127            state.queue_label(node);
128            match node.kind {
129                NodeKind::Section => state.layout_heading(document, node),
130                NodeKind::Paragraph => state.layout_paragraph(document, node),
131                NodeKind::Image => state.layout_image(*child_id, node),
132                NodeKind::Figure => state.layout_figure(document, node),
133                NodeKind::List => state.layout_list(document, node),
134                NodeKind::Raw if node.attributes.contains_key("raw.kind") => {
135                    state.layout_raw_block(node);
136                }
137                // `#set` blocks are stashed as `Raw` children of the
138                // root; folded into styles by `resolve_styles` above.
139                NodeKind::Raw if node.attributes.contains_key("set") => {}
140                _ => {
141                    // Unknown top-level kinds (Table, Equation, etc.)
142                    // arrive in MVP 1+; ignore so MVP 0 doesn't panic
143                    // on forward-compatible input.
144                }
145            }
146            // This block is fully laid out. Any label still queued belongs to a
147            // block that emitted no content (an empty paragraph, an unsupported
148            // kind, a `#set` block); drop it so it never binds to a later
149            // block's page (issue #72).
150            state.discard_unbound_labels();
151        }
152        state.finish()
153    }
154}
155
156/// Mutable cursor + accumulator threaded through the layout.
157struct LayoutState {
158    pages: Vec<Page>,
159    /// In-progress page being filled.
160    current_page: Page,
161    /// Y position of the next baseline, measured from page top.
162    cursor_y: f32,
163    /// Whether `current_page` has had any block emitted yet (controls
164    /// `space_before` skipping).
165    page_has_content: bool,
166    diagnostics: Vec<Diagnostic>,
167    page: PageStyle,
168    text: TextStyle,
169    /// Image dedup table: resolved path → handle. Two `#image(...)`
170    /// directives that reference the same on-disk file share one
171    /// [`ImageHandle`] (and therefore one `XObject` in the emitted PDF).
172    image_handles: Vec<ImageHandle>,
173    /// Left edge of the current text column. Equals `page.margin_pt`
174    /// at the top level; list layout pushes this rightward so item
175    /// text hangs into the gutter under its marker.
176    current_left_pt: f32,
177    /// Marker run to emit at the start of the next flushed line. Used
178    /// by list items to draw `•` / `1.` in the gutter to the left of
179    /// `current_left_pt` on the first line of each item. Cleared by
180    /// `flush_line` once the marker is committed to a page.
181    pending_marker: Option<PendingMarker>,
182    /// Labels of blocks dispatched but not yet committed to a page. Bound
183    /// to the page their first content lands on (issue #72): see
184    /// [`LayoutState::bind_pending_labels`].
185    pending_labels: Vec<String>,
186    /// Built result of label → 1-based start page. Emitted into the
187    /// [`PageGraph`] by [`LayoutState::finish`].
188    label_pages: BTreeMap<String, u32>,
189}
190
191#[derive(Clone, Debug)]
192struct PendingMarker {
193    /// X position (page-relative, points from the page's left edge)
194    /// where the marker's left edge should sit.
195    x_pt: f32,
196    /// Pre-shaped marker word. Width is informational only: the
197    /// marker is drawn outside `current_left_pt` so it doesn't reserve
198    /// space in the text column.
199    word: Word,
200}
201
202impl LayoutState {
203    fn new(page: PageStyle, text: TextStyle) -> Self {
204        Self {
205            pages: Vec::new(),
206            current_page: blank_page(1, page),
207            cursor_y: page.margin_pt,
208            page_has_content: false,
209            diagnostics: Vec::new(),
210            page,
211            text,
212            image_handles: Vec::new(),
213            current_left_pt: page.margin_pt,
214            pending_marker: None,
215            pending_labels: Vec::new(),
216            label_pages: BTreeMap::new(),
217        }
218    }
219
220    /// Queue a block's `label` attribute (if present) for binding to the
221    /// page its first content commits to (issue #72).
222    fn queue_label(&mut self, node: &Node) {
223        if let Some(AttrValue::Str(label)) = node.attributes.get("label") {
224            self.pending_labels.push(label.clone());
225        }
226    }
227
228    /// Drop labels still queued after a block finished laying out. The block
229    /// (or its label) committed no content, so the label has no page; clearing
230    /// it keeps a labelled no-content block out of `label_pages` instead of
231    /// letting its label leak onto the next block that does emit content. Only
232    /// the just-dispatched block's label can be queued here, since
233    /// [`queue_label`](Self::queue_label) runs once per top-level block.
234    fn discard_unbound_labels(&mut self) {
235        self.pending_labels.clear();
236    }
237
238    /// Bind every queued label to the current page. Called at each
239    /// first-content-commit site *after* its page-break check, so a label
240    /// maps to where its target actually lands. First placement wins
241    /// (`or_insert`), matching the resolver's first-occurrence label rule.
242    fn bind_pending_labels(&mut self) {
243        if self.pending_labels.is_empty() {
244            return;
245        }
246        let page = self.current_page.number;
247        for label in self.pending_labels.drain(..) {
248            self.label_pages.entry(label).or_insert(page);
249        }
250    }
251
252    fn column_width_pt(&self) -> f32 {
253        self.page.width_pt - self.page.margin_pt - self.current_left_pt
254    }
255
256    fn finish(mut self) -> LayoutResult {
257        // Always emit the last page even if empty so the PDF is valid
258        // (a Pages tree with `Count 0` is illegal); only skip when an
259        // earlier page already accumulated content and the trailing
260        // page is genuinely blank.
261        if self.page_has_content || self.pages.is_empty() {
262            self.pages.push(self.current_page);
263        }
264        LayoutResult {
265            graph: PageGraph {
266                pages: self.pages,
267                images: self.image_handles,
268            },
269            diagnostics: self.diagnostics,
270            label_pages: self.label_pages,
271        }
272    }
273
274    fn layout_heading(&mut self, document: &Document, section: &Node) {
275        let level = usize::from(read_level(section).unwrap_or(1).clamp(1, 3));
276        let size = HEADING_SIZES_PT[level - 1];
277        let space_before = HEADING_SPACE_BEFORE_PT[level - 1];
278        let space_after = HEADING_SPACE_AFTER_PT[level - 1];
279
280        if self.page_has_content {
281            self.cursor_y += space_before;
282        }
283        let bold = self.text.family.bold;
284        let mut words = self.collect_words(document, section, bold, size);
285        // Resolver-assigned section number is rendered as a leading
286        // word so it gets the same font/size as the title and flows
287        // through the existing line-break path. The trailing `.` is
288        // the conventional "1." style; `#set heading(numbering: ...)`
289        // (manifest §4) overrides it once `#set` is interpreted.
290        if let Some(number) = read_str_attr(section, "number") {
291            let prefix = format!("{number}.");
292            let subruns = shape_with_fallback(bold, self.text.family.fallbacks, size, &prefix);
293            let width_pt: f32 = subruns.iter().map(|s| s.advance_pt).sum();
294            words.insert(
295                0,
296                WordItem::Word(Word {
297                    text: prefix,
298                    actual_text: None,
299                    font: bold,
300                    size_pt: size,
301                    width_pt,
302                    subruns,
303                    shy_break_offsets: Vec::new(),
304                }),
305            );
306        }
307        self.flow_words(&words, BODY_LEADING);
308        self.cursor_y += space_after;
309    }
310
311    fn layout_paragraph(&mut self, document: &Document, paragraph: &Node) {
312        let size = self.text.size_pt;
313        let leading = self.text.leading;
314        let regular = self.text.family.regular;
315        let words = self.collect_words(document, paragraph, regular, size);
316        self.flow_words(&words, leading);
317        self.cursor_y += PARA_SPACE_AFTER_PT;
318    }
319
320    fn layout_raw_block(&mut self, raw: &Node) {
321        let Some(AttrValue::Str(text)) = raw.attributes.get("text") else {
322            return;
323        };
324        let size = self.text.size_pt;
325        let leading = self.text.leading;
326        let font = self.text.family.monospace;
327        let mut emitted = false;
328        for line in text.lines() {
329            if line.is_empty() {
330                if !self.page_has_content {
331                    self.cursor_y = self.page.margin_pt + ascent(font, size);
332                    self.page_has_content = true;
333                }
334                self.cursor_y += size * leading;
335                continue;
336            }
337            let expanded_line = expand_tabs(line, RAW_BLOCK_TAB_WIDTH);
338            let subruns = shape_with_fallback(
339                font,
340                self.text.family.fallbacks,
341                size,
342                expanded_line.as_ref(),
343            );
344            let width_pt: f32 = subruns.iter().map(|s| s.advance_pt).sum();
345            let actual_text = (expanded_line.as_ref() != line).then(|| line.to_owned());
346            let word = Word {
347                text: expanded_line.into_owned(),
348                actual_text,
349                font,
350                size_pt: size,
351                width_pt,
352                subruns,
353                shy_break_offsets: Vec::new(),
354            };
355            self.flow_words(&[WordItem::Word(word)], leading);
356            emitted = true;
357        }
358        if emitted {
359            self.cursor_y += PARA_SPACE_AFTER_PT;
360        }
361    }
362
363    /// Walk `parent`'s inline children and produce a flat list of
364    /// [`WordItem`]s. Inline whitespace inside text runs collapses to
365    /// a single split point (`split_ascii_whitespace` handles
366    /// `\n`/`\r`/`\t` uniformly **and intentionally preserves U+00A0
367    /// NBSP**: non-ASCII whitespace stays inside the word so the
368    /// breaker never splits at NBSP). Each word is shaped once here;
369    /// the resulting glyphs and width flow through to [`TextRun`]
370    /// without re-shaping during line breaking. `NodeKind::HardBreak`
371    /// children are emitted as `WordItem::HardBreak` sentinels.
372    fn collect_words(
373        &mut self,
374        document: &Document,
375        parent: &Node,
376        default_font: Font,
377        size: f32,
378    ) -> Vec<WordItem> {
379        let mut out: Vec<WordItem> = Vec::new();
380        for child_id in &parent.children {
381            let Some(child) = document.get(*child_id) else {
382                continue;
383            };
384            if matches!(child.kind, NodeKind::HardBreak) {
385                out.push(WordItem::HardBreak);
386                continue;
387            }
388            let font = match child.kind {
389                NodeKind::Strong => self.text.family.bold,
390                NodeKind::Emphasis => self.text.family.italic,
391                NodeKind::BoldItalic => self.text.family.bold_italic,
392                NodeKind::Raw => self.text.family.monospace,
393                // Nested list blocks under a `ListItem` are laid out
394                // separately by `layout_list`; skip them here so they
395                // don't leak into the parent item's word stream.
396                NodeKind::List | NodeKind::ListItem => continue,
397                _ => default_font,
398            };
399            let raw = match child.attributes.get("text") {
400                Some(AttrValue::Str(s)) => s.as_str(),
401                _ => continue,
402            };
403            // U+00A0 NBSP is intentionally preserved by
404            // `split_ascii_whitespace` (it only splits on ASCII
405            // whitespace), keeping `Mr.\u{A0}Smith` as one logical
406            // word. This is the documented contract.
407            for piece in raw.split_ascii_whitespace() {
408                if piece.is_empty() {
409                    continue;
410                }
411                let piece = nfc_text(piece);
412                let piece = piece.as_ref();
413                // Strip U+00AD before shaping so SHY never renders as
414                // a visible hyphen on either the embedded or Base-14
415                // path. Keep the codepoint offsets so a future
416                // Knuth-Plass breaker can hyphenate at the author's
417                // marked positions.
418                let (stripped, shy_offsets) = split_soft_hyphens(piece);
419                // A piece that was entirely SHY codepoints leaves
420                // nothing to shape; skip it so we don't emit a
421                // phantom zero-width word that would inflate the
422                // interword gap on either side.
423                if stripped.is_empty() {
424                    continue;
425                }
426                let subruns =
427                    shape_with_fallback(font, self.text.family.fallbacks, size, &stripped);
428                let width_pt: f32 = subruns.iter().map(|s| s.advance_pt).sum();
429                out.push(WordItem::Word(Word {
430                    text: stripped,
431                    actual_text: None,
432                    font,
433                    size_pt: size,
434                    width_pt,
435                    subruns,
436                    shy_break_offsets: shy_offsets,
437                }));
438            }
439        }
440        out
441    }
442
443    /// Greedy line-break `items` and emit text runs onto the page,
444    /// paginating as we go. `leading` is the line-height multiplier
445    /// applied per line. `WordItem::HardBreak` forces a line flush
446    /// at its position (and produces a blank line when two hard
447    /// breaks are adjacent or one lands mid-paragraph with no words
448    /// behind it).
449    fn flow_words(&mut self, items: &[WordItem], leading: f32) {
450        if items.is_empty() {
451            return;
452        }
453        let line_width = self.column_width_pt();
454        let mut line: Vec<Word> = Vec::new();
455        let mut line_width_used = 0.0_f32;
456        // Paragraph-local state so hard-break collapsing follows
457        // block-boundary semantics rather than page-state ones:
458        // * `paragraph_emitted_line` is true once anything in this
459        //   paragraph has emitted vertical space (a flushed line, a
460        //   wrapped overflow, or an oversize chunk).
461        // * `last_was_hardbreak_flush` is true only after a hard
462        //   break flushed (or stacked onto) a line. Stacked hard
463        //   breaks emit blank lines; a hard break following an
464        //   implicit break (oversize / soft wrap) is absorbed without
465        //   adding a blank line, matching the author's natural
466        //   reading of "force a break here" when the line just
467        //   ended on its own.
468        let mut paragraph_emitted_line = false;
469        let mut last_was_hardbreak_flush = false;
470        // Suffix produced by a SHY split takes priority over the
471        // next `items` entry. A single source word with several SHYs
472        // may break twice (`super\-cali\-fragil\-istic` on a narrow
473        // column), so the suffix re-enters the same dispatch loop.
474        let mut pending: Option<Word> = None;
475        let mut item_idx = 0;
476
477        loop {
478            let word_owned: Word = if let Some(w) = pending.take() {
479                w
480            } else if item_idx < items.len() {
481                let item = &items[item_idx];
482                item_idx += 1;
483                match item {
484                    WordItem::Word(w) => w.clone(),
485                    WordItem::HardBreak => {
486                        if !line.is_empty() {
487                            self.flush_line(&line, leading);
488                            line.clear();
489                            line_width_used = 0.0;
490                            paragraph_emitted_line = true;
491                            last_was_hardbreak_flush = true;
492                        } else if last_was_hardbreak_flush {
493                            // Stacked hard breaks: emit a blank line
494                            // and remain in the "just hard-broke"
495                            // state so a third break emits another
496                            // blank.
497                            self.cursor_y += self.text.size_pt * leading;
498                        } else if paragraph_emitted_line {
499                            // First hard break after an implicit
500                            // break (oversize chunk or soft-wrap
501                            // flush). The cursor already advanced
502                            // past the previous line, so this break
503                            // is absorbed silently. Promote the
504                            // state so a *second* stacked hard
505                            // break still produces a blank line.
506                            last_was_hardbreak_flush = true;
507                        }
508                        // else: paragraph hasn't emitted anything
509                        // yet -- leading hard breaks collapse
510                        // silently regardless of whether prior
511                        // blocks have painted on the page.
512                        continue;
513                    }
514                }
515            } else {
516                break;
517            };
518
519            let space_w = if line.is_empty() {
520                0.0
521            } else {
522                text_width(word_owned.font, word_owned.size_pt, " ")
523            };
524
525            // Word fits on the current line: append and continue.
526            if line_width_used + space_w + word_owned.width_pt <= line_width {
527                line_width_used += space_w + word_owned.width_pt;
528                line.push(word_owned);
529                continue;
530            }
531
532            // Word does not fit. First try a SHY break that lets us
533            // keep filling the current partially-occupied line.
534            if !line.is_empty()
535                && let Some(ShyBreak { prefix, suffix }) = try_shy_break(
536                    &word_owned,
537                    line_width - line_width_used - space_w,
538                    self.text.family.fallbacks,
539                )
540            {
541                line.push(prefix);
542                self.flush_line(&line, leading);
543                line.clear();
544                line_width_used = 0.0;
545                paragraph_emitted_line = true;
546                last_was_hardbreak_flush = false;
547                pending = Some(suffix);
548                continue;
549            }
550
551            // Either no SHY fit the current line, or the line was
552            // already empty. Flush any in-progress line and decide
553            // what to do on a fresh empty line.
554            if !line.is_empty() {
555                self.flush_line(&line, leading);
556                line.clear();
557                line_width_used = 0.0;
558                paragraph_emitted_line = true;
559                last_was_hardbreak_flush = false;
560            }
561
562            // On the now-empty line: if the word still doesn't fit
563            // the full column, try a SHY break against the empty
564            // line before falling back to cluster chopping.
565            if word_owned.width_pt > line_width {
566                if let Some(ShyBreak { prefix, suffix }) =
567                    try_shy_break(&word_owned, line_width, self.text.family.fallbacks)
568                {
569                    line.push(prefix);
570                    self.flush_line(&line, leading);
571                    line.clear();
572                    line_width_used = 0.0;
573                    paragraph_emitted_line = true;
574                    last_was_hardbreak_flush = false;
575                    pending = Some(suffix);
576                    continue;
577                }
578                self.flush_oversize_word(&word_owned, leading);
579                paragraph_emitted_line = true;
580                last_was_hardbreak_flush = false;
581                continue;
582            }
583
584            // Word fits the empty line as-is (a plain soft wrap).
585            line_width_used = word_owned.width_pt;
586            line.push(word_owned);
587        }
588        if !line.is_empty() {
589            self.flush_line(&line, leading);
590        }
591    }
592
593    /// Emit one line worth of words at `cursor_y`, advancing past it.
594    /// Computes the line's typographic metrics from `line` itself so
595    /// the caller doesn't have to track them in parallel.
596    fn flush_line(&mut self, line: &[Word], leading: f32) {
597        // The marker participates in the line's vertical metrics so a
598        // taller marker still gets the right baseline. In practice the
599        // marker uses the body face at body size, but folding it in
600        // costs nothing and avoids surprises if list layout grows the
601        // ability to override marker size later.
602        let marker_size = self
603            .pending_marker
604            .as_ref()
605            .map_or(0.0_f32, |m| m.word.size_pt);
606        let marker_ascent = self.pending_marker.as_ref().map_or(0.0_f32, |m| {
607            m.word
608                .subruns
609                .iter()
610                .map(|sub| ascent(sub.font, m.word.size_pt))
611                .fold(0.0_f32, f32::max)
612        });
613        let max_size = line.iter().map(|w| w.size_pt).fold(marker_size, f32::max);
614        let max_ascent = line
615            .iter()
616            .flat_map(|w| w.subruns.iter().map(|sub| ascent(sub.font, w.size_pt)))
617            .fold(marker_ascent, f32::max);
618
619        // First line on a page: drop the baseline by the line's
620        // ascent so the glyph tops sit at the top margin.
621        if !self.page_has_content {
622            self.cursor_y = self.page.margin_pt + max_ascent;
623        }
624        // Page break if the baseline would fall below the bottom
625        // margin. Descent is small and absorbed by the bottom margin.
626        if self.cursor_y > self.page.height_pt - self.page.margin_pt {
627            self.start_new_page();
628            self.cursor_y = self.page.margin_pt + max_ascent;
629        }
630
631        // The page is now settled for this line; bind any labels waiting on
632        // their first content to it (issue #72).
633        self.bind_pending_labels();
634
635        // Marker (`•` / `1.` …) is drawn in the gutter to the left of
636        // `current_left_pt` once the baseline is locked. Consumed on
637        // emit so subsequent wrapped lines of the same item don't
638        // restamp the marker.
639        if let Some(marker) = self.pending_marker.take() {
640            let mut marker_x = marker.x_pt;
641            for sub in marker.word.subruns {
642                self.current_page.runs.push(TextRun {
643                    x_pt: marker_x,
644                    baseline_from_top_pt: self.cursor_y,
645                    size_pt: marker.word.size_pt,
646                    font: sub.font,
647                    text: sub.text,
648                    actual_text: None,
649                    glyphs: sub.glyphs,
650                });
651                marker_x += sub.advance_pt;
652            }
653        }
654
655        let mut x = self.current_left_pt;
656        for (i, word) in line.iter().enumerate() {
657            if i > 0 {
658                x += text_width(word.font, word.size_pt, " ");
659            }
660            // One TextRun per sub-run: same baseline, x advances by
661            // each sub-run's `advance_pt`. PDF emit's per-run `Tf`
662            // switch fires naturally at the font boundary between
663            // sub-runs (Latin → Math → Latin in `a≤b`-style runs).
664            for sub in &word.subruns {
665                self.current_page.runs.push(TextRun {
666                    x_pt: x,
667                    baseline_from_top_pt: self.cursor_y,
668                    size_pt: word.size_pt,
669                    font: sub.font,
670                    text: sub.text.clone(),
671                    actual_text: word.actual_text.clone(),
672                    glyphs: sub.glyphs.clone(),
673                });
674                x += sub.advance_pt;
675            }
676        }
677        self.page_has_content = true;
678        self.cursor_y += max_size * leading;
679    }
680
681    /// Emit a word that's wider than the column by chopping it on
682    /// already-shaped cluster boundaries. The word was shaped when it
683    /// was collected, so this avoids re-running rustybuzz for every
684    /// growing prefix of a degenerate long word.
685    fn flush_oversize_word(&mut self, word: &Word, leading: f32) {
686        let line_width = self.column_width_pt();
687        let mut chunk_text = String::with_capacity(word.text.len());
688        let mut chunk_width = 0.0_f32;
689        let mut chunk_subruns = Vec::new();
690        for cluster in word_clusters(word) {
691            if chunk_width + cluster.advance_pt > line_width && !chunk_subruns.is_empty() {
692                self.flush_oversize_chunk(
693                    std::mem::take(&mut chunk_text),
694                    chunk_width,
695                    std::mem::take(&mut chunk_subruns),
696                    word,
697                    leading,
698                );
699                chunk_width = 0.0;
700            }
701            chunk_text.push_str(&cluster.text);
702            chunk_width += cluster.advance_pt;
703            chunk_subruns.push(cluster);
704        }
705        if !chunk_subruns.is_empty() {
706            self.flush_oversize_chunk(chunk_text, chunk_width, chunk_subruns, word, leading);
707        }
708    }
709
710    fn flush_oversize_chunk(
711        &mut self,
712        text: String,
713        width_pt: f32,
714        subruns: Vec<WordSubRun>,
715        source: &Word,
716        leading: f32,
717    ) {
718        self.flush_line(
719            &[Word {
720                text,
721                actual_text: None,
722                font: source.font,
723                size_pt: source.size_pt,
724                width_pt,
725                subruns,
726                shy_break_offsets: Vec::new(),
727            }],
728            leading,
729        );
730    }
731
732    fn start_new_page(&mut self) {
733        let next_number = self.current_page.number + 1;
734        let finished =
735            std::mem::replace(&mut self.current_page, blank_page(next_number, self.page));
736        self.pages.push(finished);
737        self.cursor_y = self.page.margin_pt;
738        self.page_has_content = false;
739    }
740}
741
742#[cfg(test)]
743mod tests {
744    #![allow(
745        clippy::unwrap_used,
746        clippy::expect_used,
747        reason = "tests panic loudly on setup failure; matches crate-wide test-module convention"
748    )]
749    use std::path::PathBuf;
750
751    use mos_core::{AttrMap, AttrValue, Document, NodeId, NodeKind, NodeSpec, SourceSpan};
752
753    use crate::types::BODY_SIZE_PT;
754
755    use super::*;
756
757    fn alloc_inline(doc: &mut Document, parent: NodeId, kind: NodeKind, text: &str) {
758        let mut attrs = AttrMap::new();
759        attrs.insert("text".to_owned(), AttrValue::Str(text.to_owned()));
760        doc.alloc_child(
761            parent,
762            NodeSpec::new(kind, SourceSpan::placeholder(PathBuf::from("test.mos")))
763                .with_attributes(attrs),
764        );
765    }
766
767    /// Tests that assert Base14 font variants on `TextRun` need to opt
768    /// out of the default Noto Sans family. Prepend a `#set
769    /// text(font: "Helvetica")` block so the family resolves to
770    /// Base14 Helvetica.
771    fn pin_helvetica(doc: &mut Document) {
772        let mut attrs = AttrMap::new();
773        attrs.insert("set".to_owned(), AttrValue::Str("text".to_owned()));
774        attrs.insert(
775            "set.arg.font".to_owned(),
776            AttrValue::Str("Helvetica".to_owned()),
777        );
778        doc.alloc_child(
779            doc.root,
780            NodeSpec::new(
781                NodeKind::Raw,
782                SourceSpan::placeholder(PathBuf::from("test.mos")),
783            )
784            .with_attributes(attrs),
785        );
786    }
787
788    fn make_section(doc: &mut Document, level: i64, text: &str) -> NodeId {
789        let mut attrs = AttrMap::new();
790        attrs.insert("level".to_owned(), AttrValue::Int(level));
791        let id = doc.alloc_child(
792            doc.root,
793            NodeSpec::new(
794                NodeKind::Section,
795                SourceSpan::placeholder(PathBuf::from("test.mos")),
796            )
797            .with_attributes(attrs),
798        );
799        alloc_inline(doc, id, NodeKind::Text, text);
800        id
801    }
802
803    fn make_paragraph(doc: &mut Document, text: &str) -> NodeId {
804        let id = doc.alloc_child(
805            doc.root,
806            NodeSpec::new(
807                NodeKind::Paragraph,
808                SourceSpan::placeholder(PathBuf::from("test.mos")),
809            ),
810        );
811        alloc_inline(doc, id, NodeKind::Text, text);
812        id
813    }
814
815    fn make_labelled_paragraph(doc: &mut Document, text: &str, label: &str) -> NodeId {
816        let mut attrs = AttrMap::new();
817        attrs.insert("label".to_owned(), AttrValue::Str(label.to_owned()));
818        let id = doc.alloc_child(
819            doc.root,
820            NodeSpec::new(
821                NodeKind::Paragraph,
822                SourceSpan::placeholder(PathBuf::from("test.mos")),
823            )
824            .with_attributes(attrs),
825        );
826        alloc_inline(doc, id, NodeKind::Text, text);
827        id
828    }
829
830    fn make_raw_block(doc: &mut Document, text: &str) -> NodeId {
831        let mut attrs = AttrMap::new();
832        attrs.insert("raw.kind".to_owned(), AttrValue::Str("code".to_owned()));
833        attrs.insert("text".to_owned(), AttrValue::Str(text.to_owned()));
834        doc.alloc_child(
835            doc.root,
836            NodeSpec::new(
837                NodeKind::Raw,
838                SourceSpan::placeholder(PathBuf::from("test.mos")),
839            )
840            .with_attributes(attrs),
841        )
842    }
843
844    #[test]
845    fn heading_then_paragraph_emits_runs_in_order() {
846        let mut doc = Document::new(PathBuf::from("test.mos"));
847        pin_helvetica(&mut doc);
848        make_section(&mut doc, 1, "Hello");
849        make_paragraph(&mut doc, "body");
850        let result = LayoutEngine::new().layout(&doc);
851        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
852        assert_eq!(result.graph.pages.len(), 1);
853        let runs = &result.graph.pages[0].runs;
854        assert!(runs.len() >= 2, "expected at least 2 runs, got {runs:?}");
855        // Heading first, body below it.
856        assert!(matches!(
857            runs[0].font,
858            Font::Base14(Base14Font::HelveticaBold)
859        ));
860        assert_eq!(runs[0].text, "Hello");
861        let body_run = runs.iter().find(|r| r.text == "body").expect("body run");
862        assert!(matches!(body_run.font, Font::Base14(Base14Font::Helvetica)));
863        assert!(body_run.baseline_from_top_pt > runs[0].baseline_from_top_pt);
864    }
865
866    #[test]
867    fn long_paragraph_paginates() {
868        // Build a paragraph long enough to spill a second page at
869        // body size + leading 1.35.
870        let mut doc = Document::new(PathBuf::from("test.mos"));
871        // ~150 lines of text at 11pt × 1.35 leading ≈ 2227 pt of
872        // copy. A4 minus margins is roughly 706 pt of vertical
873        // space, so we expect ≥ 3 pages.
874        let mut text = String::new();
875        for i in 0..1500 {
876            text.push_str(&format!("word{i} "));
877        }
878        make_paragraph(&mut doc, text.trim());
879        let result = LayoutEngine::new().layout(&doc);
880        assert!(
881            result.graph.pages.len() >= 2,
882            "expected pagination, got {} page(s)",
883            result.graph.pages.len()
884        );
885    }
886
887    #[test]
888    fn page_boundary_signatures_are_stable_and_change_with_pagination() {
889        fn lay_out(word_count: usize) -> LayoutResult {
890            let mut doc = Document::new(PathBuf::from("test.mos"));
891            let mut text = String::new();
892            for i in 0..word_count {
893                text.push_str(&format!("word{i} "));
894            }
895            make_paragraph(&mut doc, text.trim());
896            LayoutEngine::new().layout(&doc)
897        }
898
899        // Deterministic for unchanged input: the same document laid out twice
900        // signs identically and diverges nowhere.
901        let first = lay_out(1500);
902        let again = lay_out(1500);
903        assert!(first.graph.pages.len() >= 2, "expected a multi-page layout");
904        assert_eq!(
905            first.page_boundary_signatures(),
906            again.page_boundary_signatures(),
907        );
908        assert_eq!(
909            first
910                .page_boundary_signatures()
911                .first_divergence(&again.page_boundary_signatures()),
912            None,
913        );
914
915        // Appending copy reflows pagination, so the signatures must diverge at
916        // some page.
917        let longer = lay_out(1540);
918        let base_sig = first.page_boundary_signatures();
919        let longer_sig = longer.page_boundary_signatures();
920        assert_ne!(base_sig, longer_sig);
921        assert!(longer_sig.pages().len() >= base_sig.pages().len());
922        assert!(base_sig.first_divergence(&longer_sig).is_some());
923    }
924
925    #[test]
926    fn label_pages_maps_a_first_block_to_page_one() {
927        let mut doc = Document::new(PathBuf::from("test.mos"));
928        make_labelled_paragraph(&mut doc, "Introduction", "intro");
929        let result = LayoutEngine::new().layout(&doc);
930        assert_eq!(result.label_pages.get("intro").copied(), Some(1));
931    }
932
933    #[test]
934    fn label_pages_records_the_start_page_after_a_break() {
935        // A page-filling paragraph, then a labelled paragraph with a unique
936        // word. The label must bind to the page its content actually lands on
937        // (post-break), not the page the cursor sat on before the break.
938        let mut doc = Document::new(PathBuf::from("test.mos"));
939        let mut filler = String::new();
940        for i in 0..1500 {
941            filler.push_str(&format!("word{i} "));
942        }
943        make_paragraph(&mut doc, filler.trim());
944        make_labelled_paragraph(&mut doc, "ZZUNIQUE", "tail");
945
946        let result = LayoutEngine::new().layout(&doc);
947        assert!(
948            result.graph.pages.len() >= 2,
949            "expected a multi-page layout"
950        );
951
952        let recorded = result.label_pages.get("tail").copied();
953        // Cross-check: the recorded page is exactly the page whose runs contain
954        // the labelled paragraph's text.
955        let actual = result
956            .graph
957            .pages
958            .iter()
959            .find(|page| page.runs.iter().any(|run| run.text == "ZZUNIQUE"))
960            .map(|page| page.number);
961        assert_eq!(recorded, actual);
962        assert!(recorded.is_some_and(|page| page >= 2), "{recorded:?}");
963    }
964
965    #[test]
966    fn label_pages_omits_unlabelled_blocks_and_keeps_first_occurrence() {
967        let mut doc = Document::new(PathBuf::from("test.mos"));
968        make_paragraph(&mut doc, "unlabelled");
969        make_labelled_paragraph(&mut doc, "first", "dup");
970        make_labelled_paragraph(&mut doc, "second", "dup");
971        let result = LayoutEngine::new().layout(&doc);
972        // One entry per distinct label; the unlabelled block contributes none.
973        assert_eq!(result.label_pages.len(), 1);
974        assert_eq!(result.label_pages.get("dup").copied(), Some(1));
975    }
976
977    #[test]
978    fn label_pages_omits_a_labelled_block_that_emits_no_content() {
979        let mut doc = Document::new(PathBuf::from("test.mos"));
980        // A labelled empty paragraph commits no content (no words to flush)...
981        make_labelled_paragraph(&mut doc, "", "ghost");
982        // ...as does a labelled unsupported block (Table is ignored in MVP 0).
983        let mut table_attrs = AttrMap::new();
984        table_attrs.insert("label".to_owned(), AttrValue::Str("phantom".to_owned()));
985        doc.alloc_child(
986            doc.root,
987            NodeSpec::new(
988                NodeKind::Table,
989                SourceSpan::placeholder(PathBuf::from("test.mos")),
990            )
991            .with_attributes(table_attrs),
992        );
993        // ...then a normal labelled paragraph that does.
994        make_labelled_paragraph(&mut doc, "real text", "real");
995
996        let result = LayoutEngine::new().layout(&doc);
997        // No-content labels must not leak onto a later block's page: they are
998        // simply absent. Only the real paragraph maps, to its own page.
999        assert!(!result.label_pages.contains_key("ghost"));
1000        assert!(!result.label_pages.contains_key("phantom"));
1001        assert_eq!(result.label_pages.get("real").copied(), Some(1));
1002        assert_eq!(result.label_pages.len(), 1);
1003    }
1004
1005    #[test]
1006    fn emphasis_run_uses_oblique() {
1007        let mut doc = Document::new(PathBuf::from("test.mos"));
1008        pin_helvetica(&mut doc);
1009        let para = make_paragraph(&mut doc, "before");
1010        alloc_inline(&mut doc, para, NodeKind::Emphasis, "italic");
1011        alloc_inline(&mut doc, para, NodeKind::Text, "after");
1012        let result = LayoutEngine::new().layout(&doc);
1013        let runs = &result.graph.pages[0].runs;
1014        let italic = runs
1015            .iter()
1016            .find(|r| r.text == "italic")
1017            .expect("italic run");
1018        assert!(matches!(
1019            italic.font,
1020            Font::Base14(Base14Font::HelveticaOblique)
1021        ));
1022    }
1023
1024    #[test]
1025    fn bold_italic_run_uses_bold_oblique() {
1026        let mut doc = Document::new(PathBuf::from("test.mos"));
1027        pin_helvetica(&mut doc);
1028        let para = make_paragraph(&mut doc, "before");
1029        alloc_inline(&mut doc, para, NodeKind::BoldItalic, "both");
1030        alloc_inline(&mut doc, para, NodeKind::Text, "after");
1031        let result = LayoutEngine::new().layout(&doc);
1032        let runs = &result.graph.pages[0].runs;
1033        let both = runs
1034            .iter()
1035            .find(|r| r.text == "both")
1036            .expect("bold-italic run");
1037        assert!(matches!(
1038            both.font,
1039            Font::Base14(Base14Font::HelveticaBoldOblique)
1040        ));
1041    }
1042
1043    #[test]
1044    fn runs_stay_within_horizontal_margins() {
1045        let mut doc = Document::new(PathBuf::from("test.mos"));
1046        make_paragraph(
1047            &mut doc,
1048            "the quick brown fox jumps over the lazy dog the quick brown fox",
1049        );
1050        let result = LayoutEngine::new().layout(&doc);
1051        let runs = &result.graph.pages[0].runs;
1052        assert!(!runs.is_empty());
1053        let right = A4_WIDTH_PT - MARGIN_PT;
1054        for run in runs {
1055            assert!(run.x_pt >= MARGIN_PT - 1e-6, "x={}", run.x_pt);
1056            let end = run.x_pt + text_width(run.font, run.size_pt, &run.text);
1057            assert!(end <= right + 1e-3, "end={end} right={right}");
1058        }
1059    }
1060
1061    #[test]
1062    fn cyrillic_flows_through_embedded_default_without_substitution() {
1063        // The default text family is bundled Noto Sans, which covers
1064        // Cyrillic. The run carries the original UTF-8 text verbatim
1065        // and a non-empty shaped glyph stream; no substitution diagnostic (the warning
1066        // is retired) and no `?` substitution.
1067        let mut doc = Document::new(PathBuf::from("test.mos"));
1068        make_paragraph(&mut doc, "Привет");
1069        let result = LayoutEngine::new().layout(&doc);
1070        assert!(
1071            result.diagnostics.is_empty(),
1072            "expected no diagnostics, got {:?}",
1073            result.diagnostics
1074        );
1075        let runs = &result.graph.pages[0].runs;
1076        let cyr = runs.iter().find(|r| r.text == "Привет").expect("cyr run");
1077        assert!(matches!(cyr.font, Font::Embedded(_)));
1078        assert!(!cyr.glyphs.is_empty(), "expected shaped glyphs");
1079    }
1080
1081    #[test]
1082    fn decomposed_text_is_normalized_before_shaping() {
1083        let mut doc = Document::new(PathBuf::from("test.mos"));
1084        make_paragraph(&mut doc, "S\u{0326}");
1085
1086        let result = LayoutEngine::new().layout(&doc);
1087
1088        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1089        let run = result.graph.pages[0]
1090            .runs
1091            .iter()
1092            .find(|r| r.text == "\u{0218}")
1093            .expect("normalized run");
1094        assert!(matches!(run.font, Font::Embedded(_)));
1095        assert!(!run.glyphs.is_empty(), "expected shaped glyphs");
1096    }
1097
1098    #[test]
1099    fn extended_latin_passes_through_without_warning() {
1100        // Polish + Czech: every char is either a WinAnsi native
1101        // (`ó`, `r`, `i`, …) or an extended glyph reachable via
1102        // `extended_glyph_name` (`ł`, `Ł`, `ě`, `ř`; `ž` is WinAnsi at 0x9E).
1103        // No substitution, no diagnostic.
1104        let mut doc = Document::new(PathBuf::from("test.mos"));
1105        make_paragraph(&mut doc, "Łódź — Příliš");
1106        let result = LayoutEngine::new().layout(&doc);
1107        assert!(
1108            result.diagnostics.is_empty(),
1109            "expected no diagnostics, got {:?}",
1110            result.diagnostics
1111        );
1112        let text: String = result.graph.pages[0]
1113            .runs
1114            .iter()
1115            .map(|r| r.text.as_str())
1116            .collect::<Vec<_>>()
1117            .join(" ");
1118        assert!(text.contains("Łódź"), "got {text}");
1119        assert!(text.contains("Příliš"), "got {text}");
1120    }
1121
1122    #[test]
1123    fn cjk_and_emoji_flow_through_without_diagnostics() {
1124        // The substitution warning is retired. CJK and emoji are not covered by bundled
1125        // Noto Sans Regular either, but the layout engine no longer
1126        // filters them: they pass through to the shaped glyph stream
1127        // (rustybuzz emits `.notdef` glyphs for missing coverage,
1128        // which the PDF backend embeds harmlessly).
1129        let mut doc = Document::new(PathBuf::from("test.mos"));
1130        make_paragraph(&mut doc, "日本語 🦀");
1131        let result = LayoutEngine::new().layout(&doc);
1132        assert!(
1133            result.diagnostics.is_empty(),
1134            "uncovered glyphs should flow through without a diagnostic, got {:?}",
1135            result.diagnostics
1136        );
1137    }
1138
1139    #[test]
1140    fn winansi_chars_pass_through_without_warning() {
1141        // café / §1 / Straße all live in WinAnsi (Latin-1 + section
1142        // sign + germandbls). No substitution, no diagnostic.
1143        let mut doc = Document::new(PathBuf::from("test.mos"));
1144        make_paragraph(&mut doc, "café §1 Straße");
1145        let result = LayoutEngine::new().layout(&doc);
1146        assert!(
1147            result.diagnostics.is_empty(),
1148            "expected no diagnostics, got {:?}",
1149            result.diagnostics
1150        );
1151        let text: String = result.graph.pages[0]
1152            .runs
1153            .iter()
1154            .map(|r| r.text.as_str())
1155            .collect::<Vec<_>>()
1156            .join(" ");
1157        assert!(text.contains("café"), "got {text}");
1158        assert!(text.contains("Straße"), "got {text}");
1159    }
1160
1161    #[test]
1162    fn empty_document_emits_one_blank_page() {
1163        let doc = Document::new(PathBuf::from("test.mos"));
1164        let result = LayoutEngine::new().layout(&doc);
1165        assert_eq!(result.graph.pages.len(), 1);
1166        assert!(result.graph.pages[0].runs.is_empty());
1167    }
1168
1169    #[test]
1170    fn raw_inline_uses_courier() {
1171        let mut doc = Document::new(PathBuf::from("test.mos"));
1172        pin_helvetica(&mut doc);
1173        let para = make_paragraph(&mut doc, "before");
1174        alloc_inline(&mut doc, para, NodeKind::Raw, "code");
1175        alloc_inline(&mut doc, para, NodeKind::Text, "after");
1176        let result = LayoutEngine::new().layout(&doc);
1177        let runs = &result.graph.pages[0].runs;
1178        let code_run = runs.iter().find(|r| r.text == "code").expect("code run");
1179        assert!(matches!(code_run.font, Font::Base14(Base14Font::Courier)));
1180        // Adjacent runs stay in the default Helvetica face so the
1181        // engine isn't accidentally promoting everything to Courier.
1182        assert!(matches!(
1183            runs.iter().find(|r| r.text == "before").unwrap().font,
1184            Font::Base14(Base14Font::Helvetica)
1185        ));
1186    }
1187
1188    #[test]
1189    fn raw_block_tabs_render_as_spaces() {
1190        let mut doc = Document::new(PathBuf::from("test.mos"));
1191        make_raw_block(&mut doc, "\tprintln(\"hello\");");
1192
1193        let result = LayoutEngine::new().layout(&doc);
1194
1195        let rendered = result.graph.pages[0]
1196            .runs
1197            .iter()
1198            .map(|run| run.text.as_str())
1199            .collect::<String>();
1200        assert!(
1201            !rendered.contains('\t'),
1202            "raw block tabs should be expanded before shaping: {rendered:?}"
1203        );
1204        assert!(
1205            rendered.contains("    println"),
1206            "expected four-space tab expansion, got {rendered:?}"
1207        );
1208        assert!(
1209            result.graph.pages[0]
1210                .runs
1211                .iter()
1212                .any(|run| run.actual_text.as_deref() == Some("\tprintln(\"hello\");")),
1213            "raw block tabs should retain their original text for extraction"
1214        );
1215    }
1216
1217    #[test]
1218    fn raw_block_leading_blank_line_preserves_spacing() {
1219        let mut doc = Document::new(PathBuf::from("test.mos"));
1220        make_raw_block(&mut doc, "\ncode");
1221
1222        let result = LayoutEngine::new().layout(&doc);
1223
1224        let first_run = result.graph.pages[0]
1225            .runs
1226            .first()
1227            .expect("raw block should emit text after the leading blank");
1228        let expected_baseline = MARGIN_PT
1229            + ascent(FontFamily::noto_sans().monospace, BODY_SIZE_PT)
1230            + BODY_SIZE_PT * BODY_LEADING;
1231        assert!(
1232            (first_run.baseline_from_top_pt - expected_baseline).abs() < 0.01,
1233            "baseline {}, expected {expected_baseline}",
1234            first_run.baseline_from_top_pt
1235        );
1236    }
1237
1238    #[test]
1239    fn heading_levels_pick_distinct_sizes() {
1240        let mut doc = Document::new(PathBuf::from("test.mos"));
1241        make_section(&mut doc, 1, "H1");
1242        make_section(&mut doc, 2, "H2");
1243        make_section(&mut doc, 3, "H3");
1244        let result = LayoutEngine::new().layout(&doc);
1245        let runs = &result.graph.pages[0].runs;
1246        let h1 = runs.iter().find(|r| r.text == "H1").expect("H1 run");
1247        let h2 = runs.iter().find(|r| r.text == "H2").expect("H2 run");
1248        let h3 = runs.iter().find(|r| r.text == "H3").expect("H3 run");
1249        assert_eq!(h1.size_pt, HEADING_SIZES_PT[0]);
1250        assert_eq!(h2.size_pt, HEADING_SIZES_PT[1]);
1251        assert_eq!(h3.size_pt, HEADING_SIZES_PT[2]);
1252        // Each level is strictly smaller than the one above it.
1253        assert!(h1.size_pt > h2.size_pt);
1254        assert!(h2.size_pt > h3.size_pt);
1255        // Vertical order matches source order.
1256        assert!(h1.baseline_from_top_pt < h2.baseline_from_top_pt);
1257        assert!(h2.baseline_from_top_pt < h3.baseline_from_top_pt);
1258    }
1259
1260    #[test]
1261    fn heading_after_long_paragraph_paginates_correctly() {
1262        // A paragraph long enough to span multiple pages, followed
1263        // by a heading. The heading must appear *after* every
1264        // paragraph word in document order, and the first paragraph
1265        // word and the heading must end up on different pages.
1266        let mut doc = Document::new(PathBuf::from("test.mos"));
1267        pin_helvetica(&mut doc);
1268        let mut text = String::new();
1269        for i in 0..1500 {
1270            text.push_str(&format!("word{i} "));
1271        }
1272        make_paragraph(&mut doc, text.trim());
1273        make_section(&mut doc, 1, "After");
1274        let result = LayoutEngine::new().layout(&doc);
1275        assert!(
1276            result.graph.pages.len() >= 2,
1277            "expected pagination, got {} page(s)",
1278            result.graph.pages.len()
1279        );
1280        // Locate the heading and the very first paragraph word.
1281        let mut heading_page: Option<u32> = None;
1282        let mut first_word_page: Option<u32> = None;
1283        for page in &result.graph.pages {
1284            for run in &page.runs {
1285                if run.text == "After"
1286                    && matches!(run.font, Font::Base14(Base14Font::HelveticaBold))
1287                {
1288                    heading_page = Some(page.number);
1289                }
1290                if run.text == "word0" && first_word_page.is_none() {
1291                    first_word_page = Some(page.number);
1292                }
1293            }
1294        }
1295        let heading_page = heading_page.expect("heading run not emitted");
1296        let first_word_page = first_word_page.expect("first paragraph word not emitted");
1297        assert!(
1298            heading_page > first_word_page,
1299            "heading on page {heading_page}, first paragraph word on page {first_word_page}"
1300        );
1301    }
1302
1303    #[test]
1304    fn heading_with_number_attribute_emits_prefix_run() {
1305        // Resolver writes `number = "2.1"` onto a section node; layout
1306        // must emit a leading bold run with that number plus a trailing
1307        // dot, ahead of the heading text.
1308        let mut doc = Document::new(PathBuf::from("test.mos"));
1309        pin_helvetica(&mut doc);
1310        let mut attrs = AttrMap::new();
1311        attrs.insert("level".to_owned(), AttrValue::Int(2));
1312        attrs.insert("number".to_owned(), AttrValue::Str("2.1".to_owned()));
1313        let section = doc.alloc_child(
1314            doc.root,
1315            NodeSpec::new(
1316                NodeKind::Section,
1317                SourceSpan::placeholder(PathBuf::from("test.mos")),
1318            )
1319            .with_attributes(attrs),
1320        );
1321        alloc_inline(&mut doc, section, NodeKind::Text, "Background");
1322        let result = LayoutEngine::new().layout(&doc);
1323        let runs = &result.graph.pages[0].runs;
1324        assert!(matches!(
1325            runs[0].font,
1326            Font::Base14(Base14Font::HelveticaBold)
1327        ));
1328        assert_eq!(runs[0].text, "2.1.");
1329        assert!(runs.iter().any(|r| r.text == "Background"));
1330        // The number's baseline matches the title's baseline because
1331        // they live on the same line.
1332        let title = runs.iter().find(|r| r.text == "Background").unwrap();
1333        assert!((runs[0].baseline_from_top_pt - title.baseline_from_top_pt).abs() < 1e-3);
1334    }
1335
1336    #[test]
1337    fn reference_node_renders_resolved_text() {
1338        // A `Reference` node with a `text` attribute (set by the
1339        // resolver) flows through `collect_words` like any other inline
1340        //; no separate code path. The font defaults to the body face.
1341        let mut doc = Document::new(PathBuf::from("test.mos"));
1342        pin_helvetica(&mut doc);
1343        let para = make_paragraph(&mut doc, "see");
1344        let mut attrs = AttrMap::new();
1345        attrs.insert("label".to_owned(), AttrValue::Str("intro".to_owned()));
1346        attrs.insert("text".to_owned(), AttrValue::Str("1.2".to_owned()));
1347        doc.alloc_child(
1348            para,
1349            NodeSpec::new(
1350                NodeKind::Reference,
1351                SourceSpan::placeholder(PathBuf::from("test.mos")),
1352            )
1353            .with_attributes(attrs),
1354        );
1355        let result = LayoutEngine::new().layout(&doc);
1356        let runs = &result.graph.pages[0].runs;
1357        let reference = runs.iter().find(|r| r.text == "1.2").expect("ref run");
1358        assert!(matches!(
1359            reference.font,
1360            Font::Base14(Base14Font::Helvetica)
1361        ));
1362    }
1363
1364    // ---------- line-break controls (issue #26) ----------
1365
1366    fn alloc_hardbreak(doc: &mut Document, parent: NodeId) {
1367        // HardBreak nodes carry no attributes -- layout dispatches on
1368        // `NodeKind` alone (see `collect_words`).
1369        doc.alloc_child(
1370            parent,
1371            NodeSpec::new(
1372                NodeKind::HardBreak,
1373                SourceSpan::placeholder(PathBuf::from("test.mos")),
1374            ),
1375        );
1376    }
1377
1378    fn make_empty_paragraph(doc: &mut Document) -> NodeId {
1379        doc.alloc_child(
1380            doc.root,
1381            NodeSpec::new(
1382                NodeKind::Paragraph,
1383                SourceSpan::placeholder(PathBuf::from("test.mos")),
1384            ),
1385        )
1386    }
1387
1388    #[test]
1389    fn nbsp_keeps_two_words_in_a_single_run() {
1390        // U+00A0 is *not* ASCII whitespace, so the greedy breaker
1391        // never splits at it. The two halves end up as one TextRun
1392        // with the NBSP byte preserved -- the documented contract.
1393        let mut doc = Document::new(PathBuf::from("test.mos"));
1394        pin_helvetica(&mut doc);
1395        make_paragraph(&mut doc, "Mr.\u{A0}Smith");
1396        let result = LayoutEngine::new().layout(&doc);
1397        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1398        let runs = &result.graph.pages[0].runs;
1399        assert_eq!(runs.len(), 1, "expected one TextRun, got {runs:?}");
1400        assert_eq!(runs[0].text, "Mr.\u{A0}Smith");
1401    }
1402
1403    #[test]
1404    fn hard_break_advances_one_line_not_paragraph_spacing() {
1405        let mut doc = Document::new(PathBuf::from("test.mos"));
1406        pin_helvetica(&mut doc);
1407        let para = make_empty_paragraph(&mut doc);
1408        alloc_inline(&mut doc, para, NodeKind::Text, "foo");
1409        alloc_hardbreak(&mut doc, para);
1410        alloc_inline(&mut doc, para, NodeKind::Text, "bar");
1411
1412        let result = LayoutEngine::new().layout(&doc);
1413        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1414        let runs = &result.graph.pages[0].runs;
1415        let foo = runs.iter().find(|r| r.text == "foo").expect("foo run");
1416        let bar = runs.iter().find(|r| r.text == "bar").expect("bar run");
1417        let delta = bar.baseline_from_top_pt - foo.baseline_from_top_pt;
1418        // BODY_SIZE_PT × default leading is the inter-line distance;
1419        // accept either 1.0 or the resolved style's default leading
1420        // by checking against the computed product.
1421        let expected = BODY_SIZE_PT * BODY_LEADING;
1422        assert!(
1423            (delta - expected).abs() < 0.01,
1424            "expected inter-line delta {expected}, got {delta} (foo={}, bar={})",
1425            foo.baseline_from_top_pt,
1426            bar.baseline_from_top_pt
1427        );
1428    }
1429
1430    #[test]
1431    fn two_hard_breaks_produce_a_blank_line() {
1432        let mut doc = Document::new(PathBuf::from("test.mos"));
1433        pin_helvetica(&mut doc);
1434        let para = make_empty_paragraph(&mut doc);
1435        alloc_inline(&mut doc, para, NodeKind::Text, "foo");
1436        alloc_hardbreak(&mut doc, para);
1437        alloc_hardbreak(&mut doc, para);
1438        alloc_inline(&mut doc, para, NodeKind::Text, "bar");
1439
1440        let result = LayoutEngine::new().layout(&doc);
1441        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1442        let runs = &result.graph.pages[0].runs;
1443        let foo = runs.iter().find(|r| r.text == "foo").expect("foo run");
1444        let bar = runs.iter().find(|r| r.text == "bar").expect("bar run");
1445        let delta = bar.baseline_from_top_pt - foo.baseline_from_top_pt;
1446        // Two line advances: one to flush "foo", one for the blank
1447        // line between the two hard breaks.
1448        let one_line = BODY_SIZE_PT * BODY_LEADING;
1449        let expected = 2.0 * one_line;
1450        assert!(
1451            (delta - expected).abs() < 0.01,
1452            "expected delta {expected} for two-line gap, got {delta}"
1453        );
1454    }
1455
1456    #[test]
1457    fn hard_break_at_paragraph_start_collapses_on_first_page_line() {
1458        // When the paragraph begins with a hard break and nothing is
1459        // on the page yet, the leading break has nothing above it to
1460        // push down -- it collapses, matching CommonMark's "ignore
1461        // hard break at block boundary".
1462        let mut doc = Document::new(PathBuf::from("test.mos"));
1463        pin_helvetica(&mut doc);
1464        let para = make_empty_paragraph(&mut doc);
1465        alloc_hardbreak(&mut doc, para);
1466        alloc_inline(&mut doc, para, NodeKind::Text, "foo");
1467
1468        let result = LayoutEngine::new().layout(&doc);
1469        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1470        let runs = &result.graph.pages[0].runs;
1471        assert_eq!(runs.len(), 1, "expected one run, got {runs:?}");
1472        assert_eq!(runs[0].text, "foo");
1473    }
1474
1475    #[test]
1476    fn hard_break_after_oversize_word_does_not_add_blank_line() {
1477        // Regression: an oversize word emits chunks via
1478        // `flush_oversize_word`, which implicitly ends the line. A
1479        // following hard break used to add *another* line on top of
1480        // that implicit end. The break should now be absorbed so
1481        // `oversize\\next` produces `next` on the immediate next
1482        // line, not a blank-then-next.
1483        let mut doc = Document::new(PathBuf::from("test.mos"));
1484        pin_helvetica(&mut doc);
1485        let para = make_empty_paragraph(&mut doc);
1486        // A 500-char run of `a` is certainly wider than the default A4
1487        // column; the layout chops it into oversize chunks.
1488        let huge: String = "a".repeat(500);
1489        alloc_inline(&mut doc, para, NodeKind::Text, &huge);
1490        alloc_hardbreak(&mut doc, para);
1491        alloc_inline(&mut doc, para, NodeKind::Text, "next");
1492
1493        let result = LayoutEngine::new().layout(&doc);
1494        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1495        let runs = &result.graph.pages[0].runs;
1496        let next = runs.iter().find(|r| r.text == "next").expect("next run");
1497        // The last oversize chunk is the run whose text is all `a`s
1498        // and whose baseline is the highest among `a`-only runs.
1499        let last_chunk_baseline = runs
1500            .iter()
1501            .filter(|r| !r.text.is_empty() && r.text.chars().all(|c| c == 'a'))
1502            .map(|r| r.baseline_from_top_pt)
1503            .fold(f32::NEG_INFINITY, f32::max);
1504        assert!(
1505            last_chunk_baseline.is_finite(),
1506            "did not find any oversize-chunk runs"
1507        );
1508        let one_line = BODY_SIZE_PT * BODY_LEADING;
1509        let delta = next.baseline_from_top_pt - last_chunk_baseline;
1510        assert!(
1511            (delta - one_line).abs() < 0.01,
1512            "expected one-line gap ({one_line:.3}pt) between last oversize chunk and `next`, got {delta:.3}pt -- hard break emitted extra blank?"
1513        );
1514    }
1515
1516    #[test]
1517    fn leading_hard_break_collapses_after_prior_page_content() {
1518        // A hard break at the start of a paragraph must collapse even
1519        // when prior paragraphs have painted on the page -- the rule
1520        // is block-boundary, not page-state. Compare against a control
1521        // doc whose second paragraph has no leading hard break: the
1522        // two layouts must agree on the vertical position of `second`.
1523        let layout_one = |with_leading_break: bool| {
1524            let mut doc = Document::new(PathBuf::from("test.mos"));
1525            pin_helvetica(&mut doc);
1526            make_paragraph(&mut doc, "first");
1527            let p2 = make_empty_paragraph(&mut doc);
1528            if with_leading_break {
1529                alloc_hardbreak(&mut doc, p2);
1530            }
1531            alloc_inline(&mut doc, p2, NodeKind::Text, "second");
1532            LayoutEngine::new().layout(&doc).graph.pages[0]
1533                .runs
1534                .iter()
1535                .find(|r| r.text == "second")
1536                .expect("second run")
1537                .baseline_from_top_pt
1538        };
1539        let actual = layout_one(true);
1540        let control = layout_one(false);
1541        assert!(
1542            (actual - control).abs() < 0.01,
1543            "leading hard break should collapse: control baseline {control:.3}pt, got {actual:.3}pt"
1544        );
1545    }
1546
1547    #[test]
1548    fn shy_only_piece_does_not_emit_phantom_word() {
1549        // A whitespace-delimited piece consisting entirely of SHY
1550        // codepoints strips to empty -- skip it rather than emit a
1551        // zero-width Word, which would push an extra space gap into
1552        // the line because flush_line charges one space-advance per
1553        // word past the first.
1554        let mut doc = Document::new(PathBuf::from("test.mos"));
1555        pin_helvetica(&mut doc);
1556        // Three pieces after ASCII-whitespace split: "foo", "\u{AD}\u{AD}", "bar".
1557        // The middle piece must produce zero Word items.
1558        make_paragraph(&mut doc, "foo \u{AD}\u{AD} bar");
1559        let result = LayoutEngine::new().layout(&doc);
1560        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1561        let runs = &result.graph.pages[0].runs;
1562        assert_eq!(
1563            runs.iter().map(|r| r.text.as_str()).collect::<Vec<_>>(),
1564            vec!["foo", "bar"],
1565            "got {runs:?}"
1566        );
1567        let foo_w = text_width(runs[0].font, runs[0].size_pt, "foo");
1568        let space_w = text_width(runs[0].font, runs[0].size_pt, " ");
1569        let gap = runs[1].x_pt - (runs[0].x_pt + foo_w);
1570        assert!(
1571            (gap - space_w).abs() < 0.01,
1572            "expected one space gap ({space_w:.3}pt), got {gap:.3}pt -- phantom SHY-only word?"
1573        );
1574    }
1575
1576    #[test]
1577    fn soft_hyphen_is_stripped_from_emitted_runs() {
1578        // SHY codepoints must never appear in the rendered text --
1579        // when the word fits, the greedy breaker leaves it alone and
1580        // no visible hyphen is emitted.
1581        let mut doc = Document::new(PathBuf::from("test.mos"));
1582        pin_helvetica(&mut doc);
1583        make_paragraph(&mut doc, "super\u{AD}cali\u{AD}fragil");
1584        let result = LayoutEngine::new().layout(&doc);
1585        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1586        let runs = &result.graph.pages[0].runs;
1587        assert_eq!(runs.len(), 1, "expected one run, got {runs:?}");
1588        assert_eq!(runs[0].text, "supercalifragil");
1589        assert!(
1590            !runs[0].text.contains('\u{AD}'),
1591            "SHY leaked into rendered text: {:?}",
1592            runs[0].text
1593        );
1594    }
1595
1596    /// Insert a `#set page(margin: <pt>)` block so the column is
1597    /// narrow enough to force soft-hyphen breaks in subsequent
1598    /// paragraphs. Mirrors the helper in `style.rs`.
1599    fn pin_narrow_margin(doc: &mut Document, margin_pt: f32) {
1600        let mut attrs = AttrMap::new();
1601        attrs.insert("set".to_owned(), AttrValue::Str("page".to_owned()));
1602        attrs.insert(
1603            "set.arg.margin".to_owned(),
1604            AttrValue::Length(f64::from(margin_pt)),
1605        );
1606        doc.alloc_child(
1607            doc.root,
1608            NodeSpec::new(
1609                NodeKind::Raw,
1610                SourceSpan::placeholder(PathBuf::from("test.mos")),
1611            )
1612            .with_attributes(attrs),
1613        );
1614    }
1615
1616    #[test]
1617    fn shy_breaks_word_when_line_overflows() {
1618        // `su\u{AD}per` on a column narrow enough that "super"
1619        // overflows but "su-" fits. The greedy breaker must split
1620        // at the SHY offset, render "su-" on the first line, and
1621        // continue with "per" on the second.
1622        let mut doc = Document::new(PathBuf::from("test.mos"));
1623        pin_helvetica(&mut doc);
1624        // Column ≈ 25pt (just over `su-` at 12pt Helvetica, under
1625        // the full `super`).
1626        pin_narrow_margin(&mut doc, (A4_WIDTH_PT - 25.0) / 2.0);
1627        make_paragraph(&mut doc, "su\u{AD}per");
1628        let result = LayoutEngine::new().layout(&doc);
1629        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1630        let runs = &result.graph.pages[0].runs;
1631        let texts: Vec<&str> = runs.iter().map(|r| r.text.as_str()).collect();
1632        assert_eq!(texts, vec!["su-", "per"], "got {runs:?}");
1633        assert!(runs[1].baseline_from_top_pt > runs[0].baseline_from_top_pt);
1634        for r in runs {
1635            assert!(
1636                !r.text.contains('\u{AD}'),
1637                "SHY leaked into rendered text: {:?}",
1638                r.text
1639            );
1640        }
1641    }
1642
1643    #[test]
1644    fn shy_picks_latest_fitting_break() {
1645        // Two SHYs in `super\u{AD}cali\u{AD}fragil` (offsets 5, 9).
1646        // Column wide enough for "supercali-" but not the full word
1647        // must pick the latest fitting offset (9), not the earliest.
1648        let mut doc = Document::new(PathBuf::from("test.mos"));
1649        pin_helvetica(&mut doc);
1650        let font = Font::Base14(Base14Font::Helvetica);
1651        let target = text_width(font, BODY_SIZE_PT, "supercali-") + 2.0;
1652        pin_narrow_margin(&mut doc, (A4_WIDTH_PT - target) / 2.0);
1653        make_paragraph(&mut doc, "super\u{AD}cali\u{AD}fragil");
1654        let result = LayoutEngine::new().layout(&doc);
1655        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1656        let runs = &result.graph.pages[0].runs;
1657        let texts: Vec<&str> = runs.iter().map(|r| r.text.as_str()).collect();
1658        assert_eq!(texts, vec!["supercali-", "fragil"], "got {runs:?}");
1659    }
1660
1661    #[test]
1662    fn shy_at_zero_or_end_offset_is_ignored() {
1663        // `\u{AD}foo\u{AD}` strips to "foo" with offsets [0, 3] --
1664        // both are boundary positions and must be ignored. With a
1665        // column too narrow for "foo" the SHY path returns None and
1666        // the existing oversize-cluster fallback fires; no bare
1667        // leading or trailing hyphen appears.
1668        let mut doc = Document::new(PathBuf::from("test.mos"));
1669        pin_helvetica(&mut doc);
1670        let font = Font::Base14(Base14Font::Helvetica);
1671        // Narrower than "foo" so the column can't hold it whole.
1672        let target = text_width(font, BODY_SIZE_PT, "fo");
1673        pin_narrow_margin(&mut doc, (A4_WIDTH_PT - target) / 2.0);
1674        make_paragraph(&mut doc, "\u{AD}foo\u{AD}");
1675        let result = LayoutEngine::new().layout(&doc);
1676        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1677        let runs = &result.graph.pages[0].runs;
1678        // Boundary SHYs ignored: no run is just "-" and no run
1679        // ends with "-" (that would mean a SHY break was taken).
1680        for r in runs {
1681            assert_ne!(r.text, "-", "bare hyphen run from boundary SHY");
1682            assert!(
1683                !r.text.ends_with('-'),
1684                "trailing hyphen from boundary SHY: {:?}",
1685                r.text
1686            );
1687        }
1688        let joined: String = runs.iter().map(|r| r.text.as_str()).collect();
1689        assert_eq!(joined, "foo", "all clusters together still spell foo");
1690    }
1691
1692    #[test]
1693    fn shy_falls_back_to_oversize_when_no_break_fits() {
1694        // Column so narrow that neither prefix at the SHY offset
1695        // ("super-" nor "supercali-") fits. The breaker falls
1696        // through to `flush_oversize_word`, which chops by
1697        // shaped clusters -- no SHY-driven `-` appears.
1698        let mut doc = Document::new(PathBuf::from("test.mos"));
1699        pin_helvetica(&mut doc);
1700        let font = Font::Base14(Base14Font::Helvetica);
1701        // Narrower than even "su-": forces every candidate to fail.
1702        let target = text_width(font, BODY_SIZE_PT, "s") + 0.5;
1703        pin_narrow_margin(&mut doc, (A4_WIDTH_PT - target) / 2.0);
1704        make_paragraph(&mut doc, "super\u{AD}cali");
1705        let result = LayoutEngine::new().layout(&doc);
1706        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
1707        let runs = &result.graph.pages[0].runs;
1708        // Cluster fallback emits single-character runs; none of
1709        // them end with `-` (the source has no `-` and the SHY
1710        // path never fired).
1711        for r in runs {
1712            assert!(
1713                !r.text.ends_with('-'),
1714                "oversize fallback emitted hyphen: {:?}",
1715                r.text
1716            );
1717        }
1718        let joined: String = runs.iter().map(|r| r.text.as_str()).collect();
1719        assert_eq!(joined, "supercali");
1720    }
1721
1722    #[test]
1723    fn set_blocks_are_skipped() {
1724        let mut doc = Document::new(PathBuf::from("test.mos"));
1725        let mut attrs = AttrMap::new();
1726        attrs.insert("set".to_owned(), AttrValue::Str("page".to_owned()));
1727        doc.alloc_child(
1728            doc.root,
1729            NodeSpec::new(
1730                NodeKind::Raw,
1731                SourceSpan::placeholder(PathBuf::from("test.mos")),
1732            )
1733            .with_attributes(attrs),
1734        );
1735        make_paragraph(&mut doc, "body");
1736        let result = LayoutEngine::new().layout(&doc);
1737        let runs = &result.graph.pages[0].runs;
1738        assert_eq!(runs.len(), 1);
1739        assert_eq!(runs[0].text, "body");
1740    }
1741}