Skip to main content

mos_eval/
resolve.rs

1//! Cross-reference resolver (manifest §6 stage 3, MVP 1).
2//!
3//! Walks a lowered [`Document`] and, in three passes:
4//!
5//! 1. Assigns hierarchical `number` attributes to every [`NodeKind::Section`]
6//!    (`"1"`, `"1.1"`, `"1.2"`, `"2"`), keyed off the existing `level`
7//!    attribute.
8//! 2. Assigns flat document-order `number` attributes to every numbered
9//!    [`NodeKind::Figure`] (`"1"`, `"2"`, `"3"`) and stamps a visible
10//!    `"{supplement} N: …"` label onto each captioned figure. Figures are
11//!    not hierarchical, so the counter never resets. A figure can opt out
12//!    with `numbered: false` (skipped: no number, no caption prefix, does
13//!    not advance the counter) or swap its supplement word with
14//!    `supplement: "…"` (issue #76).
15//! 3. Builds a `label → LabelTarget` index from every block carrying a
16//!    `label` attribute, then rewrites each [`NodeKind::Reference`]'s
17//!    `text` attribute to its target's resolved string.
18//!
19//! The label index is *typed*: each entry records what kind of thing
20//! the label points at (section, figure, or generic block). A section
21//! reference renders as its bare hierarchical number (`"1.2"`); a figure
22//! reference renders kind-aware as `"{supplement} {n}"` (`"Figure 1"` by
23//! default) from the figure's flat document-order number. Generic targets
24//! (paragraphs, raw blocks, images, skipped figures, …) carry no counter
25//! and render as the bare label, matching prior behavior.
26//!
27//! Diagnostics:
28//!
//! - `MOS0030`: a label is declared more than once. The first occurrence
//!   wins; later occurrences keep their numbering but are not added to
//!   the index. Each duplicate also carries a structured rename
//!   [`Suggestion`] (the next free `{label}-N`, `N >= 2`, that no other
//!   declaration or earlier suggestion already uses) over the duplicate
//!   label token span.
35//! - `MOS0033`: a `@label` reference targets a label that doesn't exist.
36//!   The reference's text is left at its lowered placeholder
37//!   (`?label?`) so it remains visible in the rendered output.
38//!
39//! Manifest §6 stage 3 calls for a fixpoint loop because later stages
40//! (page references, TOC) can re-trigger resolution. MVP 1 only needs a
41//! single pass: section numbering doesn't depend on layout, but the
42//! driver shape mirrors the manifest's "internal fixpoint" anyway: the
43//! loop runs until no rewrite changes the document, with a hard cap to
44//! detect pathological cycles.
45//!
//! Every pass is **idempotent**: `resolve` is public and re-entrant, so
//! running it twice (inside the fixpoint above, or from a future
//! page-reference stage) must reproduce the same document rather than
//! compounding edits. Numbering overwrites attributes with the same
//! value; caption labelling re-derives from a preserved source instead
//! of re-reading the already-stamped text (which would nest the label
//! into `"Figure 1: Figure 1: …"`).
53
54use std::collections::{BTreeMap, BTreeSet};
55
56use mos_core::{
57    AttrValue, Diagnostic, DiagnosticAnnotation, Document, NodeKind, SourceSpan, Suggestion, codes,
58};
59
60use crate::{LABEL_SPAN_END_ATTR, LABEL_SPAN_START_ATTR};
61
62/// Cap on resolver fixpoint iterations. MVP 1 always converges in one
63/// pass; the cap is a safety net against forward-reference loops once
64/// page numbering lands in MVP 3+.
65const MAX_FIXPOINT_ITERATIONS: u32 = 8;
66
/// The kind of node a label resolves to, recorded while the index is built.
///
/// A variant stores just enough to produce the reference's display text.
/// Once the index exists, references are rendered from this data alone and
/// never walk back to the target node through its [`mos_core::NodeId`].
#[derive(Debug, Clone, PartialEq, Eq)]
enum LabelTargetKind {
    /// A heading, together with its hierarchical number (for example
    /// `"1.2"`).
    Section { number: String },
    /// A figure, together with its flat document-order number (for example
    /// `"3"`) and its supplement word: `"Figure"` unless the figure set
    /// its own `#figure(supplement: …)`. A reference renders as
    /// `"{supplement} {number}"` (`"Figure 3"`, `"Plate 3"`). For a figure
    /// that opted out with `numbered: false` the number is empty and the
    /// reference falls back to the bare label.
    Figure { number: String, supplement: String },
    /// Any other labelled node (paragraph, raw block, image, …).
    Generic,
}
88
89/// An entry in the label → target index.
90///
91/// `span` is the declaration site, retained so duplicate-label
92/// diagnostics can still point a "first declared here" note at the
93/// original occurrence without re-looking-up the node by id.
94#[derive(Clone, Debug)]
95struct LabelTarget {
96    kind: LabelTargetKind,
97    span: SourceSpan,
98}
99
100/// Run the resolver pass over `document` in place. Returns any
101/// diagnostics produced; the document is modified regardless of whether
102/// errors are present so partial output is still renderable.
103pub fn resolve(document: &mut Document, bib_keys: &BTreeSet<String>) -> Vec<Diagnostic> {
104    let mut diagnostics: Vec<Diagnostic> = Vec::new();
105    number_sections(document);
106    number_figures(document);
107    let labels = build_label_index(document, &mut diagnostics);
108    validate_page_references(document, &labels, &mut diagnostics);
109
110    for _ in 0..MAX_FIXPOINT_ITERATIONS {
111        let changed = rewrite_references(document, &labels, bib_keys, &mut diagnostics);
112        if !changed {
113            break;
114        }
115    }
116
117    diagnostics
118}
119
120/// Report an undeclared label in a `@page(label)` reference as `MOS0033`,
121/// mirroring the `@label` cross-reference check. A page reference resolves to a
122/// page *number* later, through the layout fixpoint (issue #72), but an unknown
123/// *label* is a lower-time error exactly like a bad `@ref`, and catching it
124/// here means `mos check` reports it without needing to lay the document out.
125fn validate_page_references(
126    document: &Document,
127    labels: &BTreeMap<String, LabelTarget>,
128    diagnostics: &mut Vec<Diagnostic>,
129) {
130    for node in document
131        .nodes()
132        .filter(|node| node.kind == NodeKind::PageReference)
133    {
134        let Some(AttrValue::Str(label)) = node.attributes.get("label") else {
135            continue;
136        };
137        if labels.contains_key(label) {
138            continue;
139        }
140        let mut diagnostic = Diagnostic::simple(
141            &codes::MOS0033,
142            None,
143            format!("unknown label `{label}` in `@page` reference"),
144        )
145        .with_span(node.span.clone());
146        if let Some(candidate) = nearest_label(label, labels) {
147            diagnostic = diagnostic.with_suggestion(Suggestion::new(
148                node.span.clone(),
149                format!("@page({candidate})"),
150            ));
151        }
152        diagnostics.push(diagnostic);
153    }
154}
155
156/// Walk the document depth-first and assign hierarchical numbers to
157/// every section based on its `level` attribute. Sections without a
158/// readable `level` default to depth 1.
159fn number_sections(document: &mut Document) {
160    let order = section_order(document);
161    let mut counters: Vec<u32> = Vec::new();
162    for (id, level) in order {
163        let depth = usize::from(level.max(1));
164        if depth > counters.len() {
165            counters.resize(depth, 0);
166        } else {
167            counters.truncate(depth);
168        }
169        counters[depth - 1] += 1;
170        let number = counters
171            .iter()
172            .map(u32::to_string)
173            .collect::<Vec<_>>()
174            .join(".");
175        if let Some(node) = document.get_mut(id) {
176            node.attributes
177                .insert("number".to_owned(), AttrValue::Str(number));
178        }
179    }
180}
181
182fn section_order(document: &Document) -> Vec<(mos_core::NodeId, u8)> {
183    // Scan every `Section` in document order via the shared
184    // `nodes_of_kind` collector (the same traversal figure numbering
185    // uses). MVP 1 only emits flat sections under the root, but walking
186    // the whole arena means nested sections would still be numbered in
187    // order if the lowerer ever produced them.
188    nodes_of_kind(document, NodeKind::Section)
189        .into_iter()
190        .map(|id| {
191            let level = match document.get(id).and_then(|n| n.attributes.get("level")) {
192                Some(AttrValue::Int(n)) => u8::try_from((*n).clamp(1, 255)).unwrap_or(1),
193                _ => 1,
194            };
195            (id, level)
196        })
197        .collect()
198}
199
200/// Assign flat, document-order numbers to every figure (`"1"`, `"2"`,
201/// `"3"`, …) and stamp a visible `"Figure N: …"` label onto each
202/// captioned figure. Figures are not hierarchical, so the counter never
203/// resets.
204///
205/// The label is baked into the caption text here: rather than rendered
206/// by the layout engine the way section numbers are, so a numbered
207/// figure shows its number with no backend changes; distinct label
208/// *styling* is left to the future float/caption pass. The supplement
209/// word comes from [`figure_supplement`] (the single localization seam)
210/// and is joined to the number with a non-breaking space (U+00A0). That
211/// space is *semantic generated text*, not layout policy in disguise: it
212/// encodes `Figure` and its counter as one cohesive label token: the
213/// same non-breaking space an author could type by hand, which the
214/// layout engine merely honors. The resolver makes no wrapping decision
215/// of its own; it just emits the token.
216///
217/// The pass is **idempotent**: the pre-label caption is preserved under a
218/// `caption_source` attribute and the visible `text` is always re-derived
219/// from it. Re-running the resolver: as the §6 stage 3 fixpoint and any
220/// future page-reference pass do: therefore re-stamps the same label
221/// instead of nesting `"Figure 1: Figure 1: …"`, and stays correct when a
222/// figure is re-numbered, because the source never carries a stale counter.
223fn number_figures(document: &mut Document) {
224    // Counter advances only for numbered figures, so `#figure(numbered:
225    // false)` figures neither consume a number nor leave a gap: the
226    // numbered figures stay contiguous (1, 2, 3, …). This is the documented
227    // skip rule (issue #76).
228    let mut counter: usize = 0;
229    for figure_id in nodes_of_kind(document, NodeKind::Figure) {
230        // Read the per-figure controls before taking a `get_mut` borrow.
231        let Some((numbered, supplement)) = document
232            .get(figure_id)
233            .map(|node| (figure_is_numbered(node), figure_supplement_attr(node)))
234        else {
235            continue;
236        };
237        // Resolve the caption's *source* text before mutating: `get`
238        // borrows the document immutably, but the writes below need
239        // `get_mut`. Prefer the preserved `caption_source`; fall back to
240        // the live `text` only on the first pass, before any label has
241        // been stamped. Re-deriving the label from this stable source,
242        // never from the already-stamped `text`; that is what keeps `resolve`
243        // idempotent across reruns.
244        let caption = figure_caption_text(document, figure_id).and_then(|text_id| {
245            read_str_attr(document, text_id, "caption_source")
246                .or_else(|| read_str_attr(document, text_id, "text"))
247                .map(|source| (text_id, source))
248        });
249
250        if numbered {
251            counter += 1;
252            let number = counter.to_string();
253            if let Some(node) = document.get_mut(figure_id) {
254                node.attributes
255                    .insert("number".to_owned(), AttrValue::Str(number.clone()));
256            }
257            if let Some((text_id, caption_source)) = caption {
258                let labelled = format!(
259                    "{}: {caption_source}",
260                    figure_label_prefix(&supplement, &number)
261                );
262                if let Some(node) = document.get_mut(text_id) {
263                    // Stash the pre-label caption so later passes re-derive
264                    // the label from the original instead of the stamped text.
265                    node.attributes
266                        .insert("caption_source".to_owned(), AttrValue::Str(caption_source));
267                    node.attributes
268                        .insert("text".to_owned(), AttrValue::Str(labelled));
269                }
270            }
271        } else {
272            // Skipped figure: carry no number, and restore the caption to its
273            // unprefixed source. Restoring (rather than just not stamping)
274            // keeps the pass idempotent if a figure toggles numbered→skipped
275            // across reruns, undoing any previously stamped `Figure N:`.
276            if let Some(node) = document.get_mut(figure_id) {
277                node.attributes.remove("number");
278            }
279            if let Some((text_id, caption_source)) = caption
280                && let Some(node) = document.get_mut(text_id)
281            {
282                node.attributes.insert(
283                    "caption_source".to_owned(),
284                    AttrValue::Str(caption_source.clone()),
285                );
286                node.attributes
287                    .insert("text".to_owned(), AttrValue::Str(caption_source));
288            }
289        }
290    }
291}
292
293/// Collect the ids of every node of `kind` in document order. `nodes()`
294/// iterates the arena by ascending [`mos_core::NodeId`] (allocation
295/// order), and the lowerer allocates nodes in source order, so the
296/// result is stable document order regardless of nesting depth. Shared
297/// by figure numbering and [`section_order`] so both passes agree on
298/// what "document order" means.
299fn nodes_of_kind(document: &Document, kind: NodeKind) -> Vec<mos_core::NodeId> {
300    document
301        .nodes()
302        .filter(|node| node.kind == kind)
303        .map(|node| node.id)
304        .collect()
305}
306
307/// Find the text node of a figure's caption, if it has one. The lowerer
308/// tags the caption paragraph with `role = "caption"` and gives it a
309/// single [`NodeKind::Text`] child carrying the caption string.
310fn figure_caption_text(
311    document: &Document,
312    figure_id: mos_core::NodeId,
313) -> Option<mos_core::NodeId> {
314    let figure = document.get(figure_id)?;
315    for &child_id in &figure.children {
316        let Some(child) = document.get(child_id) else {
317            continue;
318        };
319        let is_caption = child.kind == NodeKind::Paragraph
320            && matches!(child.attributes.get("role"), Some(AttrValue::Str(role)) if role == "caption");
321        if !is_caption {
322            continue;
323        }
324        for &grandchild_id in &child.children {
325            if document
326                .get(grandchild_id)
327                .is_some_and(|gc| gc.kind == NodeKind::Text)
328            {
329                return Some(grandchild_id);
330            }
331        }
332    }
333    None
334}
335
336/// Read a string attribute off a node by id, cloning it out. `None` if
337/// the node is missing or the attribute is absent or non-string.
338fn read_str_attr(document: &Document, id: mos_core::NodeId, key: &str) -> Option<String> {
339    match document.get(id)?.attributes.get(key) {
340        Some(AttrValue::Str(s)) => Some(s.clone()),
341        _ => None,
342    }
343}
344
/// The default *supplement* word placed before a figure's number in
/// generated caption and reference text: the "Figure" in "Figure 1".
///
/// This function is the one place where figure labels would be localized.
/// LaTeX does it through babel's `\figurename`, Typst through
/// `figure.supplement` under the document `text(lang: …)`. Mosaic records a
/// document `language` in metadata but does not pass it to the resolver yet,
/// so the English default is returned unconditionally. Once that plumbing
/// exists, a language-keyed lookup can replace the constant without any
/// call site changing. Sibling kinds (tables, equations, theorems) are
/// expected to grow their own supplements alongside their numbering.
fn figure_supplement() -> &'static str {
    const ENGLISH_DEFAULT: &str = "Figure";
    ENGLISH_DEFAULT
}
359
360/// Whether a figure participates in the auto `Figure N` counter. A figure
361/// opts out with `#figure(numbered: false)` (issue #76), recorded by the
362/// lowerer as a `numbered = false` attribute; absence means numbered.
363fn figure_is_numbered(node: &mos_core::Node) -> bool {
364    !matches!(
365        node.attributes.get("numbered"),
366        Some(AttrValue::Bool(false))
367    )
368}
369
370/// The supplement word for a figure's caption and its references. An
371/// explicit `#figure(supplement: …)` value wins: **including the empty
372/// string** (`supplement: ""` / `supplement: none`), which means "number
373/// only, no word" (the "no visible prefix" form). Only an *absent*
374/// supplement falls back to the localized [`figure_supplement`] default
375/// (`"Figure"`).
376fn figure_supplement_attr(node: &mos_core::Node) -> String {
377    match node.attributes.get("supplement") {
378        Some(AttrValue::Str(s)) => s.clone(),
379        _ => figure_supplement().to_owned(),
380    }
381}
382
/// Build the label token shared by captions and references, for example
/// `"Figure\u{00A0}1"`.
///
/// The word and the number are glued with a non-breaking space (U+00A0) so
/// the word cannot wrap away from its number. With an empty `supplement`
/// the result is just the number (`"1"`): no word and no leading space.
fn figure_label_prefix(supplement: &str, number: &str) -> String {
    // U+00A0 occupies two bytes in UTF-8.
    let mut token = String::with_capacity(supplement.len() + 2 + number.len());
    if !supplement.is_empty() {
        token.push_str(supplement);
        token.push('\u{00A0}');
    }
    token.push_str(number);
    token
}
395
396/// Read a node's resolved `number` attribute, or an empty string if it
397/// has none. Both section and figure numbering stash their counter
398/// there before the label index is built; an empty result means the
399/// numbering pass didn't reach the node (a resolver/lowerer bug).
400fn captured_number(node: &mos_core::Node) -> String {
401    match node.attributes.get("number") {
402        Some(AttrValue::Str(s)) => s.clone(),
403        _ => String::new(),
404    }
405}
406
407/// Classify a labelled node into a [`LabelTargetKind`]. Only nodes
408/// that actually declare a label reach this function: references are
409/// filtered out by the caller.
410fn classify_target(node: &mos_core::Node) -> LabelTargetKind {
411    match node.kind {
412        NodeKind::Section => LabelTargetKind::Section {
413            number: captured_number(node),
414        },
415        NodeKind::Figure => LabelTargetKind::Figure {
416            number: captured_number(node),
417            supplement: figure_supplement_attr(node),
418        },
419        _ => LabelTargetKind::Generic,
420    }
421}
422
423/// Collect every label declared anywhere in the document: any non-reference
424/// block carrying a `label` attribute: regardless of document order or
425/// duplication. The duplicate-rename suggestion consults this set so it never
426/// proposes a name that some other declaration already uses.
427fn declared_labels(document: &Document) -> BTreeSet<String> {
428    document
429        .nodes()
430        .filter(|node| !matches!(node.kind, NodeKind::Reference | NodeKind::PageReference))
431        .filter_map(|node| match node.attributes.get("label") {
432            Some(AttrValue::Str(label)) => Some(label.clone()),
433            _ => None,
434        })
435        .collect()
436}
437
/// Choose a replacement name for a duplicated `label`: `{label}-{N}` with
/// the smallest `N >= 2` that is not a member of `declared`.
///
/// The rule is deliberately plain and stable, with no similarity ranking;
/// it simply steps over names that are taken so the suggested fix cannot
/// recreate the clash. The search tries `declared.len() + 1` candidates, and
/// since `declared` cannot contain all of them, one is always free. The
/// trailing fallback only matters if the ceiling saturates.
fn nonconflicting_rename(label: &str, declared: &BTreeSet<String>) -> String {
    let ceiling = declared.len().saturating_add(2);
    for suffix in 2..=ceiling {
        let candidate = format!("{label}-{suffix}");
        if !declared.contains(&candidate) {
            return candidate;
        }
    }
    format!("{label}-{ceiling}")
}
451
452/// Build the `label -> LabelTarget` index from every label-declaring block,
453/// reporting `MOS0030` for redeclarations. The first declaration of a label
454/// wins; later occurrences keep their numbering but are not indexed, and each
455/// carries a related note pointing at the first declaration plus a structured
456/// rename [`Suggestion`]; the next free `{label}-N`: over the duplicate label
457/// token span (see the module-level docs). Reads the document only, so
458/// `resolve` stays idempotent.
459fn build_label_index(
460    document: &Document,
461    diagnostics: &mut Vec<Diagnostic>,
462) -> BTreeMap<String, LabelTarget> {
463    let mut occupied_labels = declared_labels(document);
464    let mut index: BTreeMap<String, LabelTarget> = BTreeMap::new();
465    for node in document.nodes() {
466        // References *consume* labels; only blocks declare them. Treating a
467        // `@ref` or `@page(ref)`'s `label` attribute as a declaration would
468        // shadow the real target (and falsely trip the duplicate-label check).
469        if matches!(node.kind, NodeKind::Reference | NodeKind::PageReference) {
470            continue;
471        }
472        let Some(AttrValue::Str(label)) = node.attributes.get("label") else {
473            continue;
474        };
475        if let Some(existing) = index.get(label) {
476            // Offer a deterministic, collision-aware rename for the duplicate:
477            // the next free `{label}-N` that no declaration, or earlier
478            // suggestion in this pass, already uses. Still a boring stable
479            // rule, not a similarity-ranked guess. The fix targets only the
480            // duplicate label token span so applying it preserves the
481            // surrounding heading/directive syntax.
482            let rename = nonconflicting_rename(label, &occupied_labels);
483            occupied_labels.insert(rename.clone());
484            let suggestion = label_span(node).map(|span| Suggestion::new(span, rename));
485            let mut diagnostic = Diagnostic::simple(
486                &codes::MOS0030,
487                None,
488                format!("label `{label}` is declared more than once"),
489            )
490            .with_span(node.span.clone())
491            .with_annotation(DiagnosticAnnotation::Related {
492                span: existing.span.clone(),
493                message: format!("first declaration of `{label}` is here"),
494            });
495            if let Some(suggestion) = suggestion {
496                diagnostic = diagnostic.with_suggestion(suggestion);
497            }
498            diagnostics.push(diagnostic);
499            continue;
500        }
501        index.insert(
502            label.clone(),
503            LabelTarget {
504                kind: classify_target(node),
505                span: node.span.clone(),
506            },
507        );
508    }
509    index
510}
511
512fn label_span(node: &mos_core::Node) -> Option<SourceSpan> {
513    let start = match node.attributes.get(LABEL_SPAN_START_ATTR) {
514        Some(AttrValue::Int(value)) => usize::try_from(*value).ok()?,
515        _ => return None,
516    };
517    let end = match node.attributes.get(LABEL_SPAN_END_ATTR) {
518        Some(AttrValue::Int(value)) => usize::try_from(*value).ok()?,
519        _ => return None,
520    };
521    if start > end {
522        return None;
523    }
524    Some(SourceSpan::new(node.span.file.clone(), start, end))
525}
526
527/// Compute the display string for a reference to `target`.
528///
529/// Section targets render as their bare hierarchical counter (e.g.
530/// `"1.2"`). Figure targets render kind-aware as `"Figure N"`: the
531/// localized [`figure_supplement`] joined to the figure's flat
532/// document-order counter with a non-breaking space (U+00A0): one
533/// cohesive label token the layout engine honors, not a wrapping
534/// decision made here (see [`number_figures`]). Generic targets
535/// (paragraphs, images, raw blocks) have no counter and render as the
536/// bare label.
537fn render_target(target: &LabelTarget, label: &str) -> String {
538    match &target.kind {
539        LabelTargetKind::Section { number } if !number.is_empty() => number.clone(),
540        LabelTargetKind::Figure { number, supplement } if !number.is_empty() => {
541            figure_label_prefix(supplement, number)
542        }
543        // A numbered target carrying an empty number is a resolver/lowerer
544        // bug; fall back to the label name so the output stays readable.
545        LabelTargetKind::Section { .. }
546        | LabelTargetKind::Figure { .. }
547        | LabelTargetKind::Generic => label.to_owned(),
548    }
549}
550
/// Decide whether `label` could be written as an `@` reference, meaning it
/// is non-empty and uses only the reference alphabet `[A-Za-z0-9_:.-]`
/// (the original notes this mirrors `scan_label_chars` in `mos-parse`).
///
/// `#figure(label: …)` and `#image(label: …)` accept arbitrary strings, so
/// the index may contain names such as `"intro x"` or non-ASCII labels that
/// no `@…` reference can spell. Offering one as a fix would yield text that
/// does not parse.
fn is_reference_label(label: &str) -> bool {
    if label.is_empty() {
        return false;
    }
    label
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || "_-:.".contains(c))
}
563
/// Byte-wise Levenshtein distance between `a` and `b`.
///
/// The labels handed in by callers are ASCII (reference names and
/// [`is_reference_label`] candidates), so counting bytes is the same as
/// counting characters. A single row is reused for the whole computation:
/// before row `i` is processed, `distances[j]` is the distance from
/// `a[..i]` to `b[..j]`.
fn edit_distance(a: &str, b: &str) -> usize {
    let target = b.as_bytes();
    let mut distances: Vec<usize> = (0..=target.len()).collect();

    for (consumed, &source_byte) in a.as_bytes().iter().enumerate() {
        // `corner` tracks the previous row's value one column to the left.
        let mut corner = distances[0];
        distances[0] = consumed + 1;
        for (col, &target_byte) in target.iter().enumerate() {
            let above = distances[col + 1];
            let left = distances[col];
            let substituted = if source_byte == target_byte {
                corner
            } else {
                corner + 1
            };
            distances[col + 1] = substituted.min(above + 1).min(left + 1);
            corner = above;
        }
    }

    distances[target.len()]
}
585
586/// The single nearest *resolvable* label to `unknown`, when one is a
587/// reasonable near-miss rather than an unrelated string; the candidate for a
588/// "did you mean `@intro`?" fix on an unknown reference.
589///
590/// "Reasonable" is deliberately conservative:
591///
592/// - references shorter than three bytes get no suggestion (a one-edit guess
593///   on a one- or two-byte name is noise, not help);
594/// - the edit distance must be within `unknown.len() / 3`: rustc's "did you
595///   mean" heuristic. With the length floor that bound is always at least 1,
596///   admitting `intrdo` → `intro` (distance 1, bound 2) while rejecting wholly
597///   unrelated names.
598///
599/// Candidates are the label-index keys that [`is_reference_label`] accepts.
600/// The index is the resolvable, first-occurrence-wins set, so any surviving
601/// candidate both resolves and is spellable as `@candidate`. Ties break on
602/// `(distance, label)`; the `BTreeMap` already yields labels in sorted order,
603/// so the choice is identical on every run and every fixpoint pass.
604fn nearest_label(unknown: &str, labels: &BTreeMap<String, LabelTarget>) -> Option<String> {
605    if unknown.len() < 3 {
606        return None;
607    }
608    let max_distance = unknown.len() / 3;
609    labels
610        .keys()
611        .filter(|label| is_reference_label(label))
612        .map(|label| (edit_distance(unknown, label), label))
613        .filter(|&(distance, _)| distance <= max_distance)
614        .min_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(b.1)))
615        .map(|(_, label)| label.clone())
616}
617
618/// Rewrite each `Reference` node's `text` attribute to point at its
619/// target. Returns true if any node was mutated this iteration:
620/// callers use that signal to drive the §6 stage 3 fixpoint loop.
621fn rewrite_references(
622    document: &mut Document,
623    labels: &BTreeMap<String, LabelTarget>,
624    bib_keys: &BTreeSet<String>,
625    diagnostics: &mut Vec<Diagnostic>,
626) -> bool {
627    let references: Vec<mos_core::NodeId> = document
628        .nodes()
629        .filter(|n| n.kind == NodeKind::Reference)
630        .map(|n| n.id)
631        .collect();
632
633    let mut changed = false;
634    for ref_id in references {
635        let Some(node) = document.get(ref_id) else {
636            continue;
637        };
638        let Some(AttrValue::Str(label)) = node.attributes.get("label").cloned() else {
639            continue;
640        };
641        let resolved_text = if let Some(target) = labels.get(&label) {
642            render_target(target, &label)
643        } else {
644            let already_diagnosed = diagnostics
645                .iter()
646                .any(|d| d.def().code() == codes::MOS0033.code() && d.span() == Some(&node.span));
647            if !already_diagnosed {
648                let mut diagnostic = Diagnostic::simple(
649                    &codes::MOS0033,
650                    None,
651                    format!("unknown label `{label}` in `@` reference"),
652                )
653                .with_span(node.span.clone());
654                // An `@key` that misses every label but exactly matches a
655                // bibliography key is a citation written with the wrong
656                // syntax (`@key` instead of `[@key]`). That exact match is a
657                // stronger signal than any label near-miss, so it wins: offer
658                // the citation form and say why. `node.span` covers the whole
659                // `@label` token (sigil included), so the replacement supplies
660                // the full `[@key]`.
661                if bib_keys.contains(&label) {
662                    diagnostic = diagnostic
663                        .with_annotation(DiagnosticAnnotation::Hint(format!(
664                            "`{label}` is a bibliography key; cite it as `[@{label}]`"
665                        )))
666                        .with_suggestion(Suggestion::new(node.span.clone(), format!("[@{label}]")));
667                } else if let Some(candidate) = nearest_label(&label, labels) {
668                    // Offer the nearest existing label as a machine-applicable
669                    // fix (`@intrdo` -> `@intro`) when a reasonable near-miss
670                    // exists. The replacement carries its own `@`.
671                    diagnostic = diagnostic.with_suggestion(Suggestion::new(
672                        node.span.clone(),
673                        format!("@{candidate}"),
674                    ));
675                }
676                diagnostics.push(diagnostic);
677            }
678            continue;
679        };
680
681        if let Some(node) = document.get_mut(ref_id) {
682            let new = AttrValue::Str(resolved_text);
683            if node.attributes.get("text") != Some(&new) {
684                node.attributes.insert("text".to_owned(), new);
685                changed = true;
686            }
687        }
688    }
689    changed
690}
691
692#[cfg(test)]
693mod tests {
694    use std::path::PathBuf;
695
696    use mos_core::Severity;
697
698    use super::*;
699
700    fn lower(src: &str) -> (Document, Vec<Diagnostic>) {
701        let r = crate::lower(src, &PathBuf::from("test.mos"));
702        (r.document, r.diagnostics)
703    }
704
705    fn apply_suggestion(src: &str, suggestion: &Suggestion) -> String {
706        let mut out = String::new();
707        out.push_str(&src[..suggestion.span.start()]);
708        out.push_str(&suggestion.replacement);
709        out.push_str(&src[suggestion.span.end()..]);
710        out
711    }
712
713    fn section_numbers(doc: &Document) -> Vec<(String, String)> {
714        doc.nodes()
715            .filter(|n| n.kind == NodeKind::Section)
716            .map(|n| {
717                let title = n
718                    .children
719                    .iter()
720                    .filter_map(|c| doc.get(*c))
721                    .find_map(|c| match c.attributes.get("text") {
722                        Some(AttrValue::Str(s)) => Some(s.clone()),
723                        _ => None,
724                    })
725                    .unwrap_or_default();
726                let number = match n.attributes.get("number") {
727                    Some(AttrValue::Str(s)) => s.clone(),
728                    _ => String::new(),
729                };
730                (title, number)
731            })
732            .collect()
733    }
734
735    #[test]
736    fn assigns_hierarchical_section_numbers() {
737        let (doc, diags) = lower("= Intro\n\n== Background\n\n== Aims\n\n= Methods\n\n== Sample\n");
738        assert!(diags.is_empty(), "{diags:?}");
739        let nums = section_numbers(&doc);
740        let pairs: Vec<(&str, &str)> = nums.iter().map(|(t, n)| (t.as_str(), n.as_str())).collect();
741        assert_eq!(
742            pairs,
743            vec![
744                ("Intro", "1"),
745                ("Background", "1.1"),
746                ("Aims", "1.2"),
747                ("Methods", "2"),
748                ("Sample", "2.1"),
749            ]
750        );
751    }
752
753    #[test]
754    fn duplicate_label_emits_mos0030_and_keeps_first() {
755        let src = "= A <dup>\n\n= B <dup>\n\nsee @dup\n";
756        let (doc, diags) = lower(src);
757        let mos0030: Vec<&Diagnostic> = diags
758            .iter()
759            .filter(|d| d.def().code() == codes::MOS0030.code())
760            .collect();
761        assert_eq!(
762            mos0030.len(),
763            1,
764            "expected exactly one MOS0030, got {diags:?}"
765        );
766        let d = mos0030[0];
767        assert_eq!(d.def().code(), codes::MOS0030.code());
768        assert_eq!(d.severity(), Severity::Error);
769        assert!(
770            d.message().contains("`dup`"),
771            "MOS0030 message should name the duplicated label, got {:?}",
772            d.message()
773        );
774        // The duplicate diagnostic must point at the *second* occurrence
775        // and carry a Related annotation back to the first declaration.
776        // Editor UIs rely on both spans to render the redeclaration jump.
777        assert_eq!(
778            d.span().map(|span| &src[span.start()..span.end()]),
779            Some("= B <dup>"),
780            "MOS0030 span should cover the second heading exactly"
781        );
782        assert_eq!(
783            d.annotations().len(),
784            1,
785            "MOS0030 should reference the first decl"
786        );
787        let related = d.annotations().iter().find_map(|a| match a {
788            DiagnosticAnnotation::Related { span, message } => Some((span, message)),
789            _ => None,
790        });
791        assert!(related.is_some(), "MOS0030 carries a Related annotation");
792        if let Some((note_span, note_message)) = related {
793            assert_eq!(
794                &src[note_span.start()..note_span.end()],
795                "= A <dup>",
796                "MOS0030 note should point at the original declaration exactly"
797            );
798            assert!(
799                note_message.contains("`dup`"),
800                "first-decl note should name the label, got {note_message:?}"
801            );
802        }
803        // The duplicate carries exactly one structured rename suggestion:
804        // replace only the duplicate label token with the smallest free
805        // `dup-2` candidate (nothing else here claims it). Editors apply this
806        // as a fix-it, so the payload: span + replacement: must preserve the
807        // surrounding heading syntax.
808        let suggestions = d.suggestions();
809        assert_eq!(
810            suggestions.len(),
811            1,
812            "MOS0030 should carry exactly one rename suggestion, got {suggestions:?}"
813        );
814        if let Some(suggestion) = suggestions.first() {
815            assert_eq!(
816                &src[suggestion.span.start()..suggestion.span.end()],
817                "dup",
818                "suggestion span should cover only the duplicate label token"
819            );
820            assert_eq!(
821                suggestion.replacement, "dup-2",
822                "suggestion should rename the duplicate label deterministically"
823            );
824            assert_eq!(
825                apply_suggestion(src, suggestion),
826                "= A <dup>\n\n= B <dup-2>\n\nsee @dup\n",
827                "applying the fix must preserve the heading and label delimiters"
828            );
829        }
830        // Reference still resolves to the first declaration's number.
831        let reference_text = doc
832            .nodes()
833            .find(|n| n.kind == NodeKind::Reference)
834            .and_then(|n| n.attributes.get("text"));
835        assert_eq!(reference_text, Some(&AttrValue::Str("1".to_owned())));
836    }
837
838    #[test]
839    fn triple_duplicate_label_emits_one_mos0030_per_redeclaration() {
840        // Three sections share `dup`. The first wins; the second and
841        // third each get their own MOS0030 pointing back at the first.
842        // The reference still resolves to section number `1`.
843        let src = "= A <dup>\n\n= B <dup>\n\n= C <dup>\n\nsee @dup\n";
844        let (doc, diags) = lower(src);
845        let mos0030: Vec<&Diagnostic> = diags
846            .iter()
847            .filter(|d| d.def().code() == codes::MOS0030.code())
848            .collect();
849        assert_eq!(
850            mos0030.len(),
851            2,
852            "expected two MOS0030 (one per redeclaration), got {diags:?}"
853        );
854        let spans: Vec<&str> = mos0030
855            .iter()
856            .filter_map(|d| d.span().map(|s| &src[s.start()..s.end()]))
857            .collect();
858        assert_eq!(
859            spans.len(),
860            mos0030.len(),
861            "every MOS0030 must carry a primary span"
862        );
863        assert!(
864            spans.contains(&"= B <dup>"),
865            "missing span for second decl, got {spans:?}"
866        );
867        assert!(
868            spans.contains(&"= C <dup>"),
869            "missing span for third decl, got {spans:?}"
870        );
871        // Every duplicate diagnostic must reference the same first decl.
872        for d in &mos0030 {
873            let related = d.annotations().iter().find_map(|a| match a {
874                DiagnosticAnnotation::Related { span, message } => Some((span, message)),
875                _ => None,
876            });
877            assert!(related.is_some(), "MOS0030 carries a Related annotation");
878            if let Some((ns, _)) = related {
879                assert_eq!(
880                    &src[ns.start()..ns.end()],
881                    "= A <dup>",
882                    "every redeclaration must link back to the first decl"
883                );
884            }
885            // Each redeclaration carries its own deterministic rename
886            // suggestion over its own label-token span. Generated suggestions
887            // are reserved during this resolver pass, so bulk-applying both
888            // fixes does not create a fresh duplicate.
889            let suggestions = d.suggestions();
890            assert_eq!(
891                suggestions.len(),
892                1,
893                "each MOS0030 carries exactly one rename suggestion, got {suggestions:?}"
894            );
895            if let Some(suggestion) = suggestions.first() {
896                assert_eq!(&src[suggestion.span.start()..suggestion.span.end()], "dup");
897            }
898        }
899        let replacements: Vec<&str> = mos0030
900            .iter()
901            .filter_map(|d| d.suggestions().first())
902            .map(|suggestion| suggestion.replacement.as_str())
903            .collect();
904        assert_eq!(replacements, vec!["dup-2", "dup-3"]);
905        let reference_text = doc
906            .nodes()
907            .find(|n| n.kind == NodeKind::Reference)
908            .and_then(|n| n.attributes.get("text"));
909        assert_eq!(reference_text, Some(&AttrValue::Str("1".to_owned())));
910    }
911
912    #[test]
913    fn duplicate_suggestion_skips_existing_label() {
914        // `dup-2` already names another block, so the collision-aware rename
915        // for the duplicate `dup` must step over it to `dup-3` rather than
916        // propose a name that would just re-collide. Only `dup` is
917        // duplicated; `dup-2` is a distinct, valid label (hyphens are legal
918        // label chars).
919        let src = "= A <dup>\n\n= B <dup-2>\n\n= C <dup>\n";
920        let (_doc, diags) = lower(src);
921        let mos0030: Vec<&Diagnostic> = diags
922            .iter()
923            .filter(|d| d.def().code() == codes::MOS0030.code())
924            .collect();
925        assert_eq!(mos0030.len(), 1, "only `dup` is duplicated, got {diags:?}");
926        let d = mos0030[0];
927        let suggestions = d.suggestions();
928        assert_eq!(
929            suggestions.len(),
930            1,
931            "the duplicate carries one rename suggestion, got {suggestions:?}"
932        );
933        if let Some(suggestion) = suggestions.first() {
934            assert_eq!(
935                suggestion.replacement, "dup-3",
936                "rename must skip the existing `dup-2` and land on the next free suffix"
937            );
938            assert_eq!(
939                &src[suggestion.span.start()..suggestion.span.end()],
940                "dup",
941                "suggestion targets the duplicate label token"
942            );
943            assert_eq!(
944                apply_suggestion(src, suggestion),
945                "= A <dup>\n\n= B <dup-2>\n\n= C <dup-3>\n",
946                "applying the fix must preserve the duplicate declaration syntax"
947            );
948        }
949    }
950
951    #[test]
952    fn unknown_label_emits_mos0033() {
953        let (doc, diags) = lower("see @no:such\n");
954        let mos0033: Vec<&Diagnostic> = diags
955            .iter()
956            .filter(|d| d.def().code() == codes::MOS0033.code())
957            .collect();
958        assert_eq!(
959            mos0033.len(),
960            1,
961            "expected exactly one MOS0033 even with the fixpoint loop, got {diags:?}"
962        );
963        let d = mos0033[0];
964        assert_eq!(d.def().code(), codes::MOS0033.code());
965        assert_eq!(d.severity(), Severity::Error);
966        assert!(
967            d.message().contains("`no:such`"),
968            "MOS0033 message should name the missing label, got {:?}",
969            d.message()
970        );
971        assert!(
972            d.span().is_some(),
973            "MOS0033 must carry a span so editors can jump to the bad reference"
974        );
975        let reference_text = doc
976            .nodes()
977            .find(|n| n.kind == NodeKind::Reference)
978            .and_then(|n| n.attributes.get("text"));
979        // Placeholder text is preserved so the diagnostic location is
980        // visible in the rendered output.
981        assert_eq!(
982            reference_text,
983            Some(&AttrValue::Str("?no:such?".to_owned()))
984        );
985    }
986
987    #[test]
988    fn multiple_unknown_references_each_emit_one_mos0033() {
989        // Three distinct unknown labels in a single paragraph produce one
990        // diagnostic apiece in a single resolver pass.
991        let src = "see @alpha and @beta and @gamma\n";
992        let (_doc, diags) = lower(src);
993        let mos0033: Vec<&Diagnostic> = diags
994            .iter()
995            .filter(|d| d.def().code() == codes::MOS0033.code())
996            .collect();
997        assert_eq!(
998            mos0033.len(),
999            3,
1000            "expected one MOS0033 per unknown label, got {diags:?}"
1001        );
1002        let labels: BTreeSet<&str> = mos0033
1003            .iter()
1004            .filter_map(|d| {
1005                // Each MOS0033's message is `unknown label `<name>` in `@` reference`.
1006                let msg = &d.message();
1007                let start = msg.find('`')? + 1;
1008                let end = start + msg[start..].find('`')?;
1009                Some(&msg[start..end])
1010            })
1011            .collect();
1012        assert_eq!(
1013            labels,
1014            ["alpha", "beta", "gamma"].into_iter().collect(),
1015            "each unknown label should appear exactly once"
1016        );
1017    }
1018
1019    #[test]
1020    fn unknown_reference_suggestion_is_not_duplicated_after_fixpoint_rerun() {
1021        // The resolved `@intro` changes reference text on the first pass, so
1022        // the fixpoint runs again. The unknown `@intrdo` must still get one
1023        // MOS0033 with one structured suggestion, not one per iteration.
1024        let src = "= Intro <intro>\n\nsee @intro and @intrdo\n";
1025        let (doc, diags) = lower(src);
1026        let mos0033: Vec<&Diagnostic> = diags
1027            .iter()
1028            .filter(|d| d.def().code() == codes::MOS0033.code())
1029            .collect();
1030        assert_eq!(
1031            mos0033.len(),
1032            1,
1033            "expected one MOS0033 after fixpoint rerun, got {diags:?}"
1034        );
1035        let d = mos0033[0];
1036        let suggestions = d.suggestions();
1037        assert_eq!(
1038            suggestions.len(),
1039            1,
1040            "expected one suggestion after fixpoint rerun, got {suggestions:?}"
1041        );
1042        if let Some(suggestion) = suggestions.first() {
1043            assert_eq!(suggestion.replacement, "@intro");
1044            assert_eq!(
1045                apply_suggestion(src, suggestion),
1046                "= Intro <intro>\n\nsee @intro and @intro\n",
1047                "fix should replace only the unknown reference token"
1048            );
1049        }
1050        let reference_texts: Vec<&str> = doc
1051            .nodes()
1052            .filter(|n| n.kind == NodeKind::Reference)
1053            .filter_map(|n| match n.attributes.get("text") {
1054                Some(AttrValue::Str(s)) => Some(s.as_str()),
1055                _ => None,
1056            })
1057            .collect();
1058        assert_eq!(
1059            reference_texts,
1060            vec!["1", "?intrdo?"],
1061            "resolved refs rewrite while unknown refs keep visible placeholders"
1062        );
1063    }
1064
1065    #[test]
1066    fn reference_resolves_to_section_number() {
1067        let (doc, diags) =
1068            lower("= Intro <intro>\n\n= Methods <methods>\n\nsee @methods and @intro\n");
1069        assert!(diags.is_empty(), "{diags:?}");
1070        let refs: Vec<String> = doc
1071            .nodes()
1072            .filter(|n| n.kind == NodeKind::Reference)
1073            .filter_map(|n| match n.attributes.get("text") {
1074                Some(AttrValue::Str(s)) => Some(s.clone()),
1075                _ => None,
1076            })
1077            .collect();
1078        assert_eq!(refs, vec!["2".to_owned(), "1".to_owned()]);
1079    }
1080
1081    #[test]
1082    fn paragraph_label_indexes_paragraph() {
1083        // A paragraph-attached label has no section number, so the
1084        // resolver falls back to using the bare label as the rewritten
1085        // text. No MOS0033 is emitted because the target exists.
1086        let (doc, diags) = lower("<note> a side note here\n\nsee @note\n");
1087        assert!(diags.is_empty(), "{diags:?}");
1088        let reference_text = doc
1089            .nodes()
1090            .find(|n| n.kind == NodeKind::Reference)
1091            .and_then(|n| n.attributes.get("text"));
1092        assert_eq!(reference_text, Some(&AttrValue::Str("note".to_owned())));
1093    }
1094
1095    /// Build a synthetic node with `kind`, `label`, and (optionally) a
1096    /// section `number`. Used by classifier tests to exercise typed
1097    /// targets without dragging in image/file I/O.
1098    fn make_node(
1099        doc: &mut Document,
1100        kind: NodeKind,
1101        label: Option<&str>,
1102        number: Option<&str>,
1103    ) -> mos_core::NodeId {
1104        let mut attrs = mos_core::AttrMap::new();
1105        if let Some(l) = label {
1106            attrs.insert("label".to_owned(), AttrValue::Str(l.to_owned()));
1107        }
1108        if let Some(n) = number {
1109            attrs.insert("number".to_owned(), AttrValue::Str(n.to_owned()));
1110        }
1111        doc.alloc_child(
1112            doc.root,
1113            mos_core::NodeSpec::new(kind, SourceSpan::placeholder(doc.file.clone()))
1114                .with_attributes(attrs),
1115        )
1116    }
1117
1118    /// Build a synthetic `Text` node under `parent` carrying `text`,
1119    /// returning its id. The lowerer's caption text nodes have exactly
1120    /// this shape.
1121    fn make_text(doc: &mut Document, parent: mos_core::NodeId, text: &str) -> mos_core::NodeId {
1122        let mut attrs = mos_core::AttrMap::new();
1123        attrs.insert("text".to_owned(), AttrValue::Str(text.to_owned()));
1124        doc.alloc_child(
1125            parent,
1126            mos_core::NodeSpec::new(NodeKind::Text, SourceSpan::placeholder(doc.file.clone()))
1127                .with_attributes(attrs),
1128        )
1129    }
1130
1131    /// Build a `Figure` (optionally labelled) carrying a `role = "caption"`
1132    /// paragraph whose single `Text` child holds `caption`. Returns the
1133    /// figure id and the caption text-node id so tests can assert on the
1134    /// stamped label. Mirrors the shape the lowerer produces for a
1135    /// captioned `#figure`.
1136    fn make_captioned_figure(
1137        doc: &mut Document,
1138        label: Option<&str>,
1139        caption: &str,
1140    ) -> (mos_core::NodeId, mos_core::NodeId) {
1141        let figure = make_node(doc, NodeKind::Figure, label, None);
1142        let mut caption_attrs = mos_core::AttrMap::new();
1143        caption_attrs.insert("role".to_owned(), AttrValue::Str("caption".to_owned()));
1144        let caption_para = doc.alloc_child(
1145            figure,
1146            mos_core::NodeSpec::new(
1147                NodeKind::Paragraph,
1148                SourceSpan::placeholder(doc.file.clone()),
1149            )
1150            .with_attributes(caption_attrs),
1151        );
1152        let caption_text = make_text(doc, caption_para, caption);
1153        (figure, caption_text)
1154    }
1155
1156    /// Read a node's resolved `number` attribute as an owned string, or
1157    /// the empty string if the node is missing or unnumbered. Test-only
1158    /// convenience wrapping [`captured_number`] for the numbering
1159    /// assertions below.
1160    fn node_number(doc: &Document, id: mos_core::NodeId) -> String {
1161        doc.get(id).map(captured_number).unwrap_or_default()
1162    }
1163
1164    #[test]
1165    fn classify_target_distinguishes_kinds() {
1166        let mut doc = Document::new(PathBuf::from("test.mos"));
1167        let section_id = make_node(&mut doc, NodeKind::Section, Some("sec"), Some("1.2"));
1168        let figure_id = make_node(&mut doc, NodeKind::Figure, Some("fig"), Some("3"));
1169        let paragraph_id = make_node(&mut doc, NodeKind::Paragraph, Some("p"), None);
1170
1171        assert_eq!(
1172            doc.get(section_id).map(classify_target),
1173            Some(LabelTargetKind::Section {
1174                number: "1.2".to_owned()
1175            })
1176        );
1177
1178        assert_eq!(
1179            doc.get(figure_id).map(classify_target),
1180            Some(LabelTargetKind::Figure {
1181                number: "3".to_owned(),
1182                supplement: "Figure".to_owned(),
1183            })
1184        );
1185
1186        assert_eq!(
1187            doc.get(paragraph_id).map(classify_target),
1188            Some(LabelTargetKind::Generic)
1189        );
1190    }
1191
1192    #[test]
1193    fn figure_reference_renders_kind_aware_text() {
1194        // Constructs a Figure node with a label and a Reference to it,
1195        // then runs the full resolver. Verifies:
1196        //   - the figure receives document-order number "1",
1197        //   - the figure label is found (no MOS0033),
1198        //   - the label index records the target as a numbered `Figure`,
1199        //   - the reference's rewritten text is kind-aware `"Figure 1"`,
1200        //     not the bare label name.
1201        let mut doc = Document::new(PathBuf::from("test.mos"));
1202        let figure_id = make_node(&mut doc, NodeKind::Figure, Some("fig:one"), None);
1203        let ref_id = doc.alloc_child(
1204            doc.root,
1205            mos_core::NodeSpec::new(
1206                NodeKind::Reference,
1207                SourceSpan::placeholder(doc.file.clone()),
1208            )
1209            .with_attributes({
1210                let mut a = mos_core::AttrMap::new();
1211                a.insert("label".to_owned(), AttrValue::Str("fig:one".to_owned()));
1212                a.insert("text".to_owned(), AttrValue::Str("?fig:one?".to_owned()));
1213                a
1214            }),
1215        );
1216
1217        let diags = resolve(&mut doc, &BTreeSet::new());
1218        assert!(diags.is_empty(), "{diags:?}");
1219
1220        // The figure carries its resolved document-order number.
1221        assert_eq!(
1222            doc.get(figure_id).and_then(|f| f.attributes.get("number")),
1223            Some(&AttrValue::Str("1".to_owned()))
1224        );
1225
1226        let mut sink: Vec<Diagnostic> = Vec::new();
1227        let index = build_label_index(&doc, &mut sink);
1228        assert!(sink.is_empty(), "{sink:?}");
1229        assert_eq!(
1230            index.get("fig:one").map(|target| &target.kind),
1231            Some(&LabelTargetKind::Figure {
1232                number: "1".to_owned(),
1233                supplement: "Figure".to_owned(),
1234            })
1235        );
1236
1237        assert_eq!(
1238            doc.get(ref_id).and_then(|r| r.attributes.get("text")),
1239            Some(&AttrValue::Str("Figure\u{00A0}1".to_owned())),
1240            "a figure reference resolves to kind-aware `Figure N` text, joined by a non-breaking space"
1241        );
1242    }
1243
1244    #[test]
1245    fn captioned_figure_gets_supplement_label_stamped() {
1246        // A figure with a `role = "caption"` paragraph gets its caption
1247        // text prefixed with the non-breaking `Figure N: ` label so the
1248        // number is visible; the figure itself is still numbered "1".
1249        let mut doc = Document::new(PathBuf::from("test.mos"));
1250        let (figure, caption_text) = make_captioned_figure(&mut doc, Some("fig:a"), "A plot.");
1251
1252        let diags = resolve(&mut doc, &BTreeSet::new());
1253        assert!(diags.is_empty(), "{diags:?}");
1254
1255        assert_eq!(node_number(&doc, figure), "1");
1256        assert_eq!(
1257            read_str_attr(&doc, caption_text, "text"),
1258            Some("Figure\u{00A0}1: A plot.".to_owned()),
1259            "the caption is prefixed with the non-breaking `Figure N: ` label"
1260        );
1261    }
1262
1263    #[test]
1264    fn skipped_figure_omits_label_and_does_not_advance_counter() {
1265        // `#figure(numbered: false)` opts out of numbering (issue #76): no
1266        // `number` attribute, no `Figure N:` caption prefix, and the
1267        // documented counter rule; the skip does not advance the counter,
1268        // so a later numbered figure is still "Figure 1", not "Figure 2".
1269        let mut doc = Document::new(PathBuf::from("test.mos"));
1270        let (skipped, skipped_caption) =
1271            make_captioned_figure(&mut doc, Some("fig:skip"), "Decorative.");
1272        if let Some(node) = doc.get_mut(skipped) {
1273            node.attributes
1274                .insert("numbered".to_owned(), AttrValue::Bool(false));
1275        }
1276        let (numbered, numbered_caption) =
1277            make_captioned_figure(&mut doc, Some("fig:num"), "A plot.");
1278
1279        let diags = resolve(&mut doc, &BTreeSet::new());
1280        assert!(diags.is_empty(), "{diags:?}");
1281
1282        assert_eq!(
1283            node_number(&doc, skipped),
1284            "",
1285            "a skipped figure carries no number"
1286        );
1287        assert_eq!(
1288            read_str_attr(&doc, skipped_caption, "text"),
1289            Some("Decorative.".to_owned()),
1290            "a skipped figure's caption keeps no `Figure N:` prefix"
1291        );
1292        assert_eq!(
1293            node_number(&doc, numbered),
1294            "1",
1295            "the skipped figure must not consume or gap the counter"
1296        );
1297        assert_eq!(
1298            read_str_attr(&doc, numbered_caption, "text"),
1299            Some("Figure\u{00A0}1: A plot.".to_owned())
1300        );
1301    }
1302
1303    #[test]
1304    fn custom_supplement_renders_in_caption_and_reference() {
1305        // `#figure(supplement: "Plate")` swaps the supplement word in both
1306        // the stamped caption and any reference to the figure (issue #76).
1307        let mut doc = Document::new(PathBuf::from("test.mos"));
1308        let (figure, caption_text) = make_captioned_figure(&mut doc, Some("fig:plate"), "A map.");
1309        if let Some(node) = doc.get_mut(figure) {
1310            node.attributes
1311                .insert("supplement".to_owned(), AttrValue::Str("Plate".to_owned()));
1312        }
1313        let ref_id = doc.alloc_child(
1314            doc.root,
1315            mos_core::NodeSpec::new(
1316                NodeKind::Reference,
1317                SourceSpan::placeholder(doc.file.clone()),
1318            )
1319            .with_attributes({
1320                let mut a = mos_core::AttrMap::new();
1321                a.insert("label".to_owned(), AttrValue::Str("fig:plate".to_owned()));
1322                a.insert("text".to_owned(), AttrValue::Str("?fig:plate?".to_owned()));
1323                a
1324            }),
1325        );
1326
1327        let diags = resolve(&mut doc, &BTreeSet::new());
1328        assert!(diags.is_empty(), "{diags:?}");
1329
1330        assert_eq!(
1331            read_str_attr(&doc, caption_text, "text"),
1332            Some("Plate\u{00A0}1: A map.".to_owned()),
1333            "the caption uses the custom supplement word"
1334        );
1335        assert_eq!(
1336            doc.get(ref_id).and_then(|r| r.attributes.get("text")),
1337            Some(&AttrValue::Str("Plate\u{00A0}1".to_owned())),
1338            "a reference renders the custom supplement, not `Figure`"
1339        );
1340    }
1341
1342    #[test]
1343    fn empty_supplement_renders_number_only() {
1344        // `#figure(supplement: "")` / `supplement: none` keeps the figure
1345        // numbered but drops the supplement word: the caption and any
1346        // reference show the number alone; the "no visible prefix" form
1347        // (issue #76). Distinct from `numbered: false`, which drops the
1348        // number entirely.
1349        let mut doc = Document::new(PathBuf::from("test.mos"));
1350        let (figure, caption_text) = make_captioned_figure(&mut doc, Some("fig:plain"), "A chart.");
1351        if let Some(node) = doc.get_mut(figure) {
1352            node.attributes
1353                .insert("supplement".to_owned(), AttrValue::Str(String::new()));
1354        }
1355        let ref_id = doc.alloc_child(
1356            doc.root,
1357            mos_core::NodeSpec::new(
1358                NodeKind::Reference,
1359                SourceSpan::placeholder(doc.file.clone()),
1360            )
1361            .with_attributes({
1362                let mut a = mos_core::AttrMap::new();
1363                a.insert("label".to_owned(), AttrValue::Str("fig:plain".to_owned()));
1364                a.insert("text".to_owned(), AttrValue::Str("?fig:plain?".to_owned()));
1365                a
1366            }),
1367        );
1368
1369        let diags = resolve(&mut doc, &BTreeSet::new());
1370        assert!(diags.is_empty(), "{diags:?}");
1371
1372        assert_eq!(
1373            read_str_attr(&doc, caption_text, "text"),
1374            Some("1: A chart.".to_owned()),
1375            "an empty supplement renders the number with no word and no leading space"
1376        );
1377        assert_eq!(
1378            doc.get(ref_id).and_then(|r| r.attributes.get("text")),
1379            Some(&AttrValue::Str("1".to_owned())),
1380            "a reference to a number-only figure renders just the number"
1381        );
1382    }
1383
1384    #[test]
1385    fn reference_to_skipped_figure_renders_bare_label() {
1386        // A reference to a `numbered: false` figure has no number to show,
1387        // so it falls back to the bare label name: like an image reference.
1388        let mut doc = Document::new(PathBuf::from("test.mos"));
1389        let figure = make_node(&mut doc, NodeKind::Figure, Some("fig:skip"), None);
1390        if let Some(node) = doc.get_mut(figure) {
1391            node.attributes
1392                .insert("numbered".to_owned(), AttrValue::Bool(false));
1393        }
1394        let ref_id = doc.alloc_child(
1395            doc.root,
1396            mos_core::NodeSpec::new(
1397                NodeKind::Reference,
1398                SourceSpan::placeholder(doc.file.clone()),
1399            )
1400            .with_attributes({
1401                let mut a = mos_core::AttrMap::new();
1402                a.insert("label".to_owned(), AttrValue::Str("fig:skip".to_owned()));
1403                a.insert("text".to_owned(), AttrValue::Str("?fig:skip?".to_owned()));
1404                a
1405            }),
1406        );
1407
1408        let diags = resolve(&mut doc, &BTreeSet::new());
1409        assert!(diags.is_empty(), "{diags:?}");
1410
1411        assert_eq!(
1412            doc.get(ref_id).and_then(|r| r.attributes.get("text")),
1413            Some(&AttrValue::Str("fig:skip".to_owned())),
1414            "a reference to a skipped figure renders the bare label"
1415        );
1416    }
1417
1418    #[test]
1419    fn resolve_is_idempotent_for_captioned_figures() {
1420        // `resolve` is public and re-entrant: the §6 stage 3 fixpoint and
1421        // future page-reference passes rerun it. Stamping the caption
1422        // label must therefore be idempotent; the second pass has to
1423        // reproduce `"Figure 1: A plot."` byte-for-byte instead of
1424        // re-reading the stamped text and nesting the label into
1425        // `"Figure 1: Figure 1: A plot."`.
1426        let mut doc = Document::new(PathBuf::from("test.mos"));
1427        let (_figure, caption_text) = make_captioned_figure(&mut doc, Some("fig:a"), "A plot.");
1428
1429        let first = resolve(&mut doc, &BTreeSet::new());
1430        assert!(first.is_empty(), "{first:?}");
1431        let after_first = read_str_attr(&doc, caption_text, "text");
1432        assert_eq!(after_first, Some("Figure\u{00A0}1: A plot.".to_owned()));
1433
1434        let second = resolve(&mut doc, &BTreeSet::new());
1435        assert!(second.is_empty(), "{second:?}");
1436        assert_eq!(
1437            read_str_attr(&doc, caption_text, "text"),
1438            after_first,
1439            "a second resolve pass must not re-stamp the figure label"
1440        );
1441    }
1442
1443    #[test]
1444    fn figures_get_sequential_document_order_numbers() {
1445        // Three figures, one without a label, get flat document-order
1446        // numbers. Numbering is unconditional: the unlabelled middle
1447        // figure still advances the counter.
1448        let mut doc = Document::new(PathBuf::from("test.mos"));
1449        let first = make_node(&mut doc, NodeKind::Figure, Some("fig:a"), None);
1450        let middle = make_node(&mut doc, NodeKind::Figure, None, None);
1451        let last = make_node(&mut doc, NodeKind::Figure, Some("fig:c"), None);
1452
1453        let diags = resolve(&mut doc, &BTreeSet::new());
1454        assert!(diags.is_empty(), "{diags:?}");
1455
1456        assert_eq!(node_number(&doc, first), "1");
1457        assert_eq!(
1458            node_number(&doc, middle),
1459            "2",
1460            "unlabelled figures are still numbered"
1461        );
1462        assert_eq!(node_number(&doc, last), "3");
1463    }
1464
1465    #[test]
1466    fn figures_and_sections_use_independent_counters() {
1467        // Sections and figures count independently: a figure sandwiched
1468        // between two sections is still figure "1", and the sections are
1469        // "1"/"2" regardless of the figures interleaved with them.
1470        let mut doc = Document::new(PathBuf::from("test.mos"));
1471        let sec_one = make_node(&mut doc, NodeKind::Section, Some("sec:a"), None);
1472        let fig_one = make_node(&mut doc, NodeKind::Figure, Some("fig:a"), None);
1473        let sec_two = make_node(&mut doc, NodeKind::Section, Some("sec:b"), None);
1474        let fig_two = make_node(&mut doc, NodeKind::Figure, Some("fig:b"), None);
1475
1476        let diags = resolve(&mut doc, &BTreeSet::new());
1477        assert!(diags.is_empty(), "{diags:?}");
1478
1479        assert_eq!(node_number(&doc, sec_one), "1");
1480        assert_eq!(node_number(&doc, sec_two), "2");
1481        assert_eq!(node_number(&doc, fig_one), "1");
1482        assert_eq!(node_number(&doc, fig_two), "2");
1483    }
1484
1485    #[test]
1486    fn section_target_index_carries_resolved_number() {
1487        let (doc, diags) = lower("= Intro <intro>\n\n== Methods <methods>\n");
1488        assert!(diags.is_empty(), "{diags:?}");
1489
1490        let mut sink: Vec<Diagnostic> = Vec::new();
1491        let index = build_label_index(&doc, &mut sink);
1492        assert!(sink.is_empty(), "{sink:?}");
1493
1494        assert_eq!(
1495            index.get("intro").map(|t| &t.kind),
1496            Some(&LabelTargetKind::Section {
1497                number: "1".to_owned()
1498            })
1499        );
1500        assert_eq!(
1501            index.get("methods").map(|t| &t.kind),
1502            Some(&LabelTargetKind::Section {
1503                number: "1.1".to_owned()
1504            })
1505        );
1506    }
1507
1508    #[test]
1509    fn level_three_numbers_correctly() {
1510        let (doc, diags) = lower("= A\n\n== B\n\n=== C\n\n== D\n\n= E\n");
1511        assert!(diags.is_empty(), "{diags:?}");
1512        let nums: Vec<String> = doc
1513            .nodes()
1514            .filter(|n| n.kind == NodeKind::Section)
1515            .filter_map(|n| match n.attributes.get("number") {
1516                Some(AttrValue::Str(s)) => Some(s.clone()),
1517                _ => None,
1518            })
1519            .collect();
1520        assert_eq!(nums, vec!["1", "1.1", "1.1.1", "1.2", "2"]);
1521    }
1522
1523    #[test]
1524    fn unknown_reference_suggests_nearest_label() {
1525        // A near-miss typo gets a machine-applicable "did you mean" fix:
1526        // replace the whole `@intrdo` token (sigil included) with `@intro`.
1527        let src = "= Intro <intro>\n\nsee @intrdo\n";
1528        let (doc, diags) = lower(src);
1529        let mos0033: Vec<&Diagnostic> = diags
1530            .iter()
1531            .filter(|d| d.def().code() == codes::MOS0033.code())
1532            .collect();
1533        assert_eq!(
1534            mos0033.len(),
1535            1,
1536            "expected exactly one MOS0033, got {diags:?}"
1537        );
1538        let d = mos0033[0];
1539        // Message and span are unchanged from the no-suggestion path.
1540        assert!(
1541            d.message().contains("`intrdo`"),
1542            "message should still name the missing label, got {:?}",
1543            d.message()
1544        );
1545        assert_eq!(
1546            d.span().map(|span| &src[span.start()..span.end()]),
1547            Some("@intrdo"),
1548            "MOS0033 span should still cover the bad reference exactly"
1549        );
1550        // Exactly one structured suggestion, replacing the full reference.
1551        let suggestions = d.suggestions();
1552        assert_eq!(
1553            suggestions.len(),
1554            1,
1555            "expected one nearest-label suggestion, got {suggestions:?}"
1556        );
1557        if let Some(suggestion) = suggestions.first() {
1558            assert_eq!(
1559                &src[suggestion.span.start()..suggestion.span.end()],
1560                "@intrdo",
1561                "suggestion should replace the whole `@` reference token"
1562            );
1563            assert_eq!(suggestion.replacement, "@intro");
1564            assert_eq!(
1565                apply_suggestion(src, suggestion),
1566                "= Intro <intro>\n\nsee @intro\n",
1567                "applying the fix should rewrite `@intrdo` to `@intro`"
1568            );
1569        }
1570        // The unresolved placeholder stays visible in the meantime.
1571        let reference_text = doc
1572            .nodes()
1573            .find(|n| n.kind == NodeKind::Reference)
1574            .and_then(|n| n.attributes.get("text"));
1575        assert_eq!(reference_text, Some(&AttrValue::Str("?intrdo?".to_owned())));
1576    }
1577
1578    #[test]
1579    fn unknown_reference_suggestion_breaks_ties_deterministically() {
1580        // `@intrx` sits one edit from both `intra` and `intro`. The tie
1581        // breaks on `(distance, label)`, so the single suggestion is always
1582        // the lexicographically smaller `@intra`.
1583        let src = "= A <intra>\n\n= B <intro>\n\nsee @intrx\n";
1584        let (_doc, diags) = lower(src);
1585        let mos0033: Vec<&Diagnostic> = diags
1586            .iter()
1587            .filter(|d| d.def().code() == codes::MOS0033.code())
1588            .collect();
1589        assert_eq!(mos0033.len(), 1, "got {diags:?}");
1590        if let Some(d) = mos0033.first() {
1591            let suggestions = d.suggestions();
1592            assert_eq!(
1593                suggestions.len(),
1594                1,
1595                "exactly one nearest-label suggestion, got {suggestions:?}"
1596            );
1597            if let Some(suggestion) = suggestions.first() {
1598                assert_eq!(
1599                    suggestion.replacement, "@intra",
1600                    "ties must resolve to the lexicographically smaller label"
1601                );
1602            }
1603        }
1604    }
1605
1606    #[test]
1607    fn unknown_reference_without_close_match_has_no_suggestion() {
1608        // An unrelated reference name is left without a guess.
1609        let src = "= Intro <intro>\n\nsee @conclusion\n";
1610        let (_doc, diags) = lower(src);
1611        let mos0033: Vec<&Diagnostic> = diags
1612            .iter()
1613            .filter(|d| d.def().code() == codes::MOS0033.code())
1614            .collect();
1615        assert_eq!(mos0033.len(), 1, "got {diags:?}");
1616        if let Some(d) = mos0033.first() {
1617            assert!(
1618                d.suggestions().is_empty(),
1619                "an unrelated label must not be suggested, got {:?}",
1620                d.suggestions()
1621            );
1622        }
1623    }
1624
1625    #[test]
1626    fn short_unknown_reference_has_no_suggestion() {
1627        // Conservative floor: references shorter than three bytes never get a
1628        // suggestion, even when a one-edit neighbour (`ax`) exists.
1629        let src = "= A <ax>\n\nsee @ab\n";
1630        let (_doc, diags) = lower(src);
1631        let mos0033: Vec<&Diagnostic> = diags
1632            .iter()
1633            .filter(|d| d.def().code() == codes::MOS0033.code())
1634            .collect();
1635        assert_eq!(mos0033.len(), 1, "got {diags:?}");
1636        if let Some(d) = mos0033.first() {
1637            assert!(
1638                d.suggestions().is_empty(),
1639                "short references must not be guessed, got {:?}",
1640                d.suggestions()
1641            );
1642        }
1643    }
1644
1645    #[test]
1646    fn unreferenceable_label_is_not_suggested() {
1647        // `#figure(label: "...")` / `#image(label: "...")` accept arbitrary
1648        // strings, so the index can hold a label the `@`-reference grammar
1649        // cannot spell. `@intro x` would not parse, so even this one-edit
1650        // match must be filtered out and produce no suggestion.
1651        let mut doc = Document::new(PathBuf::from("test.mos"));
1652        let _figure = make_node(&mut doc, NodeKind::Figure, Some("intro x"), None);
1653        let _reference = make_node(&mut doc, NodeKind::Reference, Some("introx"), None);
1654
1655        let mut diagnostics: Vec<Diagnostic> = Vec::new();
1656        let index = build_label_index(&doc, &mut diagnostics);
1657        let changed = rewrite_references(&mut doc, &index, &BTreeSet::new(), &mut diagnostics);
1658        assert!(!changed, "an unknown reference rewrites no text");
1659
1660        let mos0033: Vec<&Diagnostic> = diagnostics
1661            .iter()
1662            .filter(|d| d.def().code() == codes::MOS0033.code())
1663            .collect();
1664        assert_eq!(mos0033.len(), 1, "got {diagnostics:?}");
1665        if let Some(d) = mos0033.first() {
1666            assert!(
1667                d.suggestions().is_empty(),
1668                "an unreferenceable label must not be suggested, got {:?}",
1669                d.suggestions()
1670            );
1671        }
1672    }
1673}