Skip to main content

mos_eval/
lib.rs

1//! Expression and scripting evaluator (manifest §4, §25).
2//!
3//! The "evaluator" is really a *lowerer + resolver*: it walks a
4//! [`SyntaxTree`] from `mos-parse` and builds the typed semantic
5//! [`Document`] graph from `mos-core` (manifest §6 stage 2), then
6//! runs the [`resolve`](resolve()) pass to assign section numbers and rewrite
7//! `@label` cross-references (§6 stage 3, MVP 1).
8
9#![doc(
10    html_logo_url = "https://mosaic.kjanat.dev/assets/A4.svg",
11    html_favicon_url = "https://mosaic.kjanat.dev/assets/A4.svg"
12)]
13
14mod bibliography;
15mod image;
16mod image_lower;
17mod inline;
18mod list;
19mod pageref;
20mod resolve;
21mod set;
22mod set_schema;
23
24use std::collections::BTreeMap;
25
26use mos_core::{
27    AttrMap, AttrValue, CollectingSink, Diagnostic, Document, NodeId, NodeKind, NodeSpec, Severity,
28    SourceSpan,
29};
30use mos_parse::{DirectiveKind, Item, RawBlockKind, SyntaxTree};
31
32pub use pageref::{PageFixpointOutcome, resolve_page_reference_fixpoint, resolve_page_references};
33pub use resolve::resolve;
34
35use bibliography::{lower_bibliography_directive, resolve_citations};
36use image_lower::{lower_figure_directive, lower_image_directive};
37use inline::lower_inlines;
38use list::lower_list;
39use set::lower_set_directive;
40
41const LABEL_SPAN_START_ATTR: &str = "label_span.start";
42const LABEL_SPAN_END_ATTR: &str = "label_span.end";
43
44fn insert_label_attributes(attributes: &mut AttrMap, label: &str, label_span: Option<&SourceSpan>) {
45    attributes.insert("label".to_owned(), AttrValue::Str(label.to_owned()));
46    let Some(span) = label_span else {
47        return;
48    };
49    let (Ok(start), Ok(end)) = (i64::try_from(span.start()), i64::try_from(span.end())) else {
50        // AttrValue::Int is i64 while SourceSpan offsets are usize. If a future
51        // source can exceed that range, omit the fix-it span instead of storing
52        // a lossy edit location; the resolver will skip the unsafe suggestion.
53        return;
54    };
55    attributes.insert(LABEL_SPAN_START_ATTR.to_owned(), AttrValue::Int(start));
56    attributes.insert(LABEL_SPAN_END_ATTR.to_owned(), AttrValue::Int(end));
57}
58
/// Metadata gathered from `#set document(...)` directives during lowering.
///
/// The PDF backend writes `title` and `author` into the Info dictionary.
/// `language` is captured for the catalog `/Lang` entry, which the next
/// PDF-metadata slice is meant to wire up.
///
/// # Examples
///
/// ```
/// use mos_eval::DocumentMetadata;
///
/// let metadata = DocumentMetadata {
///     title: Some("Demo".to_owned()),
///     author: None,
///     language: Some("en".to_owned()),
/// };
///
/// assert_eq!(metadata.title.as_deref(), Some("Demo"));
/// assert_eq!(metadata.author, None);
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DocumentMetadata {
    /// Document title; written to the PDF Info dictionary.
    pub title: Option<String>,
    /// Document author; written to the PDF Info dictionary.
    pub author: Option<String>,
    /// Document language, kept for the PDF catalog `/Lang` entry.
    pub language: Option<String>,
}
83
84/// Result of lowering a [`SyntaxTree`] into a [`Document`].
85///
86/// # Examples
87///
88/// ```
89/// use std::path::Path;
90///
91/// use mos_core::CollectingSink;
92/// use mos_eval::{Evaluator, LowerResult};
93///
94/// let mut sink = CollectingSink::new();
95/// let parse_result = mos_parse::parse("= Hello\n", Path::new("main.mos"), &mut sink);
96/// assert!(
97///     parse_result.is_ok(),
98///     "parse structurally aborted: {parse_result:?}"
99/// );
100/// if let Ok(tree) = parse_result {
101///     let result: LowerResult = Evaluator::new().evaluate(&tree);
102///
103///     assert!(!result.has_errors());
104/// }
105/// ```
106#[derive(Debug)]
107pub struct LowerResult {
108    pub document: Document,
109    pub diagnostics: Vec<Diagnostic>,
110    pub metadata: DocumentMetadata,
111    /// Whether lowering this document read external files: `#image` /
112    /// `#figure` image loads and `#bibliography` source reads. Such a
113    /// lowering is **not a pure function of the source text**: the same
114    /// `(src, file)` can lower differently as referenced files appear,
115    /// change, or fail to load. Callers that cache a `LowerResult` across
116    /// time (e.g. the language server's per-document memo) must not reuse
117    /// one with this set, since an external change would make it stale
118    /// without any source edit to invalidate it.
119    pub reads_external_resources: bool,
120}
121
122impl LowerResult {
123    /// Return whether any lowering diagnostic is an error.
124    ///
125    /// # Examples
126    ///
127    /// ```
128    /// use std::path::Path;
129    ///
130    /// let result = mos_eval::lower("= Hello\n", Path::new("main.mos"));
131    ///
132    /// assert!(!result.has_errors());
133    /// ```
134    #[must_use]
135    pub fn has_errors(&self) -> bool {
136        self.diagnostics
137            .iter()
138            .any(|d| d.severity() == Severity::Error)
139    }
140}
141
/// Stateless lowerer that turns parse syntax into the semantic document
/// graph.
///
/// # Examples
///
/// ```
/// use mos_eval::Evaluator;
///
/// let evaluator = Evaluator::new();
///
/// assert_eq!(format!("{evaluator:?}"), "Evaluator");
/// ```
#[derive(Debug, Default)]
pub struct Evaluator;
155
156impl Evaluator {
157    /// Construct an evaluator.
158    ///
159    /// # Examples
160    ///
161    /// ```
162    /// use mos_eval::Evaluator;
163    ///
164    /// let evaluator = Evaluator::new();
165    ///
166    /// assert_eq!(format!("{evaluator:?}"), "Evaluator");
167    /// ```
168    #[must_use]
169    pub fn new() -> Self {
170        Self
171    }
172
173    /// Lower `tree` into a semantic [`Document`].
174    ///
175    /// # Examples
176    ///
177    /// ```
178    /// use std::path::Path;
179    ///
180    /// use mos_core::CollectingSink;
181    /// use mos_eval::Evaluator;
182    ///
183    /// let mut sink = CollectingSink::new();
184    /// let parse_result = mos_parse::parse("= Hello\n", Path::new("main.mos"), &mut sink);
185    /// assert!(
186    ///     parse_result.is_ok(),
187    ///     "parse structurally aborted: {parse_result:?}"
188    /// );
189    /// if let Ok(tree) = parse_result {
190    ///     let result = Evaluator::new().evaluate(&tree);
191    ///
192    ///     assert_eq!(result.document.len(), 3);
193    /// }
194    /// ```
195    pub fn evaluate(&self, tree: &SyntaxTree) -> LowerResult {
196        let mut document = Document::new(tree.file.clone());
197        let mut diagnostics: Vec<Diagnostic> = Vec::new();
198        let mut metadata = DocumentMetadata::default();
199        // Tracks the most-recently-set body text size in pt so `em`
200        // literals on later directives resolve against the right unit.
201        // Defaults to 11pt to match `mos-layout`'s `BODY_SIZE_PT`.
202        let mut current_text_size_pt: f64 = 11.0;
203        // Set when a directive reads the filesystem (`#image` / `#figure`
204        // image loads, `#bibliography` source reads), marking this lowering
205        // impure over `(src, file)` so time-based caches don't reuse it.
206        let mut reads_external_resources = false;
207        let root = document.root;
208
209        for item in &tree.items {
210            match item {
211                Item::Heading {
212                    level,
213                    inlines,
214                    label,
215                    label_span,
216                    span,
217                } => {
218                    let mut attributes: AttrMap = BTreeMap::new();
219                    attributes.insert("level".to_owned(), AttrValue::Int(i64::from(*level)));
220                    if let Some(id) = label {
221                        insert_label_attributes(&mut attributes, id, label_span.as_ref());
222                    }
223                    let heading = document.alloc_child(
224                        root,
225                        NodeSpec::new(NodeKind::Section, span.clone()).with_attributes(attributes),
226                    );
227                    lower_inlines(&mut document, heading, inlines);
228                }
229                Item::Paragraph {
230                    inlines,
231                    label,
232                    label_span,
233                    span,
234                } => {
235                    let mut attributes: AttrMap = BTreeMap::new();
236                    if let Some(id) = label {
237                        insert_label_attributes(&mut attributes, id, label_span.as_ref());
238                    }
239                    let para = document.alloc_child(
240                        root,
241                        NodeSpec::new(NodeKind::Paragraph, span.clone())
242                            .with_attributes(attributes),
243                    );
244                    lower_inlines(&mut document, para, inlines);
245                }
246                Item::List {
247                    ordered,
248                    items,
249                    span,
250                } => {
251                    lower_list(&mut document, root, *ordered, items, span);
252                }
253                Item::RawBlock {
254                    kind,
255                    text,
256                    label,
257                    label_span,
258                    span,
259                    ..
260                } => {
261                    lower_raw_block(
262                        &mut document,
263                        root,
264                        *kind,
265                        text,
266                        label.as_deref(),
267                        label_span.as_ref(),
268                        span,
269                    );
270                }
271                Item::Set {
272                    kind,
273                    name,
274                    args,
275                    span,
276                } => match kind {
277                    // `DirectiveKind` (set by the parser) is the
278                    // discriminator here, *not* `name`: `#set image(...)`
279                    // and `#image(...)` are both parsed with `name ==
280                    // "image"`, and dispatching on the string would
281                    // route `#set image(width: 200pt)` into the image
282                    // loader and incorrectly raise MOS0037 "missing path".
283                    DirectiveKind::Image => {
284                        lower_image_directive(
285                            &mut document,
286                            root,
287                            args,
288                            span,
289                            &tree.file,
290                            current_text_size_pt,
291                            &mut diagnostics,
292                        );
293                        reads_external_resources = true;
294                    }
295                    DirectiveKind::Figure => {
296                        lower_figure_directive(
297                            &mut document,
298                            root,
299                            args,
300                            span,
301                            &tree.file,
302                            current_text_size_pt,
303                            &mut diagnostics,
304                        );
305                        reads_external_resources = true;
306                    }
307                    DirectiveKind::Bibliography => {
308                        lower_bibliography_directive(
309                            &mut document,
310                            root,
311                            args,
312                            span,
313                            &tree.file,
314                            &mut diagnostics,
315                        );
316                        reads_external_resources = true;
317                    }
318                    DirectiveKind::Set => lower_set_directive(
319                        &mut document,
320                        root,
321                        name,
322                        args,
323                        span,
324                        &mut metadata,
325                        &mut current_text_size_pt,
326                        &mut diagnostics,
327                    ),
328                },
329            }
330        }
331
332        LowerResult {
333            document,
334            diagnostics,
335            metadata,
336            reads_external_resources,
337        }
338    }
339}
340
341fn lower_raw_block(
342    document: &mut Document,
343    root: NodeId,
344    kind: RawBlockKind,
345    text: &str,
346    label: Option<&str>,
347    label_span: Option<&SourceSpan>,
348    span: &SourceSpan,
349) {
350    let mut attributes: AttrMap = BTreeMap::new();
351    attributes.insert("text".to_owned(), AttrValue::Str(text.to_owned()));
352    if let Some(id) = label {
353        insert_label_attributes(&mut attributes, id, label_span);
354    }
355    attributes.insert(
356        "raw.kind".to_owned(),
357        AttrValue::Str(
358            match kind {
359                RawBlockKind::Pre => "pre",
360                RawBlockKind::Code => "code",
361            }
362            .to_owned(),
363        ),
364    );
365    document.alloc_child(
366        root,
367        NodeSpec::new(NodeKind::Raw, span.clone()).with_attributes(attributes),
368    );
369}
370
371/// Convenience: parse + lower + resolve in one step. Concatenates the
372/// diagnostics from each stage so callers can render them uniformly.
373///
374/// # Examples
375///
376/// ```
377/// use std::path::Path;
378///
379/// let result = mos_eval::lower("= Hello\n", Path::new("main.mos"));
380///
381/// assert!(!result.has_errors());
382/// assert_eq!(result.document.len(), 3);
383/// ```
384pub fn lower(src: &str, file: &std::path::Path) -> LowerResult {
385    let mut sink = CollectingSink::new();
386    let tree = match mos_parse::parse(src, file, &mut sink) {
387        Ok(tree) => tree,
388        // `CollectingSink` never asks the parser to abort; this arm is
389        // unreachable in practice but keeps the pipeline total.
390        Err(mos_core::DiagnosticAbort) => {
391            return LowerResult {
392                document: Document::new(file.to_path_buf()),
393                diagnostics: sink.into_diagnostics(),
394                metadata: DocumentMetadata::default(),
395                // Parse aborted before any directive ran: no external reads.
396                reads_external_resources: false,
397            };
398        }
399    };
400    let mut diagnostics = sink.into_diagnostics();
401    let mut lowered = lower_tree(&tree);
402    diagnostics.append(&mut lowered.diagnostics);
403    LowerResult {
404        document: lowered.document,
405        diagnostics,
406        metadata: lowered.metadata,
407        reads_external_resources: lowered.reads_external_resources,
408    }
409}
410
411/// Lower an already-parsed [`SyntaxTree`]: evaluate it, then run the
412/// §6 stage-3 resolver. The CLI calls this *after* `mos_parse::parse`
413/// so a phase barrier can sit between parsing and lowering; [`lower`]
414/// is the parse-and-lower convenience used by tests and embedders.
415///
416/// # Examples
417///
418/// ```
419/// use std::path::Path;
420///
421/// let mut sink = mos_core::CollectingSink::new();
422/// let tree = mos_parse::parse(
423///     "= Intro <intro>\n\nSee @intro\n",
424///     Path::new("main.mos"),
425///     &mut sink,
426/// )?;
427/// let lowered = mos_eval::lower_tree(&tree);
428///
429/// assert!(!lowered.has_errors());
430/// # Ok::<(), mos_core::DiagnosticAbort>(())
431/// ```
432#[must_use]
433pub fn lower_tree(tree: &SyntaxTree) -> LowerResult {
434    let mut lowered = Evaluator::new().evaluate(tree);
435    let mut diagnostics = std::mem::take(&mut lowered.diagnostics);
436    let bib_keys = resolve_citations(&mut lowered.document, &mut diagnostics);
437    diagnostics.extend(resolve(&mut lowered.document, &bib_keys));
438    LowerResult {
439        document: lowered.document,
440        diagnostics,
441        metadata: lowered.metadata,
442        reads_external_resources: lowered.reads_external_resources,
443    }
444}
445
446#[cfg(test)]
447mod tests {
448    #![allow(
449        clippy::unwrap_used,
450        clippy::expect_used,
451        clippy::panic,
452        reason = "tests panic loudly on setup failure; matches crate-wide test-module convention"
453    )]
454    use std::path::PathBuf;
455
456    use mos_core::{NodeKind, codes};
457
458    use super::*;
459
460    #[cfg(target_pointer_width = "64")]
461    #[test]
462    fn label_attributes_omit_unrepresentable_span_bounds() {
463        let too_large = usize::try_from(i64::MAX).unwrap().saturating_add(1);
464        let span = SourceSpan::new(
465            PathBuf::from("test.mos"),
466            too_large,
467            too_large.saturating_add(1),
468        );
469        let mut attributes = AttrMap::new();
470
471        insert_label_attributes(&mut attributes, "huge", Some(&span));
472
473        assert_eq!(
474            attributes.get("label"),
475            Some(&AttrValue::Str("huge".to_owned()))
476        );
477        assert!(!attributes.contains_key(LABEL_SPAN_START_ATTR));
478        assert!(!attributes.contains_key(LABEL_SPAN_END_ATTR));
479    }
480
481    #[test]
482    fn reads_external_resources_flags_filesystem_directives() {
483        let file = PathBuf::from("test.mos");
484        // Pure: headings, paragraphs, and references touch no files.
485        assert!(
486            !lower("= Title <t>\n\nSee @t\n", &file).reads_external_resources,
487            "a source with no filesystem directives lowers purely"
488        );
489        // Each filesystem-reading directive marks the lowering impure: even
490        // when the referenced file is missing, since the *attempt* is what
491        // makes the result depend on external state.
492        assert!(
493            lower("#image(\"missing.png\")\n", &file).reads_external_resources,
494            "`#image` reads an external file"
495        );
496        assert!(
497            lower("#figure(image: \"missing.png\")\n", &file).reads_external_resources,
498            "`#figure` loads an external image"
499        );
500        assert!(
501            lower("#bibliography(\"missing.bib\")\n", &file).reads_external_resources,
502            "`#bibliography` reads an external source file"
503        );
504    }
505
506    #[test]
507    fn lowerer_stamps_paired_label_span_covering_the_label_token() {
508        // Contract `mos-lsp` go-to-definition depends on (#101, hardened by
509        // #103): a labelled declaration carries BOTH `label_span.start` and
510        // `label_span.end` as `Int`s spanning exactly the label token, so the
511        // LSP can target `<intro>` rather than the whole heading. If the
512        // lowerer ever stamps one attribute without the other, or stops
513        // stamping them: `definition.rs`'s safe fallback would silently widen
514        // the definition range with no test catching it. This locks the pair.
515        let src = "= Intro <intro>\n";
516        let r = lower(src, &PathBuf::from("test.mos"));
517        assert!(!r.has_errors(), "{:?}", r.diagnostics);
518
519        let section = r
520            .document
521            .nodes()
522            .find(|node| {
523                node.kind == NodeKind::Section
524                    && node.attributes.get("label") == Some(&AttrValue::Str("intro".to_owned()))
525            })
526            .expect("a Section node carrying label `intro`");
527
528        let (Some(&AttrValue::Int(start)), Some(&AttrValue::Int(end))) = (
529            section.attributes.get(LABEL_SPAN_START_ATTR),
530            section.attributes.get(LABEL_SPAN_END_ATTR),
531        ) else {
532            panic!(
533                "expected paired Int `label_span.*` attrs, got {:?} / {:?}",
534                section.attributes.get(LABEL_SPAN_START_ATTR),
535                section.attributes.get(LABEL_SPAN_END_ATTR),
536            );
537        };
538
539        let start = usize::try_from(start).expect("label_span.start fits usize");
540        let end = usize::try_from(end).expect("label_span.end fits usize");
541        assert!(
542            start <= end,
543            "label span start {start} must not exceed end {end}"
544        );
545        assert_eq!(
546            src.get(start..end),
547            Some("intro"),
548            "label span must cover exactly the `intro` token, not the whole heading"
549        );
550    }
551
552    #[test]
553    fn reference_nodes_carry_stamped_label_span() {
554        // Issue #116: lowered `@label` / `@page(label)` reference nodes carry
555        // the same paired `label_span.*` identifier attributes as declarations,
556        // so `mos-lsp` rename reads the editable range directly instead of
557        // deriving it from the reference node's span geometry.
558        let src = "= Intro <intro>\n\nsee @intro and @page(intro)\n";
559        let r = lower(src, &PathBuf::from("test.mos"));
560        assert!(!r.has_errors(), "{:?}", r.diagnostics);
561
562        for kind in [NodeKind::Reference, NodeKind::PageReference] {
563            let node = r
564                .document
565                .nodes()
566                .find(|node| node.kind == kind)
567                .unwrap_or_else(|| panic!("expected a {kind:?} node"));
568            let (Some(&AttrValue::Int(start)), Some(&AttrValue::Int(end))) = (
569                node.attributes.get(LABEL_SPAN_START_ATTR),
570                node.attributes.get(LABEL_SPAN_END_ATTR),
571            ) else {
572                panic!(
573                    "{kind:?} node missing paired `label_span.*`: {:?}",
574                    node.attributes
575                );
576            };
577            let start = usize::try_from(start).expect("label_span.start fits usize");
578            let end = usize::try_from(end).expect("label_span.end fits usize");
579            assert_eq!(
580                src.get(start..end),
581                Some("intro"),
582                "{kind:?} label span must cover exactly the `intro` identifier"
583            );
584        }
585    }
586
587    #[test]
588    fn lowers_heading_and_paragraph() {
589        let r = lower(
590            "= Hello\n\nbody *italic* text\n",
591            &PathBuf::from("test.mos"),
592        );
593        assert!(!r.has_errors());
594        // Document root + Section + Paragraph + 1 Text inside Section
595        // + 3 inline children of Paragraph (text/emphasis/text).
596        assert_eq!(r.document.len(), 1 + 2 + 1 + 3);
597
598        let kinds: Vec<NodeKind> = r.document.nodes().map(|n| n.kind).collect();
599        assert_eq!(kinds[0], NodeKind::Document);
600        assert!(kinds.contains(&NodeKind::Section));
601        assert!(kinds.contains(&NodeKind::Paragraph));
602        assert!(kinds.contains(&NodeKind::Emphasis));
603    }
604
605    #[test]
606    fn lowers_nested_bold_italic_inline() {
607        let r = lower("***both***\n", &PathBuf::from("test.mos"));
608        assert!(!r.has_errors(), "{:?}", r.diagnostics);
609        assert!(
610            r.document.nodes().any(|n| n.kind == NodeKind::BoldItalic),
611            "expected bold-italic node in {:?}",
612            r.document.nodes().map(|n| n.kind).collect::<Vec<_>>()
613        );
614    }
615
616    #[test]
617    fn root_owns_top_level_items() {
618        let r = lower("= A\n\n= B\n\npara\n", &PathBuf::from("test.mos"));
619        let root = r.document.get(r.document.root).unwrap();
620        assert_eq!(root.children.len(), 3);
621    }
622
623    #[test]
624    fn hard_break_lowers_to_hardbreak_node_without_text_attr() {
625        let r = lower("foo\\\\bar\n", &PathBuf::from("test.mos"));
626        assert!(!r.has_errors(), "{:?}", r.diagnostics);
627
628        // The paragraph is the second top-level node (after the
629        // document root). Find its children.
630        let root = r.document.get(r.document.root).unwrap();
631        let paragraph_id = *root.children.first().unwrap();
632        let paragraph = r.document.get(paragraph_id).unwrap();
633        let inline_kinds: Vec<NodeKind> = paragraph
634            .children
635            .iter()
636            .filter_map(|id| r.document.get(*id).map(|n| n.kind))
637            .collect();
638        assert_eq!(
639            inline_kinds,
640            vec![NodeKind::Text, NodeKind::HardBreak, NodeKind::Text],
641            "got {inline_kinds:?}"
642        );
643
644        // The HardBreak node must have no `text` attribute -- layout
645        // dispatch matches on kind, not on text presence.
646        let hardbreak_id = paragraph.children[1];
647        let hardbreak = r.document.get(hardbreak_id).unwrap();
648        assert!(
649            hardbreak.attributes.is_empty(),
650            "expected empty attribute map on HardBreak, got {:?}",
651            hardbreak.attributes
652        );
653    }
654
655    /// Hand-craft a tiny PNG in a temp dir so the eval tests don't
656    /// depend on `examples/` paths or the workspace layout.
657    /// `::image::` (rather than `image::`) routes through the extern
658    /// `image` crate; the bare `image` identifier inside the eval
659    /// crate resolves to the local `mod image` we declared up top.
660    fn write_tiny_png(name: &str) -> PathBuf {
661        let dir = std::env::temp_dir().join(format!(
662            "mos-eval-image-{}-{}",
663            name,
664            std::time::SystemTime::now()
665                .duration_since(std::time::UNIX_EPOCH)
666                .map_or(0, |d| d.as_nanos())
667        ));
668        std::fs::create_dir_all(&dir).unwrap();
669        let path = dir.join(name);
670        let mut buf = ::image::RgbaImage::new(3, 2);
671        for x in 0_u32..3 {
672            for y in 0_u32..2 {
673                let r = u8::try_from(x * 80).unwrap_or(0);
674                let g = u8::try_from(y * 120).unwrap_or(0);
675                buf.put_pixel(x, y, ::image::Rgba([r, g, 200, 255]));
676            }
677        }
678        buf.save(&path).unwrap();
679        path
680    }
681
682    #[test]
683    fn image_directive_attaches_decoded_pixels() {
684        let png_path = write_tiny_png("tiny.png");
685        let source = png_path.parent().unwrap().join("main.mos");
686        std::fs::write(&source, "#image(\"tiny.png\")\n").unwrap();
687        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
688        assert!(!r.has_errors(), "{:?}", r.diagnostics);
689        let image_node = r
690            .document
691            .nodes()
692            .find(|n| n.kind == NodeKind::Image)
693            .expect("Image node");
694        assert_eq!(
695            image_node.attributes.get("src"),
696            Some(&AttrValue::Str("tiny.png".to_owned()))
697        );
698        assert_eq!(
699            image_node.attributes.get("pixel_width"),
700            Some(&AttrValue::Int(3))
701        );
702        assert_eq!(
703            image_node.attributes.get("pixel_height"),
704            Some(&AttrValue::Int(2))
705        );
706        match image_node.attributes.get("pixels") {
707            Some(AttrValue::Bytes(b)) => assert_eq!(b.len(), 3 * 3 * 2),
708            other => panic!("expected pixel bytes, got {other:?}"),
709        }
710        std::fs::remove_dir_all(png_path.parent().unwrap()).ok();
711    }
712
713    #[test]
714    fn image_directive_records_explicit_dimensions() {
715        let png_path = write_tiny_png("dims.png");
716        let source = png_path.parent().unwrap().join("main.mos");
717        std::fs::write(
718            &source,
719            "#image(\"dims.png\", width: 100pt, height: 60pt, alt: \"a tiny image\")\n",
720        )
721        .unwrap();
722        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
723        assert!(!r.has_errors(), "{:?}", r.diagnostics);
724        let image_node = r
725            .document
726            .nodes()
727            .find(|n| n.kind == NodeKind::Image)
728            .expect("Image node");
729        assert_eq!(
730            image_node.attributes.get("width"),
731            Some(&AttrValue::Length(100.0))
732        );
733        assert_eq!(
734            image_node.attributes.get("height"),
735            Some(&AttrValue::Length(60.0))
736        );
737        assert_eq!(
738            image_node.attributes.get("alt"),
739            Some(&AttrValue::Str("a tiny image".to_owned()))
740        );
741        std::fs::remove_dir_all(png_path.parent().unwrap()).ok();
742    }
743
744    #[test]
745    fn image_em_width_resolves_against_current_text_size() {
746        // Regression: `#image(width: 2em)` after `#set text(size: 20pt)`
747        // must yield 40pt, not 22pt (which is what the old hardcoded
748        // 11pt em base produced). The lowerer now threads the tracked
749        // body text size through to `build_image_attributes`.
750        let png_path = write_tiny_png("em.png");
751        let dir = png_path.parent().unwrap();
752        let source = dir.join("main.mos");
753        std::fs::write(
754            &source,
755            "#set text(size: 20pt)\n#image(\"em.png\", width: 2em)\n",
756        )
757        .unwrap();
758        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
759        assert!(!r.has_errors(), "{:?}", r.diagnostics);
760        let image_node = r
761            .document
762            .nodes()
763            .find(|n| n.kind == NodeKind::Image)
764            .expect("Image node");
765        match image_node.attributes.get("width") {
766            Some(AttrValue::Length(pt)) => assert!(
767                (pt - 40.0).abs() < 0.01,
768                "width = {pt}pt, expected 40pt (2em at 20pt)"
769            ),
770            other => panic!("expected width Length, got {other:?}"),
771        }
772        std::fs::remove_dir_all(dir).ok();
773    }
774
775    #[test]
776    fn missing_image_path_emits_mos0037() {
777        let r = lower("#image()\n", &PathBuf::from("/tmp/no-such.mos"));
778        assert!(
779            r.diagnostics
780                .iter()
781                .any(|d| d.def().code() == codes::MOS0037.code()),
782            "expected MOS0037, got {:?}",
783            r.diagnostics
784        );
785    }
786
787    #[test]
788    fn unreadable_image_emits_mos0012() {
789        let r = lower(
790            "#image(\"does-not-exist.png\")\n",
791            &PathBuf::from("/tmp/no-such-dir/main.mos"),
792        );
793        assert!(
794            r.diagnostics
795                .iter()
796                .any(|d| d.def().code() == codes::MOS0012.code()),
797            "expected MOS0012, got {:?}",
798            r.diagnostics
799        );
800    }
801
802    #[test]
803    fn empty_image_path_emits_mos0037_not_io_error() {
804        // `#image("")` is a missing-path mistake, not an I/O failure.
805        // The diagnostic surface treats it the same as omitting the
806        // path entirely so the user sees a clear "needs a path"
807        // message instead of `MOS0012`/`MOS0029` noise.
808        let r = lower("#image(\"\")\n", &PathBuf::from("/tmp/whatever/main.mos"));
809        assert!(
810            r.diagnostics
811                .iter()
812                .any(|d| d.def().code() == codes::MOS0037.code()),
813            "expected MOS0037, got {:?}",
814            r.diagnostics
815        );
816        // No MOS0012/MOS0029 should leak through.
817        assert!(
818            !r.diagnostics.iter().any(|d| {
819                d.def().code() == codes::MOS0012.code() || d.def().code() == codes::MOS0029.code()
820            }),
821            "unexpected I/O diagnostic: {:?}",
822            r.diagnostics
823        );
824    }
825
826    #[test]
827    fn non_positive_image_width_emits_mos0020() {
828        // `width: 0pt` and `width: -10pt` would otherwise produce a
829        // zero/negative image box that sails into layout and PDF
830        // emit. Reject at lower time with MOS0020.
831        for src in [
832            "#image(\"x.png\", width: 0pt)\n",
833            "#image(\"x.png\", width: -10pt)\n",
834            "#image(\"x.png\", width: 0)\n",
835            "#image(\"x.png\", width: -1)\n",
836        ] {
837            let r = lower(src, &PathBuf::from("/tmp/whatever/main.mos"));
838            assert!(
839                r.diagnostics
840                    .iter()
841                    .any(|d| d.def().code() == codes::MOS0020.code()),
842                "expected MOS0020 for `{src}`, got {:?}",
843                r.diagnostics
844            );
845        }
846    }
847
848    #[test]
849    fn non_positive_image_height_emits_mos0020() {
850        for src in [
851            "#image(\"x.png\", height: 0pt)\n",
852            "#image(\"x.png\", height: -1mm)\n",
853        ] {
854            let r = lower(src, &PathBuf::from("/tmp/whatever/main.mos"));
855            assert!(
856                r.diagnostics
857                    .iter()
858                    .any(|d| d.def().code() == codes::MOS0020.code()),
859                "expected MOS0020 for `{src}`, got {:?}",
860                r.diagnostics
861            );
862        }
863    }
864
865    #[test]
866    fn undecodable_image_emits_mos0029() {
867        let dir = std::env::temp_dir().join(format!(
868            "mos-eval-bad-{}",
869            std::time::SystemTime::now()
870                .duration_since(std::time::UNIX_EPOCH)
871                .map_or(0, |d| d.as_nanos())
872        ));
873        std::fs::create_dir_all(&dir).unwrap();
874        let png = dir.join("bad.png");
875        std::fs::write(&png, b"not really a PNG").unwrap();
876        let source = dir.join("main.mos");
877        std::fs::write(&source, "#image(\"bad.png\")\n").unwrap();
878        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
879        assert!(
880            r.diagnostics
881                .iter()
882                .any(|d| d.def().code() == codes::MOS0029.code()),
883            "expected MOS0029, got {:?}",
884            r.diagnostics
885        );
886        std::fs::remove_dir_all(&dir).ok();
887    }
888
889    #[test]
890    fn figure_directive_creates_figure_with_image_and_caption() {
891        let png_path = write_tiny_png("fig.png");
892        let source = png_path.parent().unwrap().join("main.mos");
893        std::fs::write(
894            &source,
895            "#figure(image: \"fig.png\", caption: \"A tiny picture.\")\n",
896        )
897        .unwrap();
898        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
899        assert!(!r.has_errors(), "{:?}", r.diagnostics);
900        let figure = r
901            .document
902            .nodes()
903            .find(|n| n.kind == NodeKind::Figure)
904            .expect("Figure node");
905        assert_eq!(figure.children.len(), 2);
906        let img = r.document.get(figure.children[0]).unwrap();
907        assert_eq!(img.kind, NodeKind::Image);
908        let caption = r.document.get(figure.children[1]).unwrap();
909        assert_eq!(caption.kind, NodeKind::Paragraph);
910        assert_eq!(
911            caption.attributes.get("role"),
912            Some(&AttrValue::Str("caption".to_owned()))
913        );
914        // `lower` runs the resolver, which numbers the figure and stamps
915        // the non-breaking `Figure 1: ` supplement label onto the caption.
916        let caption_text = r.document.get(caption.children[0]).unwrap();
917        assert_eq!(
918            caption_text.attributes.get("text"),
919            Some(&AttrValue::Str(
920                "Figure\u{00A0}1: A tiny picture.".to_owned()
921            ))
922        );
923        std::fs::remove_dir_all(png_path.parent().unwrap()).ok();
924    }
925
926    #[test]
927    fn figure_with_missing_image_does_not_leak_empty_node() {
928        // If `#figure(image: "broken.png", caption: "...")` fails to
929        // load the image, the caller still emits MOS0012; the lowerer
930        // must NOT leave a Figure (or its caption paragraph) hanging
931        // on the document root. A caption-only figure renders next
932        // to whatever the user thought they were captioning, which
933        // is worse than no output for the failed block.
934        let r = lower(
935            "#figure(image: \"does-not-exist.png\", caption: \"missing\")\n",
936            &PathBuf::from("/tmp/no-such-dir/main.mos"),
937        );
938        assert!(
939            r.diagnostics
940                .iter()
941                .any(|d| d.def().code() == codes::MOS0012.code())
942        );
943        assert!(
944            !r.document.nodes().any(|n| n.kind == NodeKind::Figure),
945            "Figure node leaked after image load failure",
946        );
947    }
948
949    #[test]
950    fn figure_label_reference_resolves_to_figure_number() {
951        // End-to-end: a real `#figure(label: ...)` lowers with its label
952        // on the Figure node, the resolver numbers the figure, and an
953        // `@label` reference rewrites to kind-aware `Figure 1` text. Note
954        // the space before `here.`: a `.` flush against the reference
955        // would be absorbed into the label (`fig:plot.`) and miss.
956        let png_path = write_tiny_png("ref-fig.png");
957        let dir = png_path.parent().unwrap();
958        let source = dir.join("main.mos");
959        std::fs::write(
960            &source,
961            "#figure(image: \"ref-fig.png\", caption: \"A plot.\", label: \"fig:plot\")\n\nSee @fig:plot here.\n",
962        )
963        .unwrap();
964        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
965        assert!(!r.has_errors(), "{:?}", r.diagnostics);
966
967        let figure = r
968            .document
969            .nodes()
970            .find(|n| n.kind == NodeKind::Figure)
971            .expect("Figure node");
972        assert_eq!(
973            figure.attributes.get("number"),
974            Some(&AttrValue::Str("1".to_owned())),
975            "the lowered figure is numbered in document order"
976        );
977        assert_eq!(
978            figure.attributes.get("label"),
979            Some(&AttrValue::Str("fig:plot".to_owned())),
980            "the `label:` argument lands on the Figure node"
981        );
982
983        // The caption text is stamped with the visible, non-breaking label.
984        let caption_text = figure
985            .children
986            .iter()
987            .filter_map(|c| r.document.get(*c))
988            .find(|c| {
989                matches!(c.attributes.get("role"), Some(AttrValue::Str(role)) if role == "caption")
990            })
991            .and_then(|caption| caption.children.first())
992            .and_then(|text_id| r.document.get(*text_id))
993            .and_then(|text| text.attributes.get("text"));
994        assert_eq!(
995            caption_text,
996            Some(&AttrValue::Str("Figure\u{00A0}1: A plot.".to_owned())),
997            "the caption is prefixed with the `Figure N: ` label"
998        );
999
1000        let reference = r
1001            .document
1002            .nodes()
1003            .find(|n| n.kind == NodeKind::Reference)
1004            .expect("Reference node");
1005        assert_eq!(
1006            reference.attributes.get("text"),
1007            Some(&AttrValue::Str("Figure\u{00A0}1".to_owned())),
1008            "the `@fig:plot` reference resolves to kind-aware figure text"
1009        );
1010
1011        std::fs::remove_dir_all(dir).ok();
1012    }
1013
1014    #[test]
1015    fn page_reference_lowers_to_inert_page_reference_node() {
1016        // `@page(label)` reaches the semantic model as a distinct
1017        // `NodeKind::PageReference` carrying the bare label and a `?label?`
1018        // placeholder (the unresolved-reference pattern). This slice models the
1019        // node but does not resolve it: page resolution is the resolve↔layout
1020        // fixpoint (issue #72), so it must NOT be folded into the cross-
1021        // reference machinery and the placeholder must survive lowering. The
1022        // label is declared so the lower-time validation stays quiet here.
1023        let r = lower(
1024            "= Intro <intro>\n\nSee @page(intro) here.\n",
1025            &PathBuf::from("test.mos"),
1026        );
1027        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1028
1029        let page_ref = r
1030            .document
1031            .nodes()
1032            .find(|n| n.kind == NodeKind::PageReference)
1033            .expect("PageReference node");
1034        assert_eq!(
1035            page_ref.attributes.get("label"),
1036            Some(&AttrValue::Str("intro".to_owned())),
1037        );
1038        assert_eq!(
1039            page_ref.attributes.get("text"),
1040            Some(&AttrValue::Str("?intro?".to_owned())),
1041            "unresolved page references keep a visible placeholder",
1042        );
1043        // A page reference is its own kind, not an `@label` cross-reference.
1044        assert!(!r.document.nodes().any(|n| n.kind == NodeKind::Reference));
1045    }
1046
1047    #[test]
1048    fn undeclared_page_reference_label_emits_mos0033() {
1049        // An undeclared label in `@page(...)` is a lower-time error, exactly
1050        // like a bad `@ref`: `mos check` reports it without laying out.
1051        let r = lower("See @page(missing) here.\n", &PathBuf::from("test.mos"));
1052        assert!(
1053            r.diagnostics
1054                .iter()
1055                .any(|d| d.def().code() == codes::MOS0033.code()),
1056            "{:?}",
1057            r.diagnostics
1058        );
1059        // The placeholder survives so the page reference stays visible.
1060        let page_ref = r
1061            .document
1062            .nodes()
1063            .find(|n| n.kind == NodeKind::PageReference)
1064            .expect("PageReference node");
1065        assert_eq!(
1066            page_ref.attributes.get("text"),
1067            Some(&AttrValue::Str("?missing?".to_owned())),
1068        );
1069    }
1070
1071    #[test]
1072    fn page_reference_to_a_declared_label_is_not_a_duplicate_declaration() {
1073        // A page reference *consumes* a label; it must not be mistaken for a
1074        // second declaration of `intro`, which would wrongly emit MOS0030.
1075        let r = lower(
1076            "= Intro <intro>\n\nSee @page(intro) here.\n",
1077            &PathBuf::from("test.mos"),
1078        );
1079        assert!(
1080            !r.diagnostics
1081                .iter()
1082                .any(|d| d.def().code() == codes::MOS0030.code()),
1083            "{:?}",
1084            r.diagnostics
1085        );
1086    }
1087
1088    #[test]
1089    fn citation_lowers_to_citation_node_with_key_and_span() {
1090        // `[@key]` must reach the semantic model as `NodeKind::Citation`
1091        // with the bare key in the `key` attribute and a span that
1092        // covers the full `[@key]` source extent. The placeholder
1093        // `text` attribute mirrors the unresolved-reference pattern
1094        // so layout still renders something visible before citation
1095        // display rendering exists.
1096        let src = "see [@smith2024] here\n";
1097        let r = lower(src, &PathBuf::from("test.mos"));
1098        assert!(
1099            r.diagnostics
1100                .iter()
1101                .any(|d| d.def().code() == codes::MOS0045.code()),
1102            "expected MOS0045 because no bibliography records are declared, got {:?}",
1103            r.diagnostics
1104        );
1105        let citation = r
1106            .document
1107            .nodes()
1108            .find(|n| n.kind == NodeKind::Citation)
1109            .expect("citation node");
1110        assert_eq!(
1111            citation.attributes.get("key"),
1112            Some(&AttrValue::Str("smith2024".to_owned())),
1113        );
1114        assert_eq!(
1115            citation.attributes.get("text"),
1116            Some(&AttrValue::Str("[?smith2024?]".to_owned())),
1117        );
1118        let span_text = &src[citation.span.start()..citation.span.end()];
1119        assert_eq!(span_text, "[@smith2024]");
1120    }
1121
1122    #[test]
1123    fn malformed_citation_does_not_create_citation_node() {
1124        // `[@]` with an empty key must surface as a parse warning
1125        // (MOS0039) and produce zero `NodeKind::Citation` nodes: the
1126        // semantic model only carries citations that parsed cleanly.
1127        let r = lower("look [@] here\n", &PathBuf::from("test.mos"));
1128        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1129        assert!(
1130            r.diagnostics
1131                .iter()
1132                .any(|d| d.def().code() == codes::MOS0039.code()),
1133            "expected MOS0039, got {:?}",
1134            r.diagnostics,
1135        );
1136        assert!(
1137            !r.document.nodes().any(|n| n.kind == NodeKind::Citation),
1138            "no Citation nodes expected, got {:?}",
1139            r.document.nodes().map(|n| n.kind).collect::<Vec<_>>(),
1140        );
1141    }
1142
1143    #[test]
1144    fn unterminated_citation_does_not_leak_into_reference_resolver() {
1145        // Regression: an unterminated `[@key` used to advance just past
1146        // `[`, leaving `@key` to be re-tokenized by the `@`-reference
1147        // branch. The resolver then surfaced a bogus `MOS0033 unknown
1148        // label` on what was a citation typo, not a label typo.
1149        // Recovery in the parser now consumes the malformed citation
1150        // extent end-to-end so no phantom `Reference` reaches the
1151        // resolver.
1152        let r = lower(
1153            "see [@smith2024 missing close\n",
1154            &PathBuf::from("test.mos"),
1155        );
1156        assert!(
1157            !r.has_errors(),
1158            "no errors expected, got {:?}",
1159            r.diagnostics,
1160        );
1161        assert!(
1162            r.diagnostics
1163                .iter()
1164                .any(|d| d.def().code() == codes::MOS0039.code()),
1165            "expected MOS0039, got {:?}",
1166            r.diagnostics,
1167        );
1168        assert!(
1169            !r.diagnostics
1170                .iter()
1171                .any(|d| d.def().code() == codes::MOS0033.code()),
1172            "malformed citation must not surface as unknown-label MOS0033: {:?}",
1173            r.diagnostics,
1174        );
1175        assert!(!r.document.nodes().any(|n| n.kind == NodeKind::Citation));
1176        assert!(!r.document.nodes().any(|n| n.kind == NodeKind::Reference));
1177    }
1178
1179    #[test]
1180    fn deferred_multi_key_citation_does_not_leak_into_reference_resolver() {
1181        // `[@a; @b]` is the pandoc multi-key form and is deferred to
1182        // a later bibliography slice. Until then it must round-trip
1183        // as a single `MOS0039` warning with zero `Citation`/`Reference`
1184        // nodes and zero `MOS0033` follow-on errors from the resolver.
1185        let r = lower(
1186            "compare [@smith2024; @jones2025] now\n",
1187            &PathBuf::from("test.mos"),
1188        );
1189        assert!(
1190            !r.has_errors(),
1191            "no errors expected, got {:?}",
1192            r.diagnostics,
1193        );
1194        assert!(
1195            r.diagnostics
1196                .iter()
1197                .any(|d| d.def().code() == codes::MOS0039.code())
1198        );
1199        assert!(
1200            !r.diagnostics
1201                .iter()
1202                .any(|d| d.def().code() == codes::MOS0033.code()),
1203            "multi-key citation must not surface as unknown-label MOS0033: {:?}",
1204            r.diagnostics,
1205        );
1206        assert!(!r.document.nodes().any(|n| n.kind == NodeKind::Citation));
1207        assert!(!r.document.nodes().any(|n| n.kind == NodeKind::Reference));
1208    }
1209
1210    #[test]
1211    fn figure_directive_accepts_positional_path() {
1212        // `#figure("path.png")` is the captionless short form. The
1213        // parser accepts it; the lowerer used to reject it with MOS0024,
1214        // which broke the spelling end-to-end.
1215        let png_path = write_tiny_png("fig_pos.png");
1216        let source = png_path.parent().unwrap().join("main.mos");
1217        std::fs::write(&source, "#figure(\"fig_pos.png\")\n").unwrap();
1218        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1219        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1220        let figure = r
1221            .document
1222            .nodes()
1223            .find(|n| n.kind == NodeKind::Figure)
1224            .expect("Figure node");
1225        // One child: just the image (no caption was supplied).
1226        assert_eq!(figure.children.len(), 1);
1227        let img = r.document.get(figure.children[0]).unwrap();
1228        assert_eq!(img.kind, NodeKind::Image);
1229        assert_eq!(
1230            img.attributes.get("src"),
1231            Some(&AttrValue::Str("fig_pos.png".to_owned()))
1232        );
1233        std::fs::remove_dir_all(png_path.parent().unwrap()).ok();
1234    }
1235
1236    /// Create a unique temp dir for a bibliography test. Salted with the
1237    /// caller's `name` plus a high-resolution timestamp so parallel tests
1238    /// don't collide, mirroring `write_tiny_png`.
1239    fn unique_temp_dir(name: &str) -> PathBuf {
1240        let dir = std::env::temp_dir().join(format!(
1241            "mos-eval-bib-{}-{}",
1242            name,
1243            std::time::SystemTime::now()
1244                .duration_since(std::time::UNIX_EPOCH)
1245                .map_or(0, |d| d.as_nanos())
1246        ));
1247        std::fs::create_dir_all(&dir).unwrap();
1248        dir
1249    }
1250
1251    #[test]
1252    fn bibliography_directive_preserves_resolved_path() {
1253        // A declared `#bibliography("refs.bib")` lowers to a Bibliography
1254        // node that preserves both the literal `src` and the path resolved
1255        // against the source file's directory, so the later BibTeX reader
1256        // can open the database. With the file present there is no warning.
1257        let dir = unique_temp_dir("preserve");
1258        let bib = dir.join("refs.bib");
1259        std::fs::write(&bib, "@book{a, title={A}}\n").unwrap();
1260        let source = dir.join("main.mos");
1261        std::fs::write(&source, "#bibliography(\"refs.bib\")\n").unwrap();
1262        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1263        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1264        let node = r
1265            .document
1266            .nodes()
1267            .find(|n| n.kind == NodeKind::Bibliography)
1268            .expect("Bibliography node");
1269        assert_eq!(
1270            node.attributes.get("src"),
1271            Some(&AttrValue::Str("refs.bib".to_owned()))
1272        );
1273        assert_eq!(
1274            node.attributes.get("resolved_path"),
1275            Some(&AttrValue::Str(bib.to_string_lossy().into_owned()))
1276        );
1277        std::fs::remove_dir_all(&dir).ok();
1278    }
1279
1280    #[test]
1281    fn bibliography_named_path_resolves_against_source_dir() {
1282        // The named `path:` form resolves a subdirectory-relative path the
1283        // same way, exercising project-relative resolution explicitly.
1284        let dir = unique_temp_dir("named");
1285        let sub = dir.join("sources");
1286        std::fs::create_dir_all(&sub).unwrap();
1287        let bib = sub.join("refs.bib");
1288        std::fs::write(&bib, "@book{a, title={A}}\n").unwrap();
1289        let source = dir.join("main.mos");
1290        std::fs::write(&source, "#bibliography(path: \"sources/refs.bib\")\n").unwrap();
1291        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1292        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1293        let node = r
1294            .document
1295            .nodes()
1296            .find(|n| n.kind == NodeKind::Bibliography)
1297            .expect("Bibliography node");
1298        assert_eq!(
1299            node.attributes.get("resolved_path"),
1300            Some(&AttrValue::Str(bib.to_string_lossy().into_owned()))
1301        );
1302        std::fs::remove_dir_all(&dir).ok();
1303    }
1304
1305    #[test]
1306    fn bibliography_src_alias_resolves_against_source_dir() {
1307        // The `src:` alias is accepted for parity with image source naming,
1308        // and preserves the literal source path for the later BibTeX reader.
1309        let dir = unique_temp_dir("src-alias");
1310        let sub = dir.join("sources");
1311        std::fs::create_dir_all(&sub).unwrap();
1312        let bib = sub.join("refs.bib");
1313        std::fs::write(&bib, "@book{a, title={A}}\n").unwrap();
1314        let source = dir.join("main.mos");
1315        std::fs::write(&source, "#bibliography(src: \"sources/refs.bib\")\n").unwrap();
1316        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1317        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1318        let node = r
1319            .document
1320            .nodes()
1321            .find(|n| n.kind == NodeKind::Bibliography)
1322            .expect("Bibliography node");
1323        assert_eq!(
1324            node.attributes.get("src"),
1325            Some(&AttrValue::Str("sources/refs.bib".to_owned()))
1326        );
1327        assert_eq!(
1328            node.attributes.get("resolved_path"),
1329            Some(&AttrValue::Str(bib.to_string_lossy().into_owned()))
1330        );
1331        std::fs::remove_dir_all(&dir).ok();
1332    }
1333
1334    #[test]
1335    fn known_citation_key_resolves_against_bibliography_records() {
1336        // A citation key declared in the parsed BibTeX source is marked
1337        // resolved and its visible text is rewritten to its first-use
1338        // numeric label `[1]` (issue #67).
1339        let dir = unique_temp_dir("citation-known");
1340        let bib = dir.join("refs.bib");
1341        std::fs::write(&bib, "@article{smith2024, title={Known}}\n").unwrap();
1342        let source = dir.join("main.mos");
1343        let source_text =
1344            "#bibliography(\"refs.bib\")\n\n= Intro <intro>\n\nsee [@smith2024] and @intro\n";
1345        std::fs::write(&source, source_text).unwrap();
1346
1347        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1348        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1349
1350        let citation = r
1351            .document
1352            .nodes()
1353            .find(|n| n.kind == NodeKind::Citation)
1354            .expect("Citation node");
1355        assert_eq!(
1356            citation.attributes.get("resolved"),
1357            Some(&AttrValue::Bool(true)),
1358            "known key should be marked resolved for later rendering"
1359        );
1360        assert_eq!(
1361            citation.attributes.get("text"),
1362            Some(&AttrValue::Str("[1]".to_owned())),
1363            "a resolved citation renders its first-use numeric label"
1364        );
1365        assert_eq!(
1366            citation.attributes.get("target_path"),
1367            Some(&AttrValue::Str(bib.to_string_lossy().into_owned()))
1368        );
1369        assert_eq!(
1370            citation.attributes.get("target_span.start"),
1371            Some(&AttrValue::Int(9))
1372        );
1373        assert_eq!(
1374            citation.attributes.get("target_span.end"),
1375            Some(&AttrValue::Int(18))
1376        );
1377
1378        let reference = r
1379            .document
1380            .nodes()
1381            .find(|n| n.kind == NodeKind::Reference)
1382            .expect("Reference node");
1383        assert_eq!(
1384            reference.attributes.get("text"),
1385            Some(&AttrValue::Str("1".to_owned())),
1386            "label references still resolve while citations are checked"
1387        );
1388
1389        std::fs::remove_dir_all(&dir).ok();
1390    }
1391
1392    #[test]
1393    fn label_reference_matching_bib_key_suggests_citation() {
1394        // `@smith2024` resolves to no label but exactly matches a bibliography
1395        // key -- the user meant the citation `[@smith2024]`. MOS0033 carries a
1396        // structured fix swapping `@smith2024` for `[@smith2024]`.
1397        let dir = unique_temp_dir("ref-is-bibkey");
1398        let bib = dir.join("refs.bib");
1399        std::fs::write(&bib, "@article{smith2024, title={Known}}\n").unwrap();
1400        let source = dir.join("main.mos");
1401        let source_text = "#bibliography(\"refs.bib\")\n\nsee @smith2024 here\n";
1402        std::fs::write(&source, source_text).unwrap();
1403
1404        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1405
1406        let diag = r
1407            .diagnostics
1408            .iter()
1409            .find(|d| d.def().code() == codes::MOS0033.code())
1410            .expect("MOS0033 for the @smith2024 reference");
1411        let suggestions = diag.suggestions();
1412        assert_eq!(
1413            suggestions.len(),
1414            1,
1415            "one citation fix, got {suggestions:?}"
1416        );
1417        assert_eq!(suggestions[0].replacement, "[@smith2024]");
1418        assert_eq!(
1419            &source_text[suggestions[0].span.start()..suggestions[0].span.end()],
1420            "@smith2024",
1421            "the fix replaces the whole `@key` token, sigil included"
1422        );
1423
1424        std::fs::remove_dir_all(&dir).ok();
1425    }
1426
1427    #[test]
1428    fn repeated_known_citation_key_reuses_its_first_number() {
1429        // Two citations to the same resolved key render the same numeric
1430        // label -- a key is numbered once, on first use.
1431        let dir = unique_temp_dir("citation-repeat");
1432        let bib = dir.join("refs.bib");
1433        std::fs::write(&bib, "@article{smith2024, title={Known}}\n").unwrap();
1434        let source = dir.join("main.mos");
1435        let source_text =
1436            "#bibliography(\"refs.bib\")\n\nsee [@smith2024] and again [@smith2024]\n";
1437        std::fs::write(&source, source_text).unwrap();
1438
1439        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1440        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1441
1442        let labels: Vec<Option<AttrValue>> = r
1443            .document
1444            .nodes()
1445            .filter(|n| n.kind == NodeKind::Citation)
1446            .map(|n| n.attributes.get("text").cloned())
1447            .collect();
1448        assert_eq!(
1449            labels,
1450            vec![
1451                Some(AttrValue::Str("[1]".to_owned())),
1452                Some(AttrValue::Str("[1]".to_owned())),
1453            ],
1454            "repeated key reuses its first-use number"
1455        );
1456
1457        std::fs::remove_dir_all(&dir).ok();
1458    }
1459
1460    #[test]
1461    fn distinct_known_citation_keys_number_by_first_use_order() {
1462        // Distinct resolved keys are numbered by the order they are first
1463        // cited, independent of their order in the BibTeX source, and a
1464        // later repeat of an earlier key keeps that key's number.
1465        let dir = unique_temp_dir("citation-order");
1466        let bib = dir.join("refs.bib");
1467        // `alpha` precedes `beta` in the database file...
1468        std::fs::write(
1469            &bib,
1470            "@article{alpha, title={A}}\n@article{beta, title={B}}\n",
1471        )
1472        .unwrap();
1473        let source = dir.join("main.mos");
1474        // ...but `beta` is *cited* first, so beta -> [1] and alpha -> [2].
1475        let source_text = "#bibliography(\"refs.bib\")\n\nsee [@beta] then [@alpha] and [@beta]\n";
1476        std::fs::write(&source, source_text).unwrap();
1477
1478        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1479        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1480
1481        let labels: Vec<Option<AttrValue>> = r
1482            .document
1483            .nodes()
1484            .filter(|n| n.kind == NodeKind::Citation)
1485            .map(|n| n.attributes.get("text").cloned())
1486            .collect();
1487        assert_eq!(
1488            labels,
1489            vec![
1490                Some(AttrValue::Str("[1]".to_owned())),
1491                Some(AttrValue::Str("[2]".to_owned())),
1492                Some(AttrValue::Str("[1]".to_owned())),
1493            ],
1494            "numbering follows first citation, not bibliography source order"
1495        );
1496
1497        std::fs::remove_dir_all(&dir).ok();
1498    }
1499
1500    #[test]
1501    fn unknown_citation_key_emits_mos0045_with_source_span() {
1502        let dir = unique_temp_dir("citation-unknown");
1503        let bib = dir.join("refs.bib");
1504        std::fs::write(&bib, "@article{known, title={Known}}\n").unwrap();
1505        let source = dir.join("main.mos");
1506        let source_text = "#bibliography(\"refs.bib\")\n\nsee [@missing]\n";
1507        std::fs::write(&source, source_text).unwrap();
1508
1509        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1510        let missing: Vec<&Diagnostic> = r
1511            .diagnostics
1512            .iter()
1513            .filter(|d| d.def().code() == codes::MOS0045.code())
1514            .collect();
1515        assert_eq!(
1516            missing.len(),
1517            1,
1518            "expected one MOS0045, got {:?}",
1519            r.diagnostics
1520        );
1521        let diagnostic = missing[0];
1522        assert!(
1523            diagnostic.message().contains("`missing`"),
1524            "diagnostic should name missing citation key, got {:?}",
1525            diagnostic.message()
1526        );
1527        assert_eq!(
1528            diagnostic
1529                .span()
1530                .map(|span| &source_text[span.start()..span.end()]),
1531            Some("[@missing]"),
1532            "MOS0045 should point at the citation token"
1533        );
1534
1535        let citation = r
1536            .document
1537            .nodes()
1538            .find(|n| n.kind == NodeKind::Citation)
1539            .expect("Citation node");
1540        assert_eq!(
1541            citation.attributes.get("text"),
1542            Some(&AttrValue::Str("[?missing?]".to_owned())),
1543            "unknown citations keep visible placeholder text"
1544        );
1545        assert_eq!(citation.attributes.get("resolved"), None);
1546
1547        std::fs::remove_dir_all(&dir).ok();
1548    }
1549
1550    #[test]
1551    fn unknown_citation_key_suggests_nearest_loaded_key() {
1552        let dir = unique_temp_dir("citation-nearest-key");
1553        let bib = dir.join("refs.bib");
1554        std::fs::write(&bib, "@article{smith2024, title={Known}}\n").unwrap();
1555        let source = dir.join("main.mos");
1556        let source_text = "#bibliography(\"refs.bib\")\n\nsee [@smit2024]\n";
1557        std::fs::write(&source, source_text).unwrap();
1558
1559        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1560        let diagnostic = r
1561            .diagnostics
1562            .iter()
1563            .find(|d| d.def().code() == codes::MOS0045.code())
1564            .expect("MOS0045 for missing citation key");
1565        let suggestions = diagnostic.suggestions();
1566        assert_eq!(
1567            suggestions.len(),
1568            1,
1569            "expected one nearest-key suggestion, got {suggestions:?}"
1570        );
1571        assert_eq!(suggestions[0].replacement, "smith2024");
1572        assert_eq!(
1573            &source_text[suggestions[0].span.start()..suggestions[0].span.end()],
1574            "smit2024",
1575            "suggestion should replace only the citation key token"
1576        );
1577
1578        std::fs::remove_dir_all(&dir).ok();
1579    }
1580
1581    #[test]
1582    fn unknown_citation_key_tie_has_no_suggestion() {
1583        let dir = unique_temp_dir("citation-nearest-key-tie");
1584        let bib = dir.join("refs.bib");
1585        std::fs::write(&bib, "@article{abx, title={X}}\n@article{aby, title={Y}}\n").unwrap();
1586        let source = dir.join("main.mos");
1587        let source_text = "#bibliography(\"refs.bib\")\n\nsee [@abc]\n";
1588        std::fs::write(&source, source_text).unwrap();
1589
1590        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1591        let diagnostic = r
1592            .diagnostics
1593            .iter()
1594            .find(|d| d.def().code() == codes::MOS0045.code())
1595            .expect("MOS0045 for missing citation key");
1596        assert!(
1597            diagnostic.suggestions().is_empty(),
1598            "ties should not produce a guess: {:?}",
1599            diagnostic.suggestions()
1600        );
1601
1602        std::fs::remove_dir_all(&dir).ok();
1603    }
1604
1605    #[test]
1606    fn multiple_unknown_citations_emit_deterministic_mos0045_diagnostics() {
1607        let dir = unique_temp_dir("citation-multiple-unknown");
1608        let bib = dir.join("refs.bib");
1609        std::fs::write(&bib, "@article{known, title={Known}}\n").unwrap();
1610        let source = dir.join("main.mos");
1611        let source_text = "#bibliography(\"refs.bib\")\n\nsee [@alpha] and [@beta]\n";
1612        std::fs::write(&source, source_text).unwrap();
1613
1614        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1615        let spans: Vec<&str> = r
1616            .diagnostics
1617            .iter()
1618            .filter(|d| d.def().code() == codes::MOS0045.code())
1619            .filter_map(|d| d.span().map(|span| &source_text[span.start()..span.end()]))
1620            .collect();
1621        assert_eq!(
1622            spans,
1623            vec!["[@alpha]", "[@beta]"],
1624            "unknown citation diagnostics should follow document order"
1625        );
1626
1627        std::fs::remove_dir_all(&dir).ok();
1628    }
1629
1630    #[test]
1631    fn incomplete_bibliography_sources_do_not_emit_false_missing_citations() {
1632        let dir = unique_temp_dir("citation-incomplete-bibliography");
1633        let bib = dir.join("refs.bib");
1634        std::fs::write(&bib, "@article{known, title={Known}}\n").unwrap();
1635        let source = dir.join("main.mos");
1636        let source_text = "#bibliography(\"refs.bib\")\n#bibliography(\"missing.bib\")\n\nsee [@known] and [@maybe-in-missing]\n";
1637        std::fs::write(&source, source_text).unwrap();
1638
1639        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1640        assert!(
1641            r.diagnostics
1642                .iter()
1643                .any(|d| d.def().code() == codes::MOS0041.code()),
1644            "expected missing bibliography source warning, got {:?}",
1645            r.diagnostics
1646        );
1647        assert!(
1648            !r.diagnostics
1649                .iter()
1650                .any(|d| d.def().code() == codes::MOS0045.code()),
1651            "incomplete bibliography set must not produce false MOS0045 diagnostics"
1652        );
1653
1654        let known = r
1655            .document
1656            .nodes()
1657            .filter(|n| n.kind == NodeKind::Citation)
1658            .find(|n| n.attributes.get("key") == Some(&AttrValue::Str("known".to_owned())))
1659            .expect("known citation node");
1660        assert_eq!(
1661            known.attributes.get("resolved"),
1662            Some(&AttrValue::Bool(true))
1663        );
1664
1665        std::fs::remove_dir_all(&dir).ok();
1666    }
1667
1668    #[test]
1669    fn duplicate_citation_keys_across_bibliography_sources_emit_mos0046() {
1670        let dir = unique_temp_dir("citation-duplicate-key");
1671        let first = dir.join("first.bib");
1672        let second = dir.join("second.bib");
1673        std::fs::write(&first, "@article{dup, title={First}}\n").unwrap();
1674        std::fs::write(&second, "@book{dup, title={Second}}\n").unwrap();
1675        let source = dir.join("main.mos");
1676        let source_text =
1677            "#bibliography(\"first.bib\")\n#bibliography(\"second.bib\")\n\nsee [@dup]\n";
1678        std::fs::write(&source, source_text).unwrap();
1679
1680        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1681        let duplicates: Vec<&Diagnostic> = r
1682            .diagnostics
1683            .iter()
1684            .filter(|d| d.def().code() == codes::MOS0046.code())
1685            .collect();
1686        assert_eq!(
1687            duplicates.len(),
1688            1,
1689            "expected one MOS0046, got {:?}",
1690            r.diagnostics
1691        );
1692        let diagnostic = duplicates[0];
1693        assert!(
1694            diagnostic.message().contains("`dup`"),
1695            "diagnostic should name duplicate citation key, got {:?}",
1696            diagnostic.message()
1697        );
1698        assert_eq!(
1699            diagnostic
1700                .span()
1701                .map(|span| &source_text[span.start()..span.end()]),
1702            Some("#bibliography(\"second.bib\")"),
1703            "duplicate should point at the later bibliography source"
1704        );
1705
1706        std::fs::remove_dir_all(&dir).ok();
1707    }
1708
1709    #[test]
1710    fn missing_bibliography_path_emits_mos0040() {
1711        // `#bibliography()` with no path is the same authoring mistake as
1712        // `#image()`: a hard error, and no node leaks into the document.
1713        let r = lower("#bibliography()\n", &PathBuf::from("/tmp/no-such.mos"));
1714        assert!(
1715            r.diagnostics
1716                .iter()
1717                .any(|d| d.def().code() == codes::MOS0040.code()),
1718            "expected MOS0040, got {:?}",
1719            r.diagnostics
1720        );
1721        assert!(!r.document.nodes().any(|n| n.kind == NodeKind::Bibliography));
1722    }
1723
1724    #[test]
1725    fn empty_bibliography_path_emits_mos0040() {
1726        // `#bibliography("")` is a missing-path mistake, not an I/O failure;
1727        // it surfaces as MOS0040 and never reaches the filesystem check.
1728        let r = lower(
1729            "#bibliography(\"\")\n",
1730            &PathBuf::from("/tmp/whatever/main.mos"),
1731        );
1732        assert!(
1733            r.diagnostics
1734                .iter()
1735                .any(|d| d.def().code() == codes::MOS0040.code()),
1736            "expected MOS0040, got {:?}",
1737            r.diagnostics
1738        );
1739        assert!(
1740            !r.diagnostics
1741                .iter()
1742                .any(|d| d.def().code() == codes::MOS0041.code()),
1743            "empty path must not trip the filesystem warning: {:?}",
1744            r.diagnostics
1745        );
1746    }
1747
1748    #[test]
1749    fn non_string_bibliography_path_emits_type_mismatch_only() {
1750        // A path-shaped arg with the wrong type is not "missing"; report
1751        // the type mismatch once and do not also emit missing-path/I/O noise.
1752        let r = lower(
1753            "#bibliography(src: 12pt)\n",
1754            &PathBuf::from("/tmp/whatever/main.mos"),
1755        );
1756        assert!(
1757            r.diagnostics
1758                .iter()
1759                .any(|d| d.def().code() == codes::MOS0020.code()),
1760            "expected MOS0020, got {:?}",
1761            r.diagnostics
1762        );
1763        assert!(
1764            !r.diagnostics
1765                .iter()
1766                .any(|d| d.def().code() == codes::MOS0040.code()),
1767            "non-string path must not also emit MOS0040: {:?}",
1768            r.diagnostics
1769        );
1770        assert!(
1771            !r.diagnostics
1772                .iter()
1773                .any(|d| d.def().code() == codes::MOS0041.code()),
1774            "non-string path must not reach filesystem warning: {:?}",
1775            r.diagnostics
1776        );
1777        assert!(!r.document.nodes().any(|n| n.kind == NodeKind::Bibliography));
1778    }
1779
1780    #[test]
1781    fn duplicate_bibliography_path_keeps_first_path() {
1782        // Duplicate path declarations are an authoring error, but the first
1783        // source still wins so later accidental args cannot silently redirect
1784        // the bibliography boundary.
1785        let dir = unique_temp_dir("duplicate-path");
1786        let first = dir.join("first.bib");
1787        let second = dir.join("second.bib");
1788        std::fs::write(&first, "@book{first}\n").unwrap();
1789        std::fs::write(&second, "@book{second}\n").unwrap();
1790        let source = dir.join("main.mos");
1791        let source_text = "#bibliography(\"first.bib\", path: \"second.bib\")\n";
1792        std::fs::write(&source, source_text).unwrap();
1793        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1794        let duplicate_path_diagnostics: Vec<&Diagnostic> = r
1795            .diagnostics
1796            .iter()
1797            .filter(|d| d.def().code() == codes::MOS0042.code())
1798            .collect();
1799        assert_eq!(
1800            duplicate_path_diagnostics.len(),
1801            1,
1802            "expected one MOS0042, got {:?}",
1803            r.diagnostics
1804        );
1805        let duplicate = duplicate_path_diagnostics[0];
1806        assert_eq!(
1807            duplicate
1808                .span()
1809                .map(|span| &source_text[span.start()..span.end()]),
1810            Some("\"second.bib\""),
1811            "duplicate path diagnostic should point at the later path value"
1812        );
1813        let node = r
1814            .document
1815            .nodes()
1816            .find(|n| n.kind == NodeKind::Bibliography)
1817            .expect("Bibliography node");
1818        assert_eq!(
1819            node.attributes.get("src"),
1820            Some(&AttrValue::Str("first.bib".to_owned()))
1821        );
1822        assert_eq!(
1823            node.attributes.get("resolved_path"),
1824            Some(&AttrValue::Str(first.to_string_lossy().into_owned()))
1825        );
1826        std::fs::remove_dir_all(&dir).ok();
1827    }
1828
1829    #[test]
1830    fn missing_bibliography_source_warns_mos0041_but_keeps_node() {
1831        // A declared-but-absent database is a non-fatal warning: the build
1832        // still succeeds and the node is emitted with its resolved path so
1833        // the later BibTeX slice can act on it.
1834        let dir = unique_temp_dir("absent");
1835        let source = dir.join("main.mos");
1836        std::fs::write(&source, "#bibliography(\"nope.bib\")\n").unwrap();
1837        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1838        assert!(
1839            !r.has_errors(),
1840            "a missing source is a warning, not an error: {:?}",
1841            r.diagnostics
1842        );
1843        assert!(
1844            r.diagnostics
1845                .iter()
1846                .any(|d| d.def().code() == codes::MOS0041.code()),
1847            "expected MOS0041, got {:?}",
1848            r.diagnostics
1849        );
1850        let node = r
1851            .document
1852            .nodes()
1853            .find(|n| n.kind == NodeKind::Bibliography)
1854            .expect("Bibliography node still emitted on a missing source");
1855        assert_eq!(
1856            node.attributes.get("resolved_path"),
1857            Some(&AttrValue::Str(
1858                dir.join("nope.bib").to_string_lossy().into_owned()
1859            ))
1860        );
1861        std::fs::remove_dir_all(&dir).ok();
1862    }
1863
1864    #[test]
1865    fn unknown_bibliography_arg_emits_mos0015() {
1866        // Arguments beyond the path (e.g. a future `style:`) are rejected
1867        // now so the directive's surface stays narrow until later slices
1868        // grow it deliberately.
1869        let dir = unique_temp_dir("unknownarg");
1870        std::fs::write(dir.join("refs.bib"), "@book{a}\n").unwrap();
1871        let source = dir.join("main.mos");
1872        std::fs::write(&source, "#bibliography(\"refs.bib\", style: \"ieee\")\n").unwrap();
1873        let r = lower(&std::fs::read_to_string(&source).unwrap(), &source);
1874        assert!(
1875            r.diagnostics
1876                .iter()
1877                .any(|d| d.def().code() == codes::MOS0015.code()),
1878            "expected MOS0015, got {:?}",
1879            r.diagnostics
1880        );
1881        std::fs::remove_dir_all(&dir).ok();
1882    }
1883}