Skip to main content

mos_parse/
parser.rs

1use std::path::{Path, PathBuf};
2
3use mos_core::{Diagnostic, DiagnosticDef, SourceSpan};
4
5use crate::support::list_marker_at;
6use crate::{Item, ParseResult, SyntaxTree};
7
/// Cursor-based parser state for a single source file.
///
/// [`Parser::run`] consumes the parser and hands `file`, `items`, and
/// `diagnostics` over to the returned `ParseResult`.
pub(crate) struct Parser<'a> {
    /// Source text being parsed; scanned as raw bytes by the cursor helpers.
    pub(crate) src: &'a str,
    /// Path of the file being parsed; cloned into every span built by `span`.
    pub(crate) file: PathBuf,
    /// Current byte offset of the cursor into `src`.
    pub(crate) pos: usize,
    /// Items that end up in the resulting `SyntaxTree`.
    pub(crate) items: Vec<Item>,
    /// Diagnostics returned alongside the tree.
    pub(crate) diagnostics: Vec<Diagnostic>,
}
15
16impl<'a> Parser<'a> {
17    pub(crate) fn new(src: &'a str, file: &Path) -> Self {
18        Self {
19            src,
20            file: file.to_path_buf(),
21            pos: 0,
22            items: Vec::new(),
23            diagnostics: Vec::new(),
24        }
25    }
26
27    pub(crate) fn run(mut self) -> ParseResult {
28        if self.pos == 0 && self.starts_with("#!") {
29            self.skip_line();
30        }
31        while self.pos < self.src.len() {
32            if self.at_blank_line() {
33                self.skip_line();
34                continue;
35            }
36            if let Some(kw) = self.at_directive_keyword() {
37                self.parse_directive_block(kw);
38            } else if self.starts_with("=") {
39                self.parse_heading();
40            } else if self.at_list_marker() {
41                self.parse_list();
42            } else {
43                self.parse_paragraph();
44            }
45        }
46        ParseResult {
47            tree: SyntaxTree {
48                file: self.file,
49                items: self.items,
50            },
51            diagnostics: self.diagnostics,
52        }
53    }
54
55    pub(crate) fn at_list_marker(&self) -> bool {
56        list_marker_at(self.src.as_bytes(), self.pos).is_some()
57    }
58
59    pub(crate) fn span(&self, start: usize, end: usize) -> SourceSpan {
60        SourceSpan::new(self.file.clone(), start, end)
61    }
62
63    pub(crate) fn starts_with(&self, prefix: &str) -> bool {
64        self.src.as_bytes()[self.pos..].starts_with(prefix.as_bytes())
65    }
66
67    pub(crate) fn at_directive_keyword(&self) -> Option<&'static str> {
68        const KEYWORDS: &[&str] = &["set", "image", "figure", "bibliography", "pre", "code"];
69        if !self.starts_with("#") {
70            return None;
71        }
72        let after_hash = self.pos + 1;
73        let bytes = self.src.as_bytes();
74        for kw in KEYWORDS {
75            let end = after_hash + kw.len();
76            if end > bytes.len() {
77                continue;
78            }
79            if &bytes[after_hash..end] != kw.as_bytes() {
80                continue;
81            }
82            let boundary = bytes.get(end).is_none_or(|&b| {
83                b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' || b == b'(' || b == b'['
84            });
85            if boundary {
86                return Some(kw);
87            }
88        }
89        None
90    }
91
92    pub(crate) fn at_blank_line(&self) -> bool {
93        let bytes = self.src.as_bytes();
94        let mut i = self.pos;
95        while i < bytes.len() && bytes[i] != b'\n' {
96            if !bytes[i].is_ascii_whitespace() {
97                return false;
98            }
99            i += 1;
100        }
101        true
102    }
103
104    pub(crate) fn skip_line(&mut self) {
105        let bytes = self.src.as_bytes();
106        while self.pos < bytes.len() && bytes[self.pos] != b'\n' {
107            self.pos += 1;
108        }
109        if self.pos < bytes.len() {
110            self.pos += 1;
111        }
112    }
113
114    pub(crate) fn current_line_bounds(&self) -> (usize, usize, usize) {
115        self.line_bounds_from(self.pos)
116    }
117
118    pub(crate) fn line_bounds_from(&self, start: usize) -> (usize, usize, usize) {
119        let bytes = self.src.as_bytes();
120        let mut end = start;
121        while end < bytes.len() && bytes[end] != b'\n' {
122            end += 1;
123        }
124        let line_end = if end < bytes.len() { end + 1 } else { end };
125        let mut content_end = end;
126        if content_end > start && bytes[content_end - 1] == b'\r' {
127            content_end -= 1;
128        }
129        (start, content_end, line_end)
130    }
131
132    pub(crate) fn warn(
133        &self,
134        def: &'static DiagnosticDef,
135        message: &str,
136        start: usize,
137        end: usize,
138    ) -> Diagnostic {
139        Diagnostic::simple(def, None, message).with_span(self.span(start, end))
140    }
141}
142
143#[cfg(test)]
144mod tests {
145    use std::path::PathBuf;
146
147    use mos_core::{CollectingSink, Diagnostic, DiagnosticCode, Severity, codes};
148
149    use crate::*;
150
151    fn parse_str(src: &str) -> ParseResult {
152        let mut sink = CollectingSink::new();
153        let file = PathBuf::from("test.mos");
154        let result = parse(src, &file, &mut sink);
155        assert!(result.is_ok(), "parse structurally aborted: {result:?}");
156        let tree = match result {
157            Ok(tree) => tree,
158            Err(_) => SyntaxTree {
159                file,
160                items: Vec::new(),
161            },
162        };
163        ParseResult {
164            tree,
165            diagnostics: sink.into_diagnostics(),
166        }
167    }
168
169    fn diagnostic_for(r: &ParseResult, code: DiagnosticCode) -> &Diagnostic {
170        let diagnostic = r
171            .diagnostics
172            .iter()
173            .find(|diagnostic| diagnostic.def().code() == code);
174        assert!(
175            diagnostic.is_some(),
176            "expected diagnostic {code}, got {:?}",
177            r.diagnostics
178        );
179        diagnostic.unwrap_or_else(|| &r.diagnostics[0])
180    }
181
182    fn required_offset(offset: Option<usize>, label: &str, src: &str) -> usize {
183        assert!(offset.is_some(), "expected {label} in {src:?}");
184        offset.unwrap_or(0)
185    }
186
187    fn assert_single_insertion(diagnostic: &Diagnostic, offset: usize, replacement: &str) {
188        let suggestions = diagnostic.suggestions();
189        assert_eq!(
190            suggestions.len(),
191            1,
192            "expected one suggestion, got {suggestions:?}"
193        );
194        let suggestion = &suggestions[0];
195        assert_eq!(suggestion.span.start(), offset);
196        assert_eq!(suggestion.span.end(), offset);
197        assert_eq!(suggestion.replacement, replacement);
198    }
199
200    #[test]
201    fn empty_source() {
202        let r = parse_str("");
203        assert!(r.tree.items.is_empty());
204        assert!(!r.has_errors());
205    }
206
207    #[test]
208    fn byte_zero_shebang_is_ignored() {
209        let src = "#!/usr/bin/env -S mos build --open\n= Hello\n";
210        let r = parse_str(src);
211        assert!(!r.has_errors(), "{:?}", r.diagnostics);
212        assert_eq!(r.tree.items.len(), 1);
213        let heading = r.tree.items[0].as_heading();
214        assert!(
215            heading.is_some(),
216            "expected heading, got {:?}",
217            r.tree.items[0]
218        );
219        let Some((level, inlines, span)) = heading else {
220            return;
221        };
222
223        assert_eq!(level, 1);
224        assert_eq!(inlines[0].text, "Hello");
225        assert_eq!(span.start(), src.find("= Hello").unwrap_or(0));
226    }
227
228    #[test]
229    fn byte_zero_shebang_with_crlf_is_ignored() {
230        let src = "#!/usr/bin/env -S mos build --open\r\n= Hello\r\n";
231        let r = parse_str(src);
232        assert!(!r.has_errors(), "{:?}", r.diagnostics);
233        assert_eq!(r.tree.items.len(), 1);
234        let heading = r.tree.items[0].as_heading();
235        assert!(
236            heading.is_some(),
237            "expected heading, got {:?}",
238            r.tree.items[0]
239        );
240        let Some((_, inlines, span)) = heading else {
241            return;
242        };
243
244        assert_eq!(inlines[0].text, "Hello");
245        assert_eq!(span.start(), src.find("= Hello").unwrap_or(0));
246    }
247
248    #[test]
249    fn later_shebang_text_stays_paragraph_text() {
250        let src = "Before\n#! not script metadata\n";
251        let r = parse_str(src);
252        assert!(!r.has_errors(), "{:?}", r.diagnostics);
253        assert_eq!(r.tree.items.len(), 1);
254        let paragraph = r.tree.items[0].as_paragraph();
255        assert!(
256            paragraph.is_some(),
257            "expected paragraph, got {:?}",
258            r.tree.items[0]
259        );
260        let Some((inlines, _)) = paragraph else {
261            return;
262        };
263
264        assert_eq!(inlines.len(), 1);
265        assert_eq!(inlines[0].text, "Before\n#! not script metadata");
266    }
267
268    #[test]
269    fn diagnostics_after_shebang_keep_source_offsets() {
270        let src = "#!/usr/bin/env -S mos build --open\n*unclosed\n";
271        let r = parse_str(src);
272        assert!(!r.has_errors());
273        let diagnostic = diagnostic_for(&r, codes::MOS0031.code());
274
275        assert_eq!(
276            diagnostic.span().map(mos_core::SourceSpan::start),
277            src.find("*unclosed")
278        );
279    }
280
281    #[test]
282    fn single_heading() {
283        let r = parse_str("= Hello\n");
284        assert!(!r.has_errors());
285        assert_eq!(r.tree.items.len(), 1);
286        let (level, inlines, _) = r.tree.items[0].as_heading().unwrap();
287        assert_eq!(level, 1);
288        assert_eq!(inlines.len(), 1);
289        assert_eq!(inlines[0].text, "Hello");
290        assert_eq!(inlines[0].kind, InlineKind::Text);
291    }
292
293    #[test]
294    fn heading_levels() {
295        let src = "= One\n== Two\n=== Three\n";
296        let r = parse_str(src);
297        assert!(!r.has_errors());
298        let levels: Vec<u8> = r
299            .tree
300            .items
301            .iter()
302            .filter_map(|i| i.as_heading().map(|(l, _, _)| l))
303            .collect();
304        assert_eq!(levels, vec![1, 2, 3]);
305    }
306
307    #[test]
308    fn paragraph_collects_lines() {
309        let src = "first line\nsecond line\n\nnext para\n";
310        let r = parse_str(src);
311        assert!(!r.has_errors());
312        assert_eq!(r.tree.items.len(), 2);
313        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
314        assert_eq!(inlines.len(), 1);
315        assert_eq!(inlines[0].text, "first line\nsecond line");
316    }
317
318    #[test]
319    fn inline_emphasis_strong_code() {
320        let src = "a *b* c **d** e `f` g\n";
321        let r = parse_str(src);
322        assert!(!r.has_errors(), "{:?}", r.diagnostics);
323        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
324        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
325        assert_eq!(
326            kinds,
327            vec![
328                InlineKind::Text,
329                InlineKind::Emphasis,
330                InlineKind::Text,
331                InlineKind::Strong,
332                InlineKind::Text,
333                InlineKind::Code,
334                InlineKind::Text,
335            ]
336        );
337        let texts: Vec<&str> = inlines.iter().map(|i| i.text.as_str()).collect();
338        assert_eq!(texts, vec!["a ", "b", " c ", "d", " e ", "f", " g"]);
339    }
340
341    #[test]
342    fn nested_bold_italic_triple_delimiter() {
343        let r = parse_str("***x***\n");
344        assert!(!r.has_errors(), "{:?}", r.diagnostics);
345        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
346        assert_eq!(inlines.len(), 1, "got {inlines:?}");
347        assert_eq!(inlines[0].kind, InlineKind::BoldItalic);
348        assert_eq!(inlines[0].text, "x");
349    }
350
351    #[test]
352    fn nested_emphasis_inside_strong() {
353        let r = parse_str("**a *b* c**\n");
354        assert!(!r.has_errors(), "{:?}", r.diagnostics);
355        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
356        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
357        assert_eq!(
358            kinds,
359            vec![
360                InlineKind::Strong,
361                InlineKind::BoldItalic,
362                InlineKind::Strong,
363            ],
364            "got {inlines:?}",
365        );
366        let texts: Vec<&str> = inlines.iter().map(|i| i.text.as_str()).collect();
367        assert_eq!(texts, vec!["a ", "b", " c"]);
368    }
369
370    #[test]
371    fn nested_strong_inside_emphasis() {
372        let r = parse_str("*a **b** c*\n");
373        assert!(!r.has_errors(), "{:?}", r.diagnostics);
374        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
375        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
376        assert_eq!(
377            kinds,
378            vec![
379                InlineKind::Emphasis,
380                InlineKind::BoldItalic,
381                InlineKind::Emphasis,
382            ],
383            "got {inlines:?}",
384        );
385        let texts: Vec<&str> = inlines.iter().map(|i| i.text.as_str()).collect();
386        assert_eq!(texts, vec!["a ", "b", " c"]);
387    }
388
389    #[test]
390    fn ambiguous_inner_star_stays_strong_text() {
391        let r = parse_str("**a*b**\n");
392        assert!(!r.has_errors(), "{:?}", r.diagnostics);
393        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
394        assert_eq!(inlines.len(), 1, "got {inlines:?}");
395        assert_eq!(inlines[0].kind, InlineKind::Strong);
396        assert_eq!(inlines[0].text, "a*b");
397    }
398
399    #[test]
400    fn code_spans_do_not_parse_nested_emphasis() {
401        let r = parse_str("`***x***`\n");
402        assert!(!r.has_errors(), "{:?}", r.diagnostics);
403        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
404        assert_eq!(inlines.len(), 1, "got {inlines:?}");
405        assert_eq!(inlines[0].kind, InlineKind::Code);
406        assert_eq!(inlines[0].text, "***x***");
407    }
408
409    #[test]
410    fn unterminated_emphasis_warns() {
411        let src = "hi *there\n";
412        let r = parse_str(src);
413        assert!(!r.has_errors());
414        let diagnostic = diagnostic_for(&r, codes::MOS0031.code());
415        assert_eq!(diagnostic.severity(), Severity::Warning);
416        assert_single_insertion(
417            diagnostic,
418            required_offset(src.find('\n'), "line ending", src),
419            "*",
420        );
421    }
422
423    #[test]
424    fn unterminated_strong_warns() {
425        let src = "hi **there\n";
426        let r = parse_str(src);
427        assert!(!r.has_errors());
428        let diagnostic = diagnostic_for(&r, codes::MOS0028.code());
429        assert_eq!(diagnostic.severity(), Severity::Warning);
430        assert_single_insertion(
431            diagnostic,
432            required_offset(src.find('\n'), "line ending", src),
433            "**",
434        );
435    }
436
437    #[test]
438    fn unterminated_code_warns_and_suggests_closer() {
439        let src = "hi `there\n";
440        let r = parse_str(src);
441        assert!(!r.has_errors());
442        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
443        assert_eq!(diagnostic.severity(), Severity::Warning);
444        assert_single_insertion(
445            diagnostic,
446            required_offset(src.find('\n'), "line ending", src),
447            "`",
448        );
449    }
450
451    #[test]
452    fn terminated_code_does_not_warn() {
453        let r = parse_str("hi `there`\n");
454
455        assert!(
456            r.diagnostics
457                .iter()
458                .all(|diagnostic| diagnostic.def().code() != codes::MOS0034.code()),
459            "{:?}",
460            r.diagnostics
461        );
462    }
463
464    #[test]
465    fn unterminated_code_at_eof_suggests_closer() {
466        let src = "hi `there";
467        let r = parse_str(src);
468        assert!(!r.has_errors());
469        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
470
471        assert_single_insertion(diagnostic, src.len(), "`");
472    }
473
474    #[test]
475    fn unterminated_code_with_long_plain_tail_suggests_closer() {
476        let src = concat!(
477            "Before `this code-like run keeps scanning through several words, ",
478            "punctuation, and @refs without meeting a styled delimiter\n",
479        );
480        let r = parse_str(src);
481        assert!(!r.has_errors());
482        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
483
484        assert_single_insertion(
485            diagnostic,
486            required_offset(src.find('\n'), "line ending", src),
487            "`",
488        );
489    }
490
491    #[test]
492    fn unterminated_code_before_crlf_suggests_closer() {
493        let src = "hi `there\r\n";
494        let r = parse_str(src);
495        assert!(!r.has_errors());
496        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
497
498        assert_single_insertion(
499            diagnostic,
500            required_offset(src.find('\r'), "CRLF", src),
501            "`",
502        );
503    }
504
505    #[test]
506    fn unterminated_code_across_lf_lines_suggests_at_paragraph_end() {
507        let src = "hi `there\nstill code-like text\n";
508        let r = parse_str(src);
509        assert!(!r.has_errors());
510        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
511
512        assert_single_insertion(
513            diagnostic,
514            required_offset(src.rfind('\n'), "paragraph line ending", src),
515            "`",
516        );
517    }
518
519    #[test]
520    fn unterminated_code_across_crlf_lines_suggests_at_paragraph_end() {
521        let src = "hi `there\r\nstill code-like text\r\n";
522        let r = parse_str(src);
523        assert!(!r.has_errors());
524        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
525
526        assert_single_insertion(
527            diagnostic,
528            required_offset(src.rfind('\r'), "paragraph CRLF ending", src),
529            "`",
530        );
531    }
532
533    #[test]
534    fn unterminated_code_inside_emphasis_suggests_before_outer_closer() {
535        let src = "*a `b*\n";
536        let r = parse_str(src);
537        assert!(!r.has_errors());
538        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
539
540        assert_single_insertion(
541            diagnostic,
542            required_offset(src.rfind('*'), "outer closer", src),
543            "`",
544        );
545        assert!(
546            r.diagnostics
547                .iter()
548                .all(|diagnostic| diagnostic.def().code() != codes::MOS0031.code()),
549            "{:?}",
550            r.diagnostics
551        );
552    }
553
554    #[test]
555    fn unterminated_code_inside_strong_suggests_before_outer_closer() {
556        let src = "**a `b**\n";
557        let r = parse_str(src);
558        assert!(!r.has_errors());
559        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
560
561        assert_single_insertion(
562            diagnostic,
563            required_offset(src.rfind("**"), "strong closer", src),
564            "`",
565        );
566        assert!(
567            r.diagnostics
568                .iter()
569                .all(|diagnostic| diagnostic.def().code() != codes::MOS0028.code()),
570            "{:?}",
571            r.diagnostics
572        );
573    }
574
575    #[test]
576    fn unterminated_code_inside_strong_across_lines_suggests_before_outer_closer() {
577        let src = "**a `b\ncontinued**\n";
578        let r = parse_str(src);
579        assert!(!r.has_errors());
580        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
581
582        assert_single_insertion(
583            diagnostic,
584            required_offset(src.rfind("**"), "strong closer", src),
585            "`",
586        );
587        assert!(
588            r.diagnostics
589                .iter()
590                .all(|diagnostic| diagnostic.def().code() != codes::MOS0028.code()),
591            "{:?}",
592            r.diagnostics
593        );
594    }
595
596    #[test]
597    fn unterminated_code_inside_bold_italic_suggests_before_outer_closer() {
598        let src = "***a `b***\n";
599        let r = parse_str(src);
600        assert!(!r.has_errors());
601        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
602
603        assert_single_insertion(
604            diagnostic,
605            required_offset(src.rfind("***"), "bold italic closer", src),
606            "`",
607        );
608        assert!(
609            r.diagnostics.iter().all(|diagnostic| {
610                diagnostic.def().code() != codes::MOS0028.code()
611                    && diagnostic.def().code() != codes::MOS0031.code()
612            }),
613            "{:?}",
614            r.diagnostics
615        );
616    }
617
618    #[test]
619    fn unterminated_code_before_emphasis_suppresses_closer_suggestion() {
620        let src = "hi `a *b*\n";
621        let r = parse_str(src);
622        assert!(!r.has_errors());
623        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
624
625        assert!(diagnostic.suggestions().is_empty());
626        assert!(
627            r.diagnostics
628                .iter()
629                .all(|diagnostic| diagnostic.def().code() != codes::MOS0031.code()),
630            "{:?}",
631            r.diagnostics
632        );
633    }
634
635    #[test]
636    fn unterminated_code_with_long_styled_tail_suppresses_closer_suggestion() {
637        let src = concat!(
638            "Before `this code-like run keeps scanning through a longer ",
639            "sentence until it reaches *valid emphasis* later in the paragraph\n",
640        );
641        let r = parse_str(src);
642        assert!(!r.has_errors());
643        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
644
645        assert!(diagnostic.suggestions().is_empty());
646        assert!(
647            r.diagnostics
648                .iter()
649                .all(|diagnostic| diagnostic.def().code() != codes::MOS0031.code()),
650            "{:?}",
651            r.diagnostics
652        );
653    }
654
655    #[test]
656    fn unterminated_code_before_strong_suppresses_closer_suggestion() {
657        let src = "hi `a **b**\n";
658        let r = parse_str(src);
659        assert!(!r.has_errors());
660        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
661
662        assert!(diagnostic.suggestions().is_empty());
663        assert!(
664            r.diagnostics
665                .iter()
666                .all(|diagnostic| diagnostic.def().code() != codes::MOS0028.code()),
667            "{:?}",
668            r.diagnostics
669        );
670    }
671
672    #[test]
673    fn unterminated_code_across_lines_before_emphasis_suppresses_closer_suggestion() {
674        let src = "hi `a\n*b*\n";
675        let r = parse_str(src);
676        assert!(!r.has_errors());
677        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
678
679        assert!(diagnostic.suggestions().is_empty());
680        assert!(
681            r.diagnostics
682                .iter()
683                .all(|diagnostic| diagnostic.def().code() != codes::MOS0031.code()),
684            "{:?}",
685            r.diagnostics
686        );
687    }
688
689    #[test]
690    fn unterminated_code_before_nested_strong_suppresses_closer_suggestion() {
691        let src = "*a `b **c** d*\n";
692        let r = parse_str(src);
693        assert!(!r.has_errors());
694        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
695
696        assert!(diagnostic.suggestions().is_empty());
697        assert!(
698            r.diagnostics.iter().all(|diagnostic| {
699                diagnostic.def().code() != codes::MOS0028.code()
700                    && diagnostic.def().code() != codes::MOS0031.code()
701            }),
702            "{:?}",
703            r.diagnostics
704        );
705    }
706
707    #[test]
708    fn unterminated_code_inside_strong_before_nested_emphasis_suppresses_closer_suggestion() {
709        let src = "**a `b *c* d**\n";
710        let r = parse_str(src);
711        assert!(!r.has_errors());
712        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
713
714        assert!(diagnostic.suggestions().is_empty());
715        assert!(
716            r.diagnostics.iter().all(|diagnostic| {
717                diagnostic.def().code() != codes::MOS0028.code()
718                    && diagnostic.def().code() != codes::MOS0031.code()
719            }),
720            "{:?}",
721            r.diagnostics
722        );
723    }
724
725    #[test]
726    fn unterminated_code_before_literal_star_suppresses_closer_suggestion() {
727        let src = "hi `a * b\n";
728        let r = parse_str(src);
729        assert!(!r.has_errors());
730        let diagnostic = diagnostic_for(&r, codes::MOS0034.code());
731
732        assert!(diagnostic.suggestions().is_empty());
733    }
734
735    #[test]
736    fn nested_unterminated_emphasis_warns_without_suggestion() {
737        let r = parse_str("hi *a **b**\n");
738        let diagnostic = diagnostic_for(&r, codes::MOS0031.code());
739
740        assert!(diagnostic.suggestions().is_empty());
741    }
742
743    #[test]
744    fn nested_unterminated_strong_warns_without_suggestion() {
745        let r = parse_str("hi **a *b\n");
746        let diagnostic = diagnostic_for(&r, codes::MOS0028.code());
747
748        assert!(diagnostic.suggestions().is_empty());
749    }
750
751    #[test]
752    fn set_block_simple() {
753        let r = parse_str("#set page(paper: \"A4\")\n");
754        assert!(!r.has_errors(), "{:?}", r.diagnostics);
755        let (name, args, _) = r.tree.items[0].as_set().unwrap();
756        assert_eq!(name, "page");
757        assert_eq!(args.len(), 1);
758        assert_eq!(args[0].key(), Some("paper"));
759        assert_eq!(args[0].value(), &SetValue::Str("A4".to_owned()));
760    }
761
762    #[test]
763    fn set_block_multiline() {
764        let src = "#set document(\n  title: \"x\",\n  author: \"y\",\n)\n\n= After\n";
765        let r = parse_str(src);
766        assert!(!r.has_errors(), "{:?}", r.diagnostics);
767        assert_eq!(r.tree.items.len(), 2);
768        let (name, args, _) = r.tree.items[0].as_set().unwrap();
769        assert_eq!(name, "document");
770        assert_eq!(args.len(), 2);
771        assert_eq!(args[0].key(), Some("title"));
772        assert_eq!(args[0].value(), &SetValue::Str("x".to_owned()));
773        assert_eq!(args[1].key(), Some("author"));
774        assert_eq!(args[1].value(), &SetValue::Str("y".to_owned()));
775        assert_eq!(r.tree.items[1].as_heading().unwrap().0, 1);
776    }
777
778    #[test]
779    fn set_value_length_units() {
780        let src = "#set page(margin: 24mm)\n#set text(size: 11pt, leading: 1.35, scale: 2em)\n";
781        let r = parse_str(src);
782        assert!(!r.has_errors(), "{:?}", r.diagnostics);
783        let (_, page_args, _) = r.tree.items[0].as_set().unwrap();
784        assert_eq!(
785            page_args[0].value(),
786            &SetValue::Length(24.0, LengthUnit::Mm)
787        );
788        let (_, text_args, _) = r.tree.items[1].as_set().unwrap();
789        assert_eq!(
790            text_args[0].value(),
791            &SetValue::Length(11.0, LengthUnit::Pt)
792        );
793        assert_eq!(text_args[1].value(), &SetValue::Float(1.35));
794        assert_eq!(text_args[2].value(), &SetValue::Length(2.0, LengthUnit::Em));
795    }
796
797    #[test]
798    fn set_value_int_and_ident() {
799        let r = parse_str("#set foo(count: 42, alignment: bottom-center)\n");
800        assert!(!r.has_errors(), "{:?}", r.diagnostics);
801        let (_, args, _) = r.tree.items[0].as_set().unwrap();
802        assert_eq!(args[0].value(), &SetValue::Int(42));
803        assert_eq!(
804            args[1].value(),
805            &SetValue::Ident("bottom-center".to_owned())
806        );
807    }
808
809    #[test]
810    fn set_value_trailing_comma_ok() {
811        let r = parse_str("#set page(paper: \"A4\",)\n");
812        assert!(!r.has_errors(), "{:?}", r.diagnostics);
813        let (_, args, _) = r.tree.items[0].as_set().unwrap();
814        assert_eq!(args.len(), 1);
815    }
816
817    #[test]
818    fn set_string_escape_sequences() {
819        let r = parse_str("#set foo(s: \"a\\\"b\\nc\\\\d\")\n");
820        assert!(!r.has_errors(), "{:?}", r.diagnostics);
821        let (_, args, _) = r.tree.items[0].as_set().unwrap();
822        assert_eq!(args[0].value(), &SetValue::Str("a\"b\nc\\d".to_owned()));
823    }
824
825    #[test]
826    fn set_unknown_escape_with_multibyte_does_not_panic() {
827        // Regression: the byte after `\` may be the leading byte of a
828        // multibyte UTF-8 scalar (here `é` = 0xC3 0xA9). Advancing by 2
829        // would leave the cursor mid-codepoint and the next slice
830        // would panic. The parser must walk to a char boundary and
831        // emit MOS0022 instead.
832        let r = parse_str("#set foo(s: \"\\é\")\n");
833        assert!(
834            r.diagnostics
835                .iter()
836                .any(|d| d.def().code() == codes::MOS0022.code()),
837            "expected MOS0022, got {:?}",
838            r.diagnostics
839        );
840    }
841
842    #[test]
843    fn set_unknown_unit_emits_mos0022() {
844        let r = parse_str("#set page(margin: 24xx)\n");
845        assert!(
846            r.diagnostics
847                .iter()
848                .any(|d| d.def().code() == codes::MOS0022.code()),
849            "expected MOS0022, got {:?}",
850            r.diagnostics
851        );
852    }
853
854    #[test]
855    fn set_lone_minus_emits_mos0022() {
856        // `-` not followed by a digit is a malformed number literal.
857        let r = parse_str("#set foo(x: -)\n");
858        assert!(
859            r.diagnostics
860                .iter()
861                .any(|d| d.def().code() == codes::MOS0022.code()),
862            "expected MOS0022, got {:?}",
863            r.diagnostics
864        );
865    }
866
867    #[test]
868    fn set_without_identifier_emits_mos0010() {
869        // `#set` followed only by whitespace before the newline has no
870        // target identifier. The parser must diagnose with MOS0010 and
871        // skip the line rather than treat the next line as the body.
872        let r = parse_str("#set\nbody\n");
873        let mos0010: Vec<_> = r
874            .diagnostics
875            .iter()
876            .filter(|d| d.def().code() == codes::MOS0010.code())
877            .collect();
878        assert_eq!(
879            mos0010.len(),
880            1,
881            "expected exactly one MOS0010, got {:?}",
882            r.diagnostics
883        );
884        assert!(
885            mos0010[0].message().contains("#set"),
886            "MOS0010 message should mention `#set`, got {:?}",
887            mos0010[0].message()
888        );
889        // Recovery: the next line must parse as its own paragraph,
890        // not get eaten as the directive body.
891        assert!(
892            r.tree.items.iter().any(|i| {
893                i.as_paragraph()
894                    .is_some_and(|(inlines, _)| inlines.iter().any(|x| x.text.contains("body")))
895            }),
896            "expected a recovered `body` paragraph, got items {:?}",
897            r.tree.items
898        );
899    }
900
#[test]
fn set_missing_colon_emits_mos0025() {
    // `paper "A4"` has a key and a value but no `:` between them.
    let r = parse_str("#set page(paper \"A4\")\n");
    let saw_mos0025 = r
        .diagnostics
        .iter()
        .any(|d| d.def().code() == codes::MOS0025.code());
    assert!(saw_mos0025, "expected MOS0025, got {:?}", r.diagnostics);
}
912
#[test]
fn set_positional_arg_emits_mos0025() {
    // A bare string with no `key:` in front of it inside `#set page(...)`
    // is expected to be reported as MOS0025.
    let r = parse_str("#set page(\"A4\")\n");
    assert!(
        r.diagnostics
            .iter()
            .any(|d| d.def().code() == codes::MOS0025.code()),
        // Print what was emitted so a failure is diagnosable, matching
        // the other MOS0025 test in this module.
        "expected MOS0025, got {:?}",
        r.diagnostics
    );
}
922
#[test]
fn unterminated_set_block_errors() {
    // The argument list is opened with `(` and the input ends before any
    // `)`; the parse result must report at least one error.
    let r = parse_str("#set page(\n  paper: \"A4\",\n");
    assert!(
        r.has_errors(),
        "expected an error for the unterminated block, got {:?}",
        r.diagnostics
    );
}
928
#[test]
fn trailing_content_after_set_block_diagnoses_and_recovers() {
    // Text between the closing `)` and the newline used to be dropped
    // silently. It is now reported as MOS0019 and left in the input, so
    // it shows up as a paragraph next to the set item.
    let r = parse_str("#set page(paper: \"A4\") leftover\n");

    let saw_mos0019 = r
        .diagnostics
        .iter()
        .any(|d| d.def().code() == codes::MOS0019.code());
    assert!(
        saw_mos0019,
        "expected MOS0019 diagnostic, got {:?}",
        r.diagnostics
    );

    let has_set = r.tree.items.iter().any(|item| item.as_set().is_some());
    assert!(has_set);

    let has_leftover = r.tree.items.iter().any(|item| match item.as_paragraph() {
        Some((inlines, _)) => inlines.iter().any(|run| run.text.contains("leftover")),
        None => false,
    });
    assert!(has_leftover);
}
948
#[test]
fn set_block_followed_by_horizontal_whitespace_then_newline_is_ok() {
    // Spaces and a tab after `)` do not count as trailing content.
    let r = parse_str("#set page(paper: \"A4\")  \t\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let item_count = r.tree.items.len();
    assert_eq!(item_count, 1);
}
955
#[test]
fn set_with_string_containing_paren() {
    // The `)` inside the quoted value must not end the argument list.
    let r = parse_str("#set foo(label: \"closes ) inside\")\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let item_count = r.tree.items.len();
    assert_eq!(item_count, 1);
}
962
#[test]
fn equals_without_space_is_paragraph() {
    // `=` immediately followed by text (no space) is expected to parse
    // as an ordinary, error-free paragraph rather than a heading.
    let r = parse_str("=notaheading\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert!(
        r.tree.items[0].as_paragraph().is_some(),
        "expected a paragraph, got {:?}",
        r.tree.items[0]
    );
}
969
#[test]
fn heading_span_is_within_source() {
    // Slicing the source with the heading's span should give back the
    // heading line without its newline.
    let src = "= Title\n";
    let r = parse_str(src);
    let (_, _, span) = r.tree.items[0].as_heading().unwrap();
    let covered = &src[span.start()..span.end()];
    assert_eq!(covered, "= Title");
}
977
#[test]
fn crlf_line_endings_handled() {
    // A heading line and a body line, both terminated by `\r\n`, are
    // expected to parse without errors into exactly two items.
    let r = parse_str("= Title\r\nbody\r\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert_eq!(r.tree.items.len(), 2, "got items {:?}", r.tree.items);
}
984
#[test]
fn set_prefix_without_token_boundary_stays_paragraph() {
    // `#setting` only shares its first letters with `#set`. It must
    // come back as a paragraph, with no error from the set-block path.
    let r = parse_str("#setting up\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let first = &r.tree.items[0];
    assert!(first.as_paragraph().is_some());
}
993
#[test]
fn set_prefix_followed_by_paren_is_set_block() {
    // `#set` directly followed by `(`: no whitespace, but `(` still
    // counts as a token boundary.
    let r = parse_str("#set(name: \"x\")\n");
    // Two outcomes are acceptable: a set block that lacks an identifier
    // (MOS0010), or a plain paragraph. The requirement is that parsing
    // returns a structured result instead of panicking. This regression
    // guard therefore checks only that items and diagnostics together
    // number exactly one.
    let items = r.tree.items.len();
    let diagnostics = r.diagnostics.len();
    assert_eq!(items + diagnostics, 1);
}
1006
#[test]
fn paragraph_inline_spans_align_with_crlf_source() {
    // Regression test for the CRLF byte-offset bug. With `\r\n` between
    // the paragraph's lines, spans of inlines on the second line still
    // have to index into the original source text.
    let src = "first\r\n*x*\r\n";
    let r = parse_str(src);
    assert!(!r.has_errors(), "{:?}", r.diagnostics);

    let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
    let emph = inlines
        .iter()
        .find(|run| run.kind == InlineKind::Emphasis)
        .expect("emphasis inline");

    // The span covers exactly the three bytes `*x*`, unaffected by the
    // CR that precedes them; the payload is the text between the stars.
    let covered = &src[emph.span.start()..emph.span.end()];
    assert_eq!(covered, "*x*");
    assert_eq!(emph.text, "x");
}
1026
#[test]
fn heading_with_trailing_label_attaches() {
    // A `<label>` at the end of a heading line becomes the item's label
    // and is removed from the heading's inline text.
    let src = "= Methods <sec:methods>\n";
    let r = parse_str(src);
    assert!(!r.has_errors(), "{:?}", r.diagnostics);

    let heading = &r.tree.items[0];
    let (_, inlines, _) = heading.as_heading().unwrap();
    assert_eq!(heading.label(), Some("sec:methods"));

    // The label span covers the bare name, without the angle brackets.
    let label_text = heading
        .label_span()
        .map(|span| &src[span.start()..span.end()]);
    assert_eq!(label_text, Some("sec:methods"));

    assert_eq!(inlines.len(), 1);
    assert_eq!(inlines[0].text, "Methods");
}
1042
#[test]
fn heading_label_with_trailing_content_warns_and_suggests_reorder() {
    // When more content follows a `<label>`, the label is no longer the
    // trailing element and is not treated as a declaration. MOS0048
    // reports it and offers to move it to the end of the line.
    let src = "= Title <intro> [@k]\n";
    let r = parse_str(src);
    let diag = r
        .diagnostics
        .iter()
        .find(|d| d.def().code() == codes::MOS0048.code())
        .expect("MOS0048 for the misplaced heading label");
    assert_eq!(diag.severity(), Severity::Warning);

    // Nothing was attached; the label text remains part of the heading.
    assert_eq!(r.tree.items[0].label(), None);

    let suggestions = diag.suggestions();
    assert_eq!(suggestions.len(), 2, "two safe fixes, got {suggestions:?}");

    // First fix: reorder so the label trails.
    let reorder = &suggestions[0];
    assert_eq!(reorder.replacement, "Title [@k] <intro>");

    // Second fix: escape the opening angle bracket in place.
    let escape = &suggestions[1];
    let escape_target = &src[escape.span.start()..escape.span.end()];
    assert_eq!(escape_target, "<");
    assert_eq!(escape.replacement, "\\<");
}
1066
#[test]
fn misplaced_heading_label_can_suggest_literal_angle_escape() {
    // `<head>` in the middle of a heading triggers MOS0048; the second
    // suggestion rewrites the `<` as `\<`.
    let src = "= The <head> element\n";
    let r = parse_str(src);
    let diag = r
        .diagnostics
        .iter()
        .find(|d| d.def().code() == codes::MOS0048.code())
        .expect("MOS0048 for the non-trailing angle token");

    let suggestions = diag.suggestions();
    assert_eq!(
        suggestions.len(),
        2,
        "reorder + escape fixes: {suggestions:?}"
    );

    let escaped = &suggestions[1];
    let (start, end) = (escaped.span.start(), escaped.span.end());
    assert_eq!(&src[start..end], "<");
    assert_eq!(escaped.replacement, "\\<");
}
1087
#[test]
fn trailing_heading_label_does_not_warn_misplaced() {
    // A label in the correct trailing position attaches to the heading
    // and produces no MOS0048.
    let r = parse_str("= Title <intro>\n");
    let no_mos0048 = r
        .diagnostics
        .iter()
        .all(|d| d.def().code() != codes::MOS0048.code());
    assert!(no_mos0048, "{:?}", r.diagnostics);
    assert_eq!(r.tree.items[0].label(), Some("intro"));
}
1101
#[test]
fn escaped_angle_in_heading_is_literal_not_a_label() {
    // `\<head>` is escaped text. It must not register as a label, must
    // not raise MOS0048, and must render with a literal `<`, which is
    // what makes the heading "The <head> element" writable at all.
    let r = parse_str("= The \\<head> element\n");
    let heading = &r.tree.items[0];

    assert_eq!(heading.label(), None, "escaped `\\<` is not a label");

    let no_mos0048 = r
        .diagnostics
        .iter()
        .all(|d| d.def().code() != codes::MOS0048.code());
    assert!(
        no_mos0048,
        "escaped angle must not trip MOS0048: {:?}",
        r.diagnostics
    );

    let (_, inlines, _) = heading.as_heading().unwrap();
    let mut rendered = String::new();
    for run in inlines.iter() {
        rendered.push_str(run.text.as_str());
    }
    assert_eq!(rendered, "The <head> element");
}
1124
#[test]
fn escaped_trailing_angle_is_not_swallowed_as_label() {
    // Even at the very end of the line, `\<head>` is escaped text and
    // must not be picked up by strip_trailing_label.
    let r = parse_str("= ends with \\<head>\n");
    assert_eq!(r.tree.items[0].label(), None);
    let no_mos0048 = r
        .diagnostics
        .iter()
        .all(|d| d.def().code() != codes::MOS0048.code());
    assert!(no_mos0048, "{:?}", r.diagnostics);
}
1138
#[test]
fn double_backslash_before_label_still_attaches() {
    // Backslash parity: in `\\<intro>` the two backslashes escape each
    // other, so the `<` is unescaped and the trailing label is real.
    let r = parse_str("= title \\\\<intro>\n");
    let heading = &r.tree.items[0];
    assert_eq!(heading.label(), Some("intro"));
}
1146
#[test]
fn paragraph_with_leading_label_attaches() {
    // A `<label>` at the start of a paragraph becomes the paragraph's
    // label; the remaining text is the first inline.
    let src = "<intro> body text\n";
    let r = parse_str(src);
    assert!(!r.has_errors(), "{:?}", r.diagnostics);

    let paragraph = &r.tree.items[0];
    let (inlines, _) = paragraph.as_paragraph().unwrap();
    assert_eq!(paragraph.label(), Some("intro"));

    // The label span covers the bare name, without the angle brackets.
    let label_text = paragraph
        .label_span()
        .map(|span| &src[span.start()..span.end()]);
    assert_eq!(label_text, Some("intro"));

    assert_eq!(inlines[0].text, "body text");
}
1161
#[test]
fn at_label_produces_reference_inline() {
    // `@sec:methods` in running text splits the paragraph into
    // text / reference / text, and the reference payload has no `@`.
    let r = parse_str("see @sec:methods now\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();

    let kinds: Vec<InlineKind> = inlines.iter().map(|run| run.kind).collect();
    assert_eq!(
        kinds,
        [InlineKind::Text, InlineKind::Reference, InlineKind::Text]
    );

    let reference = inlines
        .iter()
        .find(|run| run.kind == InlineKind::Reference)
        .unwrap();
    assert_eq!(reference.text, "sec:methods");
}
1178
#[test]
fn at_page_produces_page_reference_inline() {
    // `@page(label)` in running text yields text / page reference / text.
    let r = parse_str("see @page(fig:wells) now\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();

    let kinds: Vec<InlineKind> = inlines.iter().map(|run| run.kind).collect();
    assert_eq!(
        kinds,
        [
            InlineKind::Text,
            InlineKind::PageReference,
            InlineKind::Text
        ]
    );

    // The payload is just the label; the `page(` wrapper and the closing
    // `)` are not part of it.
    let page_ref = inlines
        .iter()
        .find(|run| run.kind == InlineKind::PageReference)
        .unwrap();
    assert_eq!(page_ref.text, "fig:wells");
}
1200
#[test]
fn reference_inlines_carry_label_identifier_span() {
    // For both `@label` and `@page(label)` the parser records the source
    // span of the bare identifier only (no `@`, no `@page(` prefix, no
    // `)`) so the lowerer can stamp it for editor rename (issue #116).
    let src = "see @sec:methods and @page(fig:wells)\n";
    let r = parse_str(src);
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();

    let reference = inlines
        .iter()
        .find(|run| run.kind == InlineKind::Reference)
        .unwrap();
    let ref_span = reference.label_span.as_ref().expect("reference label span");
    let ref_text = &src[ref_span.start()..ref_span.end()];
    assert_eq!(ref_text, "sec:methods");

    let page = inlines
        .iter()
        .find(|run| run.kind == InlineKind::PageReference)
        .unwrap();
    let page_span = page.label_span.as_ref().expect("page reference label span");
    let page_text = &src[page_span.start()..page_span.end()];
    assert_eq!(page_text, "fig:wells");

    // Every plain-text inline must be without a label span.
    let text_runs_unlabelled = inlines
        .iter()
        .all(|run| run.kind != InlineKind::Text || run.label_span.is_none());
    assert!(
        text_runs_unlabelled,
        "plain text inlines have no label span"
    );
}
1234
#[test]
fn malformed_at_page_falls_back_to_ordinary_reference() {
    // Without a `(label)` after it, `@page` is an ordinary
    // cross-reference to a label called "page". Only the well-formed
    // `@page(label)` spelling takes the page-reference branch.
    let r = parse_str("see @page now\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();

    let has_page_named_reference = inlines
        .iter()
        .any(|run| run.kind == InlineKind::Reference && run.text == "page");
    assert!(has_page_named_reference);

    let no_page_reference = inlines
        .iter()
        .all(|run| run.kind != InlineKind::PageReference);
    assert!(no_page_reference);
}
1250
#[test]
fn unterminated_at_page_paren_is_not_a_page_reference() {
    // `@page(` with a label but no closing `)` must not be consumed as a
    // page reference. It degrades to the plain `@page` reference, with
    // the rest kept as literal text.
    let r = parse_str("see @page(oops now\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();

    let no_page_reference = inlines
        .iter()
        .all(|run| run.kind != InlineKind::PageReference);
    assert!(no_page_reference);

    let has_page_named_reference = inlines
        .iter()
        .any(|run| run.kind == InlineKind::Reference && run.text == "page");
    assert!(has_page_named_reference);
}
1265
#[test]
fn stray_at_warns_and_stays_text() {
    // An `@` followed by a space names no label. The expected result is
    // a non-error MOS0036 diagnostic and no reference inline.
    let r = parse_str("an @ symbol\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert!(
        r.diagnostics
            .iter()
            .any(|d| d.def().code() == codes::MOS0036.code()),
        "expected MOS0036, got {:?}",
        r.diagnostics
    );
    let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
    assert!(
        !inlines.iter().any(|i| i.kind == InlineKind::Reference),
        "stray `@` must not produce a reference inline, got {:?}",
        inlines
    );
}
1278
#[test]
fn heading_without_label_keeps_full_text() {
    // With no `<label>` on the line, nothing is stripped: the label is
    // absent and the first inline holds the whole title.
    let r = parse_str("= Just a title\n");
    let heading = &r.tree.items[0];
    let (_, inlines, _) = heading.as_heading().unwrap();
    assert_eq!(heading.label(), None);
    let title = &inlines[0];
    assert_eq!(title.text, "Just a title");
}
1287
#[test]
fn paragraph_with_angle_text_not_label() {
    // Angle brackets in the middle of a paragraph, not forming a leading
    // label token, stay in the text untouched.
    let r = parse_str("a < b > c\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let paragraph = &r.tree.items[0];
    assert_eq!(paragraph.label(), None);
    let (inlines, _) = paragraph.as_paragraph().unwrap();
    let first = &inlines[0];
    assert_eq!(first.text, "a < b > c");
}
1299
#[test]
fn paragraph_inline_text_is_crlf_normalized() {
    // The raw slice contains `\r\n` between paragraph lines, but the
    // Inline.text payload should be `\n`-only so the same source lowers
    // identically on Windows and Unix.
    let src = "alpha\r\nbeta\r\n";
    let r = parse_str(src);
    // Print the diagnostics on failure, as the neighbouring tests do.
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
    assert!(
        inlines.iter().all(|i| !i.text.contains('\r')),
        "inline text should be CRLF-normalized: {:?}",
        inlines.iter().map(|i| &i.text).collect::<Vec<_>>()
    );
    // The first text run still spans the raw bytes including the CR:
    // only the *payload* is normalized.
    let text = inlines.iter().find(|i| i.kind == InlineKind::Text).unwrap();
    assert_eq!(text.text, "alpha\nbeta");
    assert_eq!(&src[text.span.start()..text.span.end()], "alpha\r\nbeta");
}
1320
#[test]
fn image_directive_with_positional_path() {
    let r = parse_str("#image(\"scan.png\")\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (name, args, _) = r.tree.items[0].as_set().unwrap();
    assert_eq!(name, "image");
    assert_eq!(args.len(), 1);

    // The single argument is positional. This is checked both by
    // matching the `SetArg::Positional` variant and through `.key()`
    // returning `None`, so callers that use either style see the same
    // contract pinned here.
    let path_arg = &args[0];
    assert!(matches!(path_arg, SetArg::Positional { .. }));
    assert_eq!(path_arg.key(), None);

    let expected = SetValue::Str(String::from("scan.png"));
    assert_eq!(path_arg.value(), &expected);
}
1336
#[test]
fn image_directive_with_positional_and_keyed_args() {
    // One positional argument followed by two keyed ones, in source
    // order; the last value is a length with a `pt` unit.
    let r = parse_str("#image(\"scan.png\", alt: \"a CTPA scan\", width: 200pt)\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (name, args, _) = r.tree.items[0].as_set().unwrap();
    assert_eq!(name, "image");
    assert_eq!(args.len(), 3);

    let keys = [args[0].key(), args[1].key(), args[2].key()];
    assert_eq!(keys, [None, Some("alt"), Some("width")]);

    let expected_width = SetValue::Length(200.0, LengthUnit::Pt);
    assert_eq!(args[2].value(), &expected_width);
}
1349
#[test]
fn figure_directive_with_keyed_args() {
    // `#figure(...)` with two keyed string arguments, kept in order.
    let r = parse_str("#figure(image: \"scan.png\", caption: \"A scan.\")\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (name, args, _) = r.tree.items[0].as_set().unwrap();
    assert_eq!(name, "figure");
    assert_eq!(args.len(), 2);

    let image_arg = &args[0];
    assert_eq!(image_arg.key(), Some("image"));
    let expected_image = SetValue::Str(String::from("scan.png"));
    assert_eq!(image_arg.value(), &expected_image);

    let caption_arg = &args[1];
    assert_eq!(caption_arg.key(), Some("caption"));
}
1361
#[test]
fn figure_directive_carries_numbering_controls() {
    // `numbered: false` comes through as a bare identifier and
    // `supplement:` as a string. The parser treats both as ordinary
    // keyed args; giving them meaning is left to the eval layer
    // (issue #76).
    let r = parse_str("#figure(image: \"scan.png\", numbered: false, supplement: \"Plate\")\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (name, args, _) = r.tree.items[0].as_set().unwrap();
    assert_eq!(name, "figure");
    assert_eq!(args.len(), 3);

    let numbered = &args[1];
    assert_eq!(numbered.key(), Some("numbered"));
    let expected_numbered = SetValue::Ident(String::from("false"));
    assert_eq!(numbered.value(), &expected_numbered);

    let supplement = &args[2];
    assert_eq!(supplement.key(), Some("supplement"));
    let expected_supplement = SetValue::Str(String::from("Plate"));
    assert_eq!(supplement.value(), &expected_supplement);
}
1377
#[test]
fn figure_directive_positional_path() {
    // Locks in the `#figure("…")` spelling advertised by the directive
    // grammar. The eval layer handles the first positional arg as it
    // does for `#image(...)`, so the parsed shape has to be the same.
    let r = parse_str("#figure(\"scan.png\")\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (name, args, _) = r.tree.items[0].as_set().unwrap();
    assert_eq!(name, "figure");
    assert_eq!(args.len(), 1);

    let path_arg = &args[0];
    assert!(matches!(path_arg, SetArg::Positional { .. }));
    let expected = SetValue::Str(String::from("scan.png"));
    assert_eq!(path_arg.value(), &expected);
}
1392
#[test]
fn bibliography_directive_with_positional_path() {
    // `#bibliography("refs.bib")` is the Typst-compatible way to declare
    // a bibliography source. It goes through the same call-block grammar
    // as `#image(...)`; the first positional string is the database path.
    let r = parse_str("#bibliography(\"refs.bib\")\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert_eq!(r.tree.items.len(), 1);

    let directive = &r.tree.items[0];
    assert_eq!(
        directive.directive_kind(),
        Some(DirectiveKind::Bibliography)
    );

    let (name, args, _) = directive.as_set().unwrap();
    assert_eq!(name, "bibliography");
    assert_eq!(args.len(), 1);

    let path_arg = &args[0];
    assert!(matches!(path_arg, SetArg::Positional { .. }));
    assert_eq!(path_arg.key(), None);
    let expected = SetValue::Str(String::from("refs.bib"));
    assert_eq!(path_arg.value(), &expected);
}
1413
#[test]
fn bibliography_directive_with_named_path() {
    // The keyed `path:` form is accepted as well as the positional
    // shorthand, in the same way as `#image(src:/path:)`.
    let r = parse_str("#bibliography(path: \"sources/refs.bib\")\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (name, args, _) = r.tree.items[0].as_set().unwrap();
    assert_eq!(name, "bibliography");
    assert_eq!(args.len(), 1);

    let path_arg = &args[0];
    assert_eq!(path_arg.key(), Some("path"));
    let expected = SetValue::Str(String::from("sources/refs.bib"));
    assert_eq!(path_arg.value(), &expected);
}
1429
#[test]
fn raw_blocks_preserve_body_text() {
    // A multi-line `#code[[...]]` body comes back verbatim, including
    // its newlines, indentation and quotes.
    let r = parse_str("#code[[fn main() {\n    println(\"hi\");\n}]]\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert_eq!(r.tree.items.len(), 1);

    let Some(raw) = r.tree.items[0].as_raw_block() else {
        panic!("expected raw block, got {:?}", r.tree.items[0]);
    };
    assert_eq!(raw.kind, RawBlockKind::Code);
    assert!(raw.args.is_empty());
    assert_eq!(raw.label, None);
    assert_eq!(raw.text, "fn main() {\n    println(\"hi\");\n}");
}
1448
#[test]
fn raw_blocks_preserve_zero_equals_inner_brackets() {
    // Single `[` and `]` inside a `[[...]]` body are part of the text.
    let r = parse_str("#code[[let x = vec![1, 2, 3];]]\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);

    let Some(raw) = r.tree.items[0].as_raw_block() else {
        panic!("expected raw block, got {:?}", r.tree.items[0]);
    };
    assert_eq!(raw.kind, RawBlockKind::Code);
    assert_eq!(raw.text, "let x = vec![1, 2, 3];");
}
1464
#[test]
fn raw_blocks_preserve_delimiter_like_text() {
    // Inside a `[=[ ... ]=]` body, `\]`, `]` and `]]` are all plain
    // text; only the matching `]=]` closes the block.
    let r = parse_str("#pre[=[open \\] close ] and ]] close]=]\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);

    let Some(raw) = r.tree.items[0].as_raw_block() else {
        panic!("expected raw block, got {:?}", r.tree.items[0]);
    };
    assert_eq!(raw.kind, RawBlockKind::Pre);
    assert!(raw.args.is_empty());
    assert_eq!(raw.label, None);
    assert_eq!(raw.text, "open \\] close ] and ]] close");
}
1482
#[test]
fn raw_blocks_preserve_arguments_and_label() {
    // A raw block with a parenthesised argument list in front and a
    // trailing `<label>` keeps both, along with its body text.
    let src = "#code(lang: \"rust\")[[fn main() {}]] <ex:code>\n";
    let r = parse_str(src);
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert_eq!(r.tree.items.len(), 1);

    let Some(raw) = r.tree.items[0].as_raw_block() else {
        panic!("expected raw block, got {:?}", r.tree.items[0]);
    };
    assert_eq!(raw.kind, RawBlockKind::Code);
    assert_eq!(raw.args.len(), 1);
    assert_eq!(raw.args[0].key(), Some("lang"));
    let expected_lang = SetValue::Str(String::from("rust"));
    assert_eq!(raw.args[0].value(), &expected_lang);
    assert_eq!(raw.text, "fn main() {}");
    assert_eq!(raw.label, Some("ex:code"));

    // The label span covers the bare name, without the angle brackets.
    let label_text = raw.label_span.map(|span| &src[span.start()..span.end()]);
    assert_eq!(label_text, Some("ex:code"));

    // The item-level accessor reports the same label.
    assert_eq!(r.tree.items[0].label(), Some("ex:code"));
}
1510
#[test]
fn raw_blocks_trim_leading_delimiter_newline_and_normalize_line_endings() {
    // The line break right after `[[` is not part of the body, and the
    // remaining `\r\n` is delivered as `\n`. The leading tab is kept.
    let r = parse_str("#code[[\r\n\tprintln!(\"hi\");\r\n]]\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);

    let Some(raw) = r.tree.items[0].as_raw_block() else {
        panic!("expected raw block, got {:?}", r.tree.items[0]);
    };
    assert_eq!(raw.text, "\tprintln!(\"hi\");\n");
}
1525
#[test]
fn bracket_raw_blocks_are_rejected() {
    // A single-bracket body (`#code[...]`) is an error: no item is
    // produced and a diagnostic mentions long brackets.
    let r = parse_str("#code[fn main() {}]\n");
    assert!(r.has_errors(), "{:?}", r.diagnostics);
    assert!(r.tree.items.is_empty(), "{:?}", r.tree.items);

    let mentions_long_brackets = r
        .diagnostics
        .iter()
        .any(|d| d.message().contains("long brackets"));
    assert!(mentions_long_brackets, "{:?}", r.diagnostics);
}
1539
#[test]
fn directive_prefix_without_token_boundary_stays_paragraph() {
    // Words such as `#imagery` or `#figures` only start with a directive
    // keyword; they are not directives and must stay on the paragraph
    // path.
    let r = parse_str("#imagery here\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let first = &r.tree.items[0];
    assert!(first.as_paragraph().is_some());
}
1548
#[test]
fn unterminated_image_directive_errors_with_mos0016() {
    // `#image(` with no closing `)` before the input ends: a MOS0016
    // whose message names the `#image` directive is required.
    let r = parse_str("#image(\n  alt: \"x\"\n");
    let saw_mos0016_for_image = r
        .diagnostics
        .iter()
        .filter(|d| d.def().code() == codes::MOS0016.code())
        .any(|d| d.message().contains("#image"));
    assert!(
        saw_mos0016_for_image,
        "expected MOS0016 mentioning #image, got {:?}",
        r.diagnostics
    );
}
1560
#[test]
fn directive_terminates_paragraph() {
    // A paragraph in progress must stop at the next directive so the
    // directive parses cleanly instead of being slurped into the
    // paragraph body. Each case is `(source, expected directive kind,
    // expected directive name)`.
    for (src, expected_kind, expected_name) in [
        (
            "body line\n#set document(title: \"x\")\nmore\n",
            DirectiveKind::Set,
            "document",
        ),
        (
            "body line\n#image(\"x.png\")\nmore\n",
            DirectiveKind::Image,
            "image",
        ),
        (
            "body line\n#figure(\"x.png\")\nmore\n",
            DirectiveKind::Figure,
            "figure",
        ),
        (
            "body line\n#bibliography(\"refs.bib\")\nmore\n",
            DirectiveKind::Bibliography,
            "bibliography",
        ),
    ] {
        // Every assertion names the failing source: the loop covers four
        // directives and a bare failure would not say which one broke.
        let r = parse_str(src);
        assert!(!r.has_errors(), "{src:?}: {:?}", r.diagnostics);
        // Expect: paragraph, directive, paragraph.
        assert_eq!(
            r.tree.items.len(),
            3,
            "{src:?}: expected paragraph, directive, paragraph; got {:?}",
            r.tree.items
        );
        assert!(
            r.tree.items[0].as_paragraph().is_some(),
            "{src:?}: first item should be a paragraph"
        );
        assert_eq!(
            r.tree.items[1].directive_kind(),
            Some(expected_kind),
            "{src:?}: wrong directive kind"
        );
        let (name, _, _) = r.tree.items[1].as_set().unwrap();
        assert_eq!(name, expected_name, "{src:?}: wrong directive name");
        assert!(
            r.tree.items[2].as_paragraph().is_some(),
            "{src:?}: last item should be a paragraph"
        );
    }
}
1599
#[test]
fn unordered_list_simple() {
    // Two `-` lines form one unordered list of two childless items.
    let r = parse_str("- a\n- b\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert_eq!(r.tree.items.len(), 1);

    let (ordered, items, _) = r.tree.items[0].as_list().unwrap();
    assert!(!ordered);
    assert_eq!(items.len(), 2);

    for (idx, expected) in ["a", "b"].iter().enumerate() {
        assert_eq!(items[idx].inlines[0].text, *expected);
    }
    for idx in 0..2 {
        assert!(items[idx].children.is_empty());
    }
}
1613
#[test]
fn ordered_list_simple() {
    // Three `N.` lines form one ordered list; the item text excludes
    // the marker.
    let r = parse_str("1. first\n2. second\n3. third\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert_eq!(r.tree.items.len(), 1);

    let (ordered, items, _) = r.tree.items[0].as_list().unwrap();
    assert!(ordered);
    assert_eq!(items.len(), 3);

    for (idx, expected) in ["first", "second", "third"].iter().enumerate() {
        assert_eq!(items[idx].inlines[0].text, *expected);
    }
}
1626
#[test]
fn list_items_carry_inline_emphasis() {
    // Inline markup inside a list item is parsed: the second item is an
    // emphasis run followed by a text run.
    let r = parse_str("- plain\n- *italic* text\n");
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    let (_, items, _) = r.tree.items[0].as_list().unwrap();

    let second = &items[1];
    let kinds: Vec<InlineKind> = second.inlines.iter().map(|run| run.kind).collect();
    assert_eq!(
        kinds,
        [InlineKind::Emphasis, InlineKind::Text],
        "got {:?}",
        second.inlines
    );
}
1640
#[test]
fn nested_list_two_deep() {
    // Two indented `-` lines under the first outer item become a single
    // child list of that item; the second outer item has no children.
    let src = "- outer 1\n  - inner a\n  - inner b\n- outer 2\n";
    let r = parse_str(src);
    assert!(!r.has_errors(), "{:?}", r.diagnostics);
    assert_eq!(r.tree.items.len(), 1);

    let (_, items, _) = r.tree.items[0].as_list().unwrap();
    assert_eq!(items.len(), 2);
    let first_outer = &items[0];
    let second_outer = &items[1];
    assert_eq!(first_outer.inlines[0].text, "outer 1");
    assert_eq!(second_outer.inlines[0].text, "outer 2");
    assert_eq!(first_outer.children.len(), 1);
    assert!(second_outer.children.is_empty());

    let (nested_ordered, nested_items, _) = first_outer.children[0].as_list().unwrap();
    assert!(!nested_ordered);
    assert_eq!(nested_items.len(), 2);
    for (idx, expected) in ["inner a", "inner b"].iter().enumerate() {
        assert_eq!(nested_items[idx].inlines[0].text, *expected);
    }
}
1659
1660    #[test]
1661    fn mixed_prose_and_list() {
1662        let src = "Intro paragraph.\n\n- one\n- two\n\nClosing paragraph.\n";
1663        let r = parse_str(src);
1664        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1665        assert_eq!(r.tree.items.len(), 3);
1666        assert!(r.tree.items[0].as_paragraph().is_some());
1667        let (_, list_items, _) = r.tree.items[1].as_list().unwrap();
1668        assert_eq!(list_items.len(), 2);
1669        assert!(r.tree.items[2].as_paragraph().is_some());
1670    }
1671
1672    #[test]
1673    fn list_marker_breaks_running_paragraph() {
1674        // No blank line between paragraph and list; the marker still
1675        // opens a fresh block.
1676        let r = parse_str("paragraph line\n- item\n");
1677        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1678        assert_eq!(r.tree.items.len(), 2);
1679        assert!(r.tree.items[0].as_paragraph().is_some());
1680        let (_, items, _) = r.tree.items[1].as_list().unwrap();
1681        assert_eq!(items.len(), 1);
1682        assert_eq!(items[0].inlines[0].text, "item");
1683    }
1684
1685    #[test]
1686    fn ordered_renumbers_from_one_regardless_of_source_digits() {
1687        // The parser preserves the literal digits the user typed in
1688        // each item's text, but ordered_renumbering is the lowerer's /
1689        // layout's job. At parse time, the only thing we report is
1690        // that the items are ordered; the numbering source is the
1691        // item index, not the literal `5.` typed in source.
1692        let r = parse_str("5. five\n7. seven\n");
1693        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1694        let (ordered, items, _) = r.tree.items[0].as_list().unwrap();
1695        assert!(ordered);
1696        assert_eq!(items.len(), 2);
1697        assert_eq!(items[0].inlines[0].text, "five");
1698        assert_eq!(items[1].inlines[0].text, "seven");
1699    }
1700
1701    #[test]
1702    fn ordered_to_unordered_at_same_indent_splits_lists() {
1703        let r = parse_str("1. one\n2. two\n- three\n- four\n");
1704        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1705        assert_eq!(r.tree.items.len(), 2);
1706        let (a_ordered, a_items, _) = r.tree.items[0].as_list().unwrap();
1707        assert!(a_ordered);
1708        assert_eq!(a_items.len(), 2);
1709        let (b_ordered, b_items, _) = r.tree.items[1].as_list().unwrap();
1710        assert!(!b_ordered);
1711        assert_eq!(b_items.len(), 2);
1712    }
1713
1714    #[test]
1715    fn dash_without_space_is_paragraph() {
1716        // A bare `-foo` line is a paragraph, not a list; the marker
1717        // requires trailing whitespace.
1718        let r = parse_str("-foo\n");
1719        assert!(!r.has_errors());
1720        assert!(r.tree.items[0].as_paragraph().is_some());
1721    }
1722
1723    #[test]
1724    fn number_dot_without_space_is_paragraph() {
1725        // `1.foo` without trailing whitespace is not an ordered list
1726        // marker. (Even `1.` alone with no content is not: keeps the
1727        // parser conservative around inline numerals like `1.5`.)
1728        let r = parse_str("1.foo\n");
1729        assert!(!r.has_errors());
1730        assert!(r.tree.items[0].as_paragraph().is_some());
1731    }
1732
1733    #[test]
1734    fn list_terminated_by_blank_line() {
1735        let src = "- a\n- b\n\n- c\n";
1736        let r = parse_str(src);
1737        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1738        // Two separate lists, split by the blank line.
1739        assert_eq!(r.tree.items.len(), 2);
1740        let (_, a, _) = r.tree.items[0].as_list().unwrap();
1741        let (_, c, _) = r.tree.items[1].as_list().unwrap();
1742        assert_eq!(a.len(), 2);
1743        assert_eq!(c.len(), 1);
1744    }
1745
1746    #[test]
1747    fn list_item_span_covers_its_line() {
1748        let src = "- hello\n";
1749        let r = parse_str(src);
1750        let (_, items, _) = r.tree.items[0].as_list().unwrap();
1751        let span = &items[0].span;
1752        assert_eq!(&src[span.start()..span.end()], "- hello");
1753    }
1754
1755    #[test]
1756    fn nested_list_span_includes_children() {
1757        let src = "- a\n  - b\n";
1758        let r = parse_str(src);
1759        let (_, _, span) = r.tree.items[0].as_list().unwrap();
1760        // Outer list's span should reach to the end of the nested item.
1761        assert!(span.end() > src.find('b').unwrap());
1762    }
1763
1764    // ---------- line-break controls (issue #26) ----------
1765
1766    #[test]
1767    fn nbsp_is_preserved_inside_a_single_text_inline() {
1768        // U+00A0 NBSP is not ASCII whitespace; it must round-trip
1769        // through the parser as one logical word, distinct from a
1770        // regular space which would split the paragraph's text differently
1771        // downstream. Pinned so a future UAX #14-aware splitter can't
1772        // accidentally normalize NBSP to a regular space.
1773        let r = parse_str("Mr.\u{A0}Smith\n");
1774        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1775        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1776        assert_eq!(inlines.len(), 1, "got {inlines:?}");
1777        assert_eq!(inlines[0].kind, InlineKind::Text);
1778        assert!(
1779            inlines[0].text.contains('\u{A0}'),
1780            "expected NBSP in text payload, got {:?}",
1781            inlines[0].text
1782        );
1783        assert_eq!(inlines[0].text, "Mr.\u{A0}Smith");
1784    }
1785
1786    #[test]
1787    fn hard_break_double_backslash() {
1788        let r = parse_str("foo\\\\bar\n");
1789        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1790        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1791        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
1792        assert_eq!(
1793            kinds,
1794            vec![InlineKind::Text, InlineKind::HardBreak, InlineKind::Text],
1795            "got {inlines:?}"
1796        );
1797        assert_eq!(inlines[0].text, "foo");
1798        assert!(inlines[1].text.is_empty());
1799        assert_eq!(inlines[2].text, "bar");
1800    }
1801
1802    #[test]
1803    fn hard_break_double_in_a_row() {
1804        let r = parse_str("a\\\\\\\\b\n");
1805        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1806        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1807        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
1808        assert_eq!(
1809            kinds,
1810            vec![
1811                InlineKind::Text,
1812                InlineKind::HardBreak,
1813                InlineKind::HardBreak,
1814                InlineKind::Text,
1815            ],
1816            "got {inlines:?}"
1817        );
1818    }
1819
1820    #[test]
1821    fn hard_break_at_start_of_paragraph() {
1822        let r = parse_str("\\\\foo\n");
1823        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1824        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1825        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
1826        assert_eq!(kinds, vec![InlineKind::HardBreak, InlineKind::Text]);
1827        assert_eq!(inlines[1].text, "foo");
1828    }
1829
1830    #[test]
1831    fn hard_break_then_strong() {
1832        // `\\` flushes any pending text and slots cleanly between
1833        // adjacent inline styles. Regression check that the delimiter
1834        // scanner sees the right `text_start` after the hard break.
1835        let r = parse_str("a\\\\**b**\n");
1836        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1837        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1838        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
1839        assert_eq!(
1840            kinds,
1841            vec![InlineKind::Text, InlineKind::HardBreak, InlineKind::Strong],
1842            "got {inlines:?}"
1843        );
1844        assert_eq!(inlines[2].text, "b");
1845    }
1846
1847    #[test]
1848    fn lone_trailing_backslash_warns_with_mos0038() {
1849        let r = parse_str("foo\\\n");
1850        assert!(!r.has_errors());
1851        assert!(
1852            r.diagnostics
1853                .iter()
1854                .any(|d| d.def().code() == codes::MOS0038.code()
1855                    && d.severity() == Severity::Warning),
1856            "expected MOS0038 warning, got {:?}",
1857            r.diagnostics
1858        );
1859    }
1860
1861    #[test]
1862    fn backslash_before_non_escape_byte_is_silent_literal() {
1863        // `\` followed by a byte we don't recognise as an escape
1864        // (`*`, `@`, `x`, drive-letter path, etc.) must not emit a
1865        // diagnostic -- the prior contract was "backslash is literal",
1866        // and only the trailing-`\` case crosses the bar where a
1867        // warning helps the author. Three samples cover the cases that
1868        // previously fired spurious MOS0038s.
1869        for src in [
1870            "foo \\* bar\n",
1871            "see C:\\Temp\\file\n",
1872            "stray \\x literal\n",
1873        ] {
1874            let r = parse_str(src);
1875            assert!(!r.has_errors(), "src {src:?}: {:?}", r.diagnostics);
1876            assert!(
1877                !r.diagnostics
1878                    .iter()
1879                    .any(|d| d.def().code() == codes::MOS0038.code()),
1880                "src {src:?} produced unexpected MOS0038: {:?}",
1881                r.diagnostics
1882            );
1883        }
1884    }
1885
1886    #[test]
1887    fn soft_hyphen_shorthand_expands_to_u00ad() {
1888        // `\-` is a soft-hyphen shorthand: it expands inline to a
1889        // literal U+00AD inside the current text run. No new IR
1890        // variant -- SHY is just a codepoint that downstream shaping
1891        // strips before rendering.
1892        let r = parse_str("a\\-b\n");
1893        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1894        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1895        assert_eq!(inlines.len(), 1, "got {inlines:?}");
1896        assert_eq!(inlines[0].kind, InlineKind::Text);
1897        assert_eq!(inlines[0].text, "a\u{AD}b");
1898    }
1899
1900    #[test]
1901    fn soft_hyphen_span_covers_the_consumed_source_bytes() {
1902        // Regression: the `\-` shorthand was advancing `text_start`
1903        // past the consumed bytes without recording where the run
1904        // originally started, so the emitted Inline carried a
1905        // zero-width span pointing at the post-`\-` bytes only.
1906        let src = "a\\-b\n";
1907        let r = parse_str(src);
1908        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1909        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1910        assert_eq!(inlines.len(), 1, "got {inlines:?}");
1911        assert_eq!(inlines[0].text, "a\u{AD}b");
1912        // The span should cover all four source bytes of `a\-b`,
1913        // not just the trailing `b`. Newline is not part of the
1914        // paragraph inline run.
1915        assert_eq!(
1916            inlines[0].span.end() - inlines[0].span.start(),
1917            4,
1918            "expected span over `a\\-b` (4 bytes), got {:?}",
1919            inlines[0].span
1920        );
1921    }
1922
1923    #[test]
1924    fn soft_hyphen_shorthand_repeats_in_one_run() {
1925        // Multiple `\-` shorthands inside one text run accumulate into
1926        // a single Text inline -- the pending buffer flushes only at
1927        // run boundaries, not at every escape.
1928        let r = parse_str("su\\-per\\-cali\n");
1929        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1930        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1931        assert_eq!(inlines.len(), 1, "got {inlines:?}");
1932        assert_eq!(inlines[0].kind, InlineKind::Text);
1933        assert_eq!(inlines[0].text, "su\u{AD}per\u{AD}cali");
1934    }
1935
1936    #[test]
1937    fn literal_nbsp_codepoint_round_trips_through_emphasis() {
1938        // Belt-and-suspenders: NBSP inside an emphasis run also survives
1939        // unchanged (no whitespace-style normalization at the inline
1940        // boundary).
1941        let r = parse_str("*Mr.\u{A0}Smith*\n");
1942        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1943        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1944        assert_eq!(inlines.len(), 1, "got {inlines:?}");
1945        assert_eq!(inlines[0].kind, InlineKind::Emphasis);
1946        assert_eq!(inlines[0].text, "Mr.\u{A0}Smith");
1947    }
1948
1949    // -----------------------------------------------------------------
1950    // Citation syntax (MVP 4 slice)
1951    //
1952    // The chosen citation form is `[@key]`. The key alphabet matches
1953    // the existing label alphabet (`[A-Za-z0-9_:.-]`, see
1954    // `support::scan_label_chars`). A single key per `[@…]` group is
    // the only form recognised in this slice. List forms like
    // `[@a; @b]` and prefix/suffix bodies (`[see @key, p. 33]`) are
    // deferred to a later bibliography slice and parse here as
    // literal text; the list form is additionally reported as a
    // malformed citation. Malformed citations (`[@`, `[@key`, `[@]`,
    // `[@a; @b]`) emit a recoverable `MOS0039` warning and fall
    // through to literal text; the parser does not panic on any input.
1961    // -----------------------------------------------------------------
1962
1963    #[test]
1964    fn citation_basic_emits_citation_inline_with_key_and_span() {
1965        let src = "see [@smith2024] for details\n";
1966        let r = parse_str(src);
1967        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1968        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1969        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
1970        assert_eq!(
1971            kinds,
1972            vec![InlineKind::Text, InlineKind::Citation, InlineKind::Text],
1973            "got {inlines:?}",
1974        );
1975        let citation = &inlines[1];
1976        assert_eq!(citation.text, "smith2024");
1977        // Span covers `[@smith2024]`; the full source extent, not
1978        // just the key.
1979        let span_text = &src[citation.span.start()..citation.span.end()];
1980        assert_eq!(span_text, "[@smith2024]");
1981    }
1982
1983    #[test]
1984    fn citation_key_accepts_label_alphabet() {
1985        // Same alphabet as labels: alnum + `_`, `-`, `:`, `.`.
1986        let r = parse_str("[@bib:knuth_84.tex-2]\n");
1987        assert!(!r.has_errors(), "{:?}", r.diagnostics);
1988        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
1989        assert_eq!(inlines.len(), 1, "got {inlines:?}");
1990        assert_eq!(inlines[0].kind, InlineKind::Citation);
1991        assert_eq!(inlines[0].text, "bib:knuth_84.tex-2");
1992    }
1993
1994    #[test]
1995    fn citation_bare_bracket_stays_literal_text() {
1996        // A bare `[` (no immediate `@`) must not trigger the citation
1997        // branch and must not emit a warning: `[` is a freely
1998        // available character in prose.
1999        let r = parse_str("write [this] not that\n");
2000        assert!(!r.has_errors(), "{:?}", r.diagnostics);
2001        assert!(
2002            r.diagnostics.is_empty(),
2003            "bare `[` should not warn, got {:?}",
2004            r.diagnostics,
2005        );
2006        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
2007        assert_eq!(inlines.len(), 1, "got {inlines:?}");
2008        assert_eq!(inlines[0].kind, InlineKind::Text);
2009        assert!(inlines[0].text.contains("[this]"));
2010    }
2011
2012    #[test]
2013    fn citation_unterminated_warns_and_recovers_as_text() {
2014        // `[@key` with no closing `]` before end-of-paragraph must
2015        // emit `MOS0039` and leave the source bytes as literal text.
2016        // Critically, recovery must NOT let the `@key` chars fall
2017        // through to the `@`-reference branch; that would inject a
2018        // phantom `Reference` inline and trip the resolver's
2019        // unknown-label diagnostic (`MOS0033`) on what was a citation
2020        // mistake, not a label mistake.
2021        let r = parse_str("see [@smith2024 missing close\n");
2022        assert!(!r.has_errors(), "{:?}", r.diagnostics);
2023        assert!(
2024            r.diagnostics
2025                .iter()
2026                .any(|d| d.def().code() == codes::MOS0039.code()
2027                    && d.severity() == Severity::Warning),
2028            "expected MOS0039, got {:?}",
2029            r.diagnostics,
2030        );
2031        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
2032        assert!(
2033            inlines.iter().all(|i| i.kind != InlineKind::Citation),
2034            "unterminated citation must not emit a Citation node: {inlines:?}",
2035        );
2036        assert!(
2037            inlines.iter().all(|i| i.kind != InlineKind::Reference),
2038            "unterminated citation must not leak a phantom Reference: {inlines:?}",
2039        );
2040    }
2041
2042    #[test]
2043    fn citation_empty_key_warns_and_recovers_as_text() {
2044        // `[@]` with no key is malformed.
2045        let r = parse_str("look [@] here\n");
2046        assert!(!r.has_errors(), "{:?}", r.diagnostics);
2047        assert!(
2048            r.diagnostics
2049                .iter()
2050                .any(|d| d.def().code() == codes::MOS0039.code()),
2051            "expected MOS0039, got {:?}",
2052            r.diagnostics,
2053        );
2054        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
2055        assert!(inlines.iter().all(|i| i.kind != InlineKind::Citation));
2056    }
2057
2058    #[test]
2059    fn citation_multi_key_form_is_deferred_and_does_not_leak_references() {
2060        // `[@a; @b]` is the pandoc multi-key form. This slice does
2061        // NOT support it: recognising it as one citation list is a
2062        // future bibliography slice (MVP 4 follow-up). Until then it
2063        // must surface as a single `MOS0039` warning and consume the
2064        // whole `[@…]` extent so neither `@a` nor `@b` slips out as
2065        // a `Reference` inline. Without aggressive recovery the
2066        // resolver would later raise `MOS0033` on `@a`/`@b` because
2067        // they're not labelled blocks; that would be doubly wrong:
2068        // the diagnostic would point at the wrong feature and would
2069        // promote a parser warning into a resolver error.
2070        let r = parse_str("compare [@smith2024; @jones2025] now\n");
2071        assert!(!r.has_errors(), "{:?}", r.diagnostics);
2072        let w026: Vec<_> = r
2073            .diagnostics
2074            .iter()
2075            .filter(|d| d.def().code() == codes::MOS0039.code())
2076            .collect();
2077        assert_eq!(w026.len(), 1, "expected exactly one MOS0039, got {w026:?}");
2078        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
2079        assert!(
2080            inlines.iter().all(|i| i.kind != InlineKind::Citation),
2081            "multi-key form must not emit a Citation node: {inlines:?}",
2082        );
2083        assert!(
2084            inlines.iter().all(|i| i.kind != InlineKind::Reference),
2085            "multi-key form must not leak phantom References: {inlines:?}",
2086        );
2087    }
2088
2089    #[test]
2090    fn citation_inside_emphasis_round_trips() {
2091        // Citations nest inside styled runs just like any other inline.
2092        let r = parse_str("*see [@smith2024]*\n");
2093        assert!(!r.has_errors(), "{:?}", r.diagnostics);
2094        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
2095        let kinds: Vec<InlineKind> = inlines.iter().map(|i| i.kind).collect();
2096        assert_eq!(
2097            kinds,
2098            vec![InlineKind::Emphasis, InlineKind::Citation],
2099            "got {inlines:?}",
2100        );
2101        assert_eq!(inlines[1].text, "smith2024");
2102    }
2103
2104    #[test]
2105    fn citation_multiple_keys_each_emit_one_node() {
2106        // Two adjacent `[@a]` `[@b]` groups become two Citation
2107        // inlines. List form `[@a; @b]` is intentionally NOT
2108        // recognised in this slice and parses as a malformed
2109        // citation + literal text.
2110        let r = parse_str("[@first] and [@second]\n");
2111        assert!(!r.has_errors(), "{:?}", r.diagnostics);
2112        let (inlines, _) = r.tree.items[0].as_paragraph().unwrap();
2113        let citation_keys: Vec<&str> = inlines
2114            .iter()
2115            .filter(|i| i.kind == InlineKind::Citation)
2116            .map(|i| i.text.as_str())
2117            .collect();
2118        assert_eq!(citation_keys, vec!["first", "second"]);
2119    }
2120}