// mos_parse/syntax.rs

use std::path::PathBuf;

use mos_core::{Diagnostic, Severity, SourceSpan};

5/// Concrete syntax tree for a single `.mos` source file.
6#[derive(Debug, Clone)]
7pub struct SyntaxTree {
8    pub file: PathBuf,
9    pub items: Vec<Item>,
10}
11
12/// Top-level construct in a `.mos` file.
13#[derive(Debug, Clone)]
14pub enum Item {
15    /// `= Title`, `== Subtitle`, `=== Subsubtitle`. A trailing
16    /// `<label>` token after the title attaches to this heading.
17    Heading {
18        level: u8,
19        inlines: Vec<Inline>,
20        label: Option<String>,
21        label_span: Option<SourceSpan>,
22        span: SourceSpan,
23    },
24    /// One or more consecutive non-blank lines that are not a heading
25    /// and not a `#set` block. A leading `<label>` token (possibly
26    /// preceded by ASCII whitespace) attaches to this paragraph.
27    Paragraph {
28        inlines: Vec<Inline>,
29        label: Option<String>,
30        label_span: Option<SourceSpan>,
31        span: SourceSpan,
32    },
33    /// `#set name(...)`, `#image(...)`, `#figure(...)`. The body is
34    /// lexed into typed `(key, value)` args; semantic validation
35    /// (known target/key, type coercion, sanity floors) happens in
36    /// the lowerer. `kind` distinguishes the `#set`-style configuration
37    /// directive from standalone calls like `#image` and `#figure`,
38    /// which the lowerer dispatches to dedicated paths.
39    Set {
40        kind: DirectiveKind,
41        name: String,
42        args: Vec<SetArg>,
43        span: SourceSpan,
44    },
45    /// Raw preformatted text or code block. Both forms preserve their
46    /// long-bracket body as text; the kind leaves room for later styling
47    /// or language-aware code rendering.
48    RawBlock {
49        kind: RawBlockKind,
50        args: Vec<SetArg>,
51        text: String,
52        label: Option<String>,
53        label_span: Option<SourceSpan>,
54        span: SourceSpan,
55    },
56    /// A bullet (`- `) or numbered (`\d+\. `) list. Sibling items at
57    /// the same indent are grouped under one list; deeper indents
58    /// become nested lists hanging off the most recent item. Numbered
59    /// lists always renumber from 1 in MVP: explicit `start: N` is
60    /// deferred.
61    List {
62        ordered: bool,
63        items: Vec<ListItem>,
64        span: SourceSpan,
65    },
66}
67
68/// One entry inside an [`Item::List`]. `inlines` is the item's own
69/// text (markers stripped, parsed with the same inline tokenizer as
70/// paragraphs); `children` carries nested blocks, currently restricted
71/// to further [`Item::List`]s per the MVP scope.
72#[derive(Debug, Clone)]
73pub struct ListItem {
74    pub inlines: Vec<Inline>,
75    pub children: Vec<Item>,
76    pub span: SourceSpan,
77}
78
/// Tag for the directive shapes [`Item::Set`] can represent: the
/// `#set <target>(...)` configuration directive vs the standalone
/// `#image(...)`, `#figure(...)`, and `#bibliography(...)` calls. The
/// lowerer dispatches on this rather than the [`Item::Set::name`] string
/// so `#set image(...)` can never collide with `#image(...)`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DirectiveKind {
    /// `#set <name>(...)`: sets defaults on a style target.
    Set,
    /// `#image("path", ...)`: raster image directive.
    Image,
    /// `#figure(image: ..., caption: ...)`: captioned image container.
    Figure,
    /// `#bibliography("refs.bib")`: declares a bibliography source
    /// database. The lowerer records the (source-relative) path so a
    /// later BibTeX-parsing slice can read it; citation resolution and
    /// rendering are not part of this directive.
    Bibliography,
}

/// Distinguishes the two raw block forms an [`Item::RawBlock`] can
/// take.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum RawBlockKind {
    /// Raw preformatted text block.
    Pre,
    /// Code block.
    Code,
}

105/// Borrowed view of an [`Item::RawBlock`] payload.
106#[derive(Debug, Clone, Copy)]
107pub struct RawBlockView<'a> {
108    pub kind: RawBlockKind,
109    pub args: &'a [SetArg],
110    pub text: &'a str,
111    pub label: Option<&'a str>,
112    pub label_span: Option<&'a SourceSpan>,
113    pub span: &'a SourceSpan,
114}
115
116/// One argument inside a directive body: either a `key: value`
117/// pair (the only form `#set` accepts) or a positional value (a
118/// leading string literal allowed on `#image(...)` / `#figure(...)`).
119///
120/// This used to be a struct with an empty-string `key` standing in
121/// for "positional," but that sentinel was a brittle public contract:
122/// any consumer that forgot the special-case would silently treat a
123/// positional path as a named arg called `""`. The enum form makes
124/// the two shapes explicit so the compiler can enforce exhaustive
125/// matches.
126#[derive(Debug, Clone)]
127pub enum SetArg {
128    /// A `key: value` argument. `key_span` covers the identifier
129    /// before the colon; `value_span` covers the literal.
130    Named {
131        key: String,
132        value: SetValue,
133        key_span: SourceSpan,
134        value_span: SourceSpan,
135    },
136    /// A leading positional value. The parser currently only accepts
137    /// string literals here (used for `#image("path.png")`); other
138    /// literal kinds in a positional slot would surface as a parse
139    /// error rather than land in this variant.
140    Positional {
141        value: SetValue,
142        value_span: SourceSpan,
143    },
144}
145
146impl SetArg {
147    /// Borrow the value carried by this argument, regardless of shape.
148    #[must_use]
149    pub fn value(&self) -> &SetValue {
150        match self {
151            Self::Named { value, .. } | Self::Positional { value, .. } => value,
152        }
153    }
154
155    /// The span covering the argument's value literal.
156    #[must_use]
157    pub fn value_span(&self) -> &SourceSpan {
158        match self {
159            Self::Named { value_span, .. } | Self::Positional { value_span, .. } => value_span,
160        }
161    }
162
163    /// The key identifier for [`Self::Named`]; `None` for
164    /// [`Self::Positional`].
165    #[must_use]
166    pub fn key(&self) -> Option<&str> {
167        match self {
168            Self::Named { key, .. } => Some(key.as_str()),
169            Self::Positional { .. } => None,
170        }
171    }
172
173    /// The span covering the key identifier, for [`Self::Named`].
174    /// `None` for [`Self::Positional`].
175    #[must_use]
176    pub fn key_span(&self) -> Option<&SourceSpan> {
177        match self {
178            Self::Named { key_span, .. } => Some(key_span),
179            Self::Positional { .. } => None,
180        }
181    }
182}
183
184/// Literal values recognised inside a `#set` body. Full expression
185/// evaluation (`#let`, function calls, `if`) is deferred to MVP 5; this
186/// covers what the manifest examples actually use.
187#[derive(Debug, Clone, PartialEq)]
188pub enum SetValue {
189    Str(String),
190    Int(i64),
191    Float(f64),
192    Length(f64, LengthUnit),
193    Ident(String),
194}
195
/// Unit attached to a [`SetValue::Length`] magnitude.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LengthUnit {
    /// `mm` unit. NOTE(review): presumably millimetres; confirm.
    Mm,
    /// `pt` unit. NOTE(review): presumably typographic points; confirm.
    Pt,
    /// `em` unit. NOTE(review): presumably font-relative; confirm.
    Em,
}

203/// Inline run produced by the markup tokenizer.
204#[derive(Debug, Clone)]
205pub struct Inline {
206    pub kind: InlineKind,
207    pub text: String,
208    pub span: SourceSpan,
209    /// For [`InlineKind::Reference`] / [`InlineKind::PageReference`], the
210    /// source span of the label *identifier* alone; the `intro` in `@intro`
211    /// or `@page(intro)`, excluding the `@` sigil and the `@page(`…`)`
212    /// wrapper. The lowerer stamps it as the node's `label_span` so editor
213    /// features (rename) read the identifier range directly instead of
214    /// re-deriving it from [`Self::span`] geometry. `None` for every other
215    /// inline kind.
216    pub label_span: Option<SourceSpan>,
217}
218
/// Discriminates the inline runs carried by [`Inline`].
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum InlineKind {
    /// Plain text run.
    Text,
    /// Emphasised run.
    Emphasis,
    /// Strong run.
    Strong,
    /// Run that is both bold and italic.
    BoldItalic,
    /// Inline code run.
    Code,
    /// `@label`: a cross-reference to a labelled block. The
    /// [`Inline::text`] payload is the bare label name (no leading
    /// `@`); the resolver rewrites it to the target's resolved text.
    Reference,
    /// `@page(label)`: a reference to the printed *page number* of a
    /// labelled target. The [`Inline::text`] payload is the bare label name
    /// (the `page(` wrapper and `)` stripped). Distinct from
    /// [`Reference`](Self::Reference), which resolves to the target's section
    /// or figure number; a page reference resolves to where the target lands,
    /// which is only known after layout. Resolution runs through the
    /// resolve↔layout fixpoint (issue #72); this slice parses and models the
    /// reference but leaves it unresolved (placeholder text).
    PageReference,
    /// `[@key]`: a citation to a bibliography entry. The
    /// [`Inline::text`] payload is the bare citation key (no leading
    /// `[@` or trailing `]`); bibliography loading and rendering are
    /// future work tracked under MVP 4. The key alphabet matches the
    /// label alphabet (`[A-Za-z0-9_:.-]`); a single key per
    /// `[@…]` group is the only form recognised in this slice: list
    /// forms like `[@a; @b]` and prefix/suffix bodies are deferred.
    Citation,
    /// `\\`: a forced line break inside a paragraph. The line
    /// breaks here without the extra leading a blank-line paragraph
    /// break would give. Carries no text payload. The shorthand for
    /// a soft hyphen `\-` lowers to a literal U+00AD inside a
    /// surrounding [`InlineKind::Text`] run, not to a separate variant.
    HardBreak,
}

255impl Item {
256    /// Borrow the heading payload if `self` is [`Item::Heading`].
257    #[must_use]
258    pub fn as_heading(&self) -> Option<(u8, &[Inline], &SourceSpan)> {
259        if let Self::Heading {
260            level,
261            inlines,
262            span,
263            ..
264        } = self
265        {
266            Some((*level, inlines, span))
267        } else {
268            None
269        }
270    }
271
272    /// Borrow the paragraph payload if `self` is [`Item::Paragraph`].
273    #[must_use]
274    pub fn as_paragraph(&self) -> Option<(&[Inline], &SourceSpan)> {
275        if let Self::Paragraph { inlines, span, .. } = self {
276            Some((inlines, span))
277        } else {
278            None
279        }
280    }
281
282    /// Borrow the directive payload if `self` is [`Item::Set`].
283    ///
284    /// The returned tuple is `(name, args, span)`; the caller can also
285    /// reach [`DirectiveKind`] via [`Self::directive_kind`]. The
286    /// accessor name is retained for back-compat; every existing
287    /// caller pre-dates the `#image`/`#figure` directives and only
288    /// looks at name/args/span.
289    #[must_use]
290    pub fn as_set(&self) -> Option<(&str, &[SetArg], &SourceSpan)> {
291        if let Self::Set {
292            name, args, span, ..
293        } = self
294        {
295            Some((name.as_str(), args.as_slice(), span))
296        } else {
297            None
298        }
299    }
300
301    /// Borrow the raw block payload if `self` is [`Item::RawBlock`].
302    #[must_use]
303    pub fn as_raw_block(&self) -> Option<RawBlockView<'_>> {
304        if let Self::RawBlock {
305            kind,
306            args,
307            text,
308            label,
309            label_span,
310            span,
311        } = self
312        {
313            Some(RawBlockView {
314                kind: *kind,
315                args: args.as_slice(),
316                text: text.as_str(),
317                label: label.as_deref(),
318                label_span: label_span.as_ref(),
319                span,
320            })
321        } else {
322            None
323        }
324    }
325
326    /// Borrow the [`DirectiveKind`] tag if `self` is [`Item::Set`].
327    #[must_use]
328    pub fn directive_kind(&self) -> Option<DirectiveKind> {
329        if let Self::Set { kind, .. } = self {
330            Some(*kind)
331        } else {
332            None
333        }
334    }
335
336    /// Borrow the list payload if `self` is [`Item::List`]. The
337    /// returned tuple is `(ordered, items, span)`.
338    #[must_use]
339    pub fn as_list(&self) -> Option<(bool, &[ListItem], &SourceSpan)> {
340        if let Self::List {
341            ordered,
342            items,
343            span,
344        } = self
345        {
346            Some((*ordered, items.as_slice(), span))
347        } else {
348            None
349        }
350    }
351
352    /// Borrow the explicit `<label>` attached to this block, if any.
353    /// Returns `None` for [`Item::Set`] and [`Item::List`] (label
354    /// syntax is not yet defined on those blocks).
355    #[must_use]
356    pub fn label(&self) -> Option<&str> {
357        match self {
358            Self::Heading { label, .. }
359            | Self::Paragraph { label, .. }
360            | Self::RawBlock { label, .. } => label.as_deref(),
361            Self::Set { .. } | Self::List { .. } => None,
362        }
363    }
364
365    /// Borrow the source span covering only the label token text, if any.
366    /// The delimiters (`<`, `>`, or directive string quotes) are excluded so a
367    /// structured suggestion can replace just the label bytes.
368    #[must_use]
369    pub fn label_span(&self) -> Option<&SourceSpan> {
370        match self {
371            Self::Heading { label_span, .. }
372            | Self::Paragraph { label_span, .. }
373            | Self::RawBlock { label_span, .. } => label_span.as_ref(),
374            Self::Set { .. } | Self::List { .. } => None,
375        }
376    }
377}
378
379/// Output of [`crate::parse`]. Diagnostics may include warnings even
380/// when the tree is structurally usable; callers decide what to do per
381/// [`ParseResult::has_errors`].
382#[derive(Debug)]
383pub struct ParseResult {
384    pub tree: SyntaxTree,
385    pub diagnostics: Vec<Diagnostic>,
386}
387
388impl ParseResult {
389    #[must_use]
390    pub fn has_errors(&self) -> bool {
391        self.diagnostics
392            .iter()
393            .any(|d| d.severity() == Severity::Error)
394    }
395}