mos_parse/syntax.rs
1use std::path::PathBuf;
2
3use mos_core::{Diagnostic, Severity, SourceSpan};
4
5/// Concrete syntax tree for a single `.mos` source file.
6#[derive(Debug, Clone)]
7pub struct SyntaxTree {
8 pub file: PathBuf,
9 pub items: Vec<Item>,
10}
11
12/// Top-level construct in a `.mos` file.
13#[derive(Debug, Clone)]
14pub enum Item {
15 /// `= Title`, `== Subtitle`, `=== Subsubtitle`. A trailing
16 /// `<label>` token after the title attaches to this heading.
17 Heading {
18 level: u8,
19 inlines: Vec<Inline>,
20 label: Option<String>,
21 label_span: Option<SourceSpan>,
22 span: SourceSpan,
23 },
24 /// One or more consecutive non-blank lines that are not a heading
25 /// and not a `#set` block. A leading `<label>` token (possibly
26 /// preceded by ASCII whitespace) attaches to this paragraph.
27 Paragraph {
28 inlines: Vec<Inline>,
29 label: Option<String>,
30 label_span: Option<SourceSpan>,
31 span: SourceSpan,
32 },
33 /// `#set name(...)`, `#image(...)`, `#figure(...)`. The body is
34 /// lexed into typed `(key, value)` args; semantic validation
35 /// (known target/key, type coercion, sanity floors) happens in
36 /// the lowerer. `kind` distinguishes the `#set`-style configuration
37 /// directive from standalone calls like `#image` and `#figure`,
38 /// which the lowerer dispatches to dedicated paths.
39 Set {
40 kind: DirectiveKind,
41 name: String,
42 args: Vec<SetArg>,
43 span: SourceSpan,
44 },
45 /// Raw preformatted text or code block. Both forms preserve their
46 /// long-bracket body as text; the kind leaves room for later styling
47 /// or language-aware code rendering.
48 RawBlock {
49 kind: RawBlockKind,
50 args: Vec<SetArg>,
51 text: String,
52 label: Option<String>,
53 label_span: Option<SourceSpan>,
54 span: SourceSpan,
55 },
56 /// A bullet (`- `) or numbered (`\d+\. `) list. Sibling items at
57 /// the same indent are grouped under one list; deeper indents
58 /// become nested lists hanging off the most recent item. Numbered
59 /// lists always renumber from 1 in MVP: explicit `start: N` is
60 /// deferred.
61 List {
62 ordered: bool,
63 items: Vec<ListItem>,
64 span: SourceSpan,
65 },
66}
67
68/// One entry inside an [`Item::List`]. `inlines` is the item's own
69/// text (markers stripped, parsed with the same inline tokenizer as
70/// paragraphs); `children` carries nested blocks, currently restricted
71/// to further [`Item::List`]s per the MVP scope.
72#[derive(Debug, Clone)]
73pub struct ListItem {
74 pub inlines: Vec<Inline>,
75 pub children: Vec<Item>,
76 pub span: SourceSpan,
77}
78
/// Tag for the directive shapes [`Item::Set`] can represent: the
/// `#set <target>(...)` configuration directive vs the standalone
/// `#image(...)`, `#figure(...)`, and `#bibliography(...)` calls. The
/// lowerer dispatches on this rather than the [`Item::Set::name`] string
/// so `#set image(...)` can never collide with `#image(...)`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DirectiveKind {
    /// `#set <name>(...)`: sets defaults on a style target.
    Set,
    /// `#image("path", ...)`: raster image directive.
    Image,
    /// `#figure(image: ..., caption: ...)`: captioned image container.
    Figure,
    /// `#bibliography("refs.bib")`: declares a bibliography source
    /// database. The lowerer records the (source-relative) path so a
    /// later BibTeX-parsing slice can read it; citation resolution and
    /// rendering are not part of this directive.
    Bibliography,
}
98
/// Tag distinguishing the two raw block forms an [`Item::RawBlock`]
/// can hold.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum RawBlockKind {
    /// Raw preformatted text.
    Pre,
    /// Code block.
    Code,
}
104
105/// Borrowed view of an [`Item::RawBlock`] payload.
106#[derive(Debug, Clone, Copy)]
107pub struct RawBlockView<'a> {
108 pub kind: RawBlockKind,
109 pub args: &'a [SetArg],
110 pub text: &'a str,
111 pub label: Option<&'a str>,
112 pub label_span: Option<&'a SourceSpan>,
113 pub span: &'a SourceSpan,
114}
115
116/// One argument inside a directive body: either a `key: value`
117/// pair (the only form `#set` accepts) or a positional value (a
118/// leading string literal allowed on `#image(...)` / `#figure(...)`).
119///
120/// This used to be a struct with an empty-string `key` standing in
121/// for "positional," but that sentinel was a brittle public contract:
122/// any consumer that forgot the special-case would silently treat a
123/// positional path as a named arg called `""`. The enum form makes
124/// the two shapes explicit so the compiler can enforce exhaustive
125/// matches.
126#[derive(Debug, Clone)]
127pub enum SetArg {
128 /// A `key: value` argument. `key_span` covers the identifier
129 /// before the colon; `value_span` covers the literal.
130 Named {
131 key: String,
132 value: SetValue,
133 key_span: SourceSpan,
134 value_span: SourceSpan,
135 },
136 /// A leading positional value. The parser currently only accepts
137 /// string literals here (used for `#image("path.png")`); other
138 /// literal kinds in a positional slot would surface as a parse
139 /// error rather than land in this variant.
140 Positional {
141 value: SetValue,
142 value_span: SourceSpan,
143 },
144}
145
146impl SetArg {
147 /// Borrow the value carried by this argument, regardless of shape.
148 #[must_use]
149 pub fn value(&self) -> &SetValue {
150 match self {
151 Self::Named { value, .. } | Self::Positional { value, .. } => value,
152 }
153 }
154
155 /// The span covering the argument's value literal.
156 #[must_use]
157 pub fn value_span(&self) -> &SourceSpan {
158 match self {
159 Self::Named { value_span, .. } | Self::Positional { value_span, .. } => value_span,
160 }
161 }
162
163 /// The key identifier for [`Self::Named`]; `None` for
164 /// [`Self::Positional`].
165 #[must_use]
166 pub fn key(&self) -> Option<&str> {
167 match self {
168 Self::Named { key, .. } => Some(key.as_str()),
169 Self::Positional { .. } => None,
170 }
171 }
172
173 /// The span covering the key identifier, for [`Self::Named`].
174 /// `None` for [`Self::Positional`].
175 #[must_use]
176 pub fn key_span(&self) -> Option<&SourceSpan> {
177 match self {
178 Self::Named { key_span, .. } => Some(key_span),
179 Self::Positional { .. } => None,
180 }
181 }
182}
183
184/// Literal values recognised inside a `#set` body. Full expression
185/// evaluation (`#let`, function calls, `if`) is deferred to MVP 5; this
186/// covers what the manifest examples actually use.
187#[derive(Debug, Clone, PartialEq)]
188pub enum SetValue {
189 Str(String),
190 Int(i64),
191 Float(f64),
192 Length(f64, LengthUnit),
193 Ident(String),
194}
195
/// Unit attached to a [`SetValue::Length`] literal.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LengthUnit {
    /// Millimetres.
    Mm,
    /// Points.
    Pt,
    /// Ems.
    Em,
}
202
203/// Inline run produced by the markup tokenizer.
204#[derive(Debug, Clone)]
205pub struct Inline {
206 pub kind: InlineKind,
207 pub text: String,
208 pub span: SourceSpan,
209 /// For [`InlineKind::Reference`] / [`InlineKind::PageReference`], the
210 /// source span of the label *identifier* alone; the `intro` in `@intro`
211 /// or `@page(intro)`, excluding the `@` sigil and the `@page(`…`)`
212 /// wrapper. The lowerer stamps it as the node's `label_span` so editor
213 /// features (rename) read the identifier range directly instead of
214 /// re-deriving it from [`Self::span`] geometry. `None` for every other
215 /// inline kind.
216 pub label_span: Option<SourceSpan>,
217}
218
/// Kind tag carried by every [`Inline`] run.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum InlineKind {
    /// Plain text run.
    Text,
    /// Emphasised run.
    Emphasis,
    /// Strong run.
    Strong,
    /// Run that is both strong and emphasised.
    BoldItalic,
    /// Inline code run.
    Code,
    /// `@label`: a cross-reference to a labelled block. The
    /// [`Inline::text`] payload is the bare label name (no leading
    /// `@`); the resolver rewrites it to the target's resolved text.
    Reference,
    /// `@page(label)`: a reference to the printed *page number* of a
    /// labelled target. The [`Inline::text`] payload is the bare label name
    /// (the `page(` wrapper and `)` stripped). Distinct from
    /// [`Reference`](Self::Reference), which resolves to the target's section
    /// or figure number; a page reference resolves to where the target lands,
    /// which is only known after layout. Resolution runs through the
    /// resolve↔layout fixpoint (issue #72); this slice parses and models the
    /// reference but leaves it unresolved (placeholder text).
    PageReference,
    /// `[@key]`: a citation to a bibliography entry. The
    /// [`Inline::text`] payload is the bare citation key (no leading
    /// `[@` or trailing `]`); bibliography loading and rendering are
    /// future work tracked under MVP 4. The key alphabet matches the
    /// label alphabet (`[A-Za-z0-9_:.-]`); a single key per
    /// `[@…]` group is the only form recognised in this slice: list
    /// forms like `[@a; @b]` and prefix/suffix bodies are deferred.
    Citation,
    /// `\\`: a forced line break inside a paragraph. The line
    /// breaks here without the extra leading a blank-line paragraph
    /// break would give. Carries no text payload. The shorthand for
    /// a soft hyphen `\-` lowers to a literal U+00AD inside a
    /// surrounding [`InlineKind::Text`] run, not to a separate variant.
    HardBreak,
}
254
255impl Item {
256 /// Borrow the heading payload if `self` is [`Item::Heading`].
257 #[must_use]
258 pub fn as_heading(&self) -> Option<(u8, &[Inline], &SourceSpan)> {
259 if let Self::Heading {
260 level,
261 inlines,
262 span,
263 ..
264 } = self
265 {
266 Some((*level, inlines, span))
267 } else {
268 None
269 }
270 }
271
272 /// Borrow the paragraph payload if `self` is [`Item::Paragraph`].
273 #[must_use]
274 pub fn as_paragraph(&self) -> Option<(&[Inline], &SourceSpan)> {
275 if let Self::Paragraph { inlines, span, .. } = self {
276 Some((inlines, span))
277 } else {
278 None
279 }
280 }
281
282 /// Borrow the directive payload if `self` is [`Item::Set`].
283 ///
284 /// The returned tuple is `(name, args, span)`; the caller can also
285 /// reach [`DirectiveKind`] via [`Self::directive_kind`]. The
286 /// accessor name is retained for back-compat; every existing
287 /// caller pre-dates the `#image`/`#figure` directives and only
288 /// looks at name/args/span.
289 #[must_use]
290 pub fn as_set(&self) -> Option<(&str, &[SetArg], &SourceSpan)> {
291 if let Self::Set {
292 name, args, span, ..
293 } = self
294 {
295 Some((name.as_str(), args.as_slice(), span))
296 } else {
297 None
298 }
299 }
300
301 /// Borrow the raw block payload if `self` is [`Item::RawBlock`].
302 #[must_use]
303 pub fn as_raw_block(&self) -> Option<RawBlockView<'_>> {
304 if let Self::RawBlock {
305 kind,
306 args,
307 text,
308 label,
309 label_span,
310 span,
311 } = self
312 {
313 Some(RawBlockView {
314 kind: *kind,
315 args: args.as_slice(),
316 text: text.as_str(),
317 label: label.as_deref(),
318 label_span: label_span.as_ref(),
319 span,
320 })
321 } else {
322 None
323 }
324 }
325
326 /// Borrow the [`DirectiveKind`] tag if `self` is [`Item::Set`].
327 #[must_use]
328 pub fn directive_kind(&self) -> Option<DirectiveKind> {
329 if let Self::Set { kind, .. } = self {
330 Some(*kind)
331 } else {
332 None
333 }
334 }
335
336 /// Borrow the list payload if `self` is [`Item::List`]. The
337 /// returned tuple is `(ordered, items, span)`.
338 #[must_use]
339 pub fn as_list(&self) -> Option<(bool, &[ListItem], &SourceSpan)> {
340 if let Self::List {
341 ordered,
342 items,
343 span,
344 } = self
345 {
346 Some((*ordered, items.as_slice(), span))
347 } else {
348 None
349 }
350 }
351
352 /// Borrow the explicit `<label>` attached to this block, if any.
353 /// Returns `None` for [`Item::Set`] and [`Item::List`] (label
354 /// syntax is not yet defined on those blocks).
355 #[must_use]
356 pub fn label(&self) -> Option<&str> {
357 match self {
358 Self::Heading { label, .. }
359 | Self::Paragraph { label, .. }
360 | Self::RawBlock { label, .. } => label.as_deref(),
361 Self::Set { .. } | Self::List { .. } => None,
362 }
363 }
364
365 /// Borrow the source span covering only the label token text, if any.
366 /// The delimiters (`<`, `>`, or directive string quotes) are excluded so a
367 /// structured suggestion can replace just the label bytes.
368 #[must_use]
369 pub fn label_span(&self) -> Option<&SourceSpan> {
370 match self {
371 Self::Heading { label_span, .. }
372 | Self::Paragraph { label_span, .. }
373 | Self::RawBlock { label_span, .. } => label_span.as_ref(),
374 Self::Set { .. } | Self::List { .. } => None,
375 }
376 }
377}
378
379/// Output of [`crate::parse`]. Diagnostics may include warnings even
380/// when the tree is structurally usable; callers decide what to do per
381/// [`ParseResult::has_errors`].
382#[derive(Debug)]
383pub struct ParseResult {
384 pub tree: SyntaxTree,
385 pub diagnostics: Vec<Diagnostic>,
386}
387
388impl ParseResult {
389 #[must_use]
390 pub fn has_errors(&self) -> bool {
391 self.diagnostics
392 .iter()
393 .any(|d| d.severity() == Severity::Error)
394 }
395}