mos_core/document.rs
1//! The lowered semantic document graph (manifest §5, §6 stage 2).
2//!
3//! [`Document`] owns every [`Node`] and hands them out through their stable
4//! [`NodeId`]. Each node carries a [`NodeKind`], a [`SourceSpan`], a
5//! [`ContentHash`], a [`StyleId`], and an [`AttrMap`] of [`AttrValue`]s.
6
7use std::collections::BTreeMap;
8use std::path::PathBuf;
9use std::sync::Arc;
10
11use crate::{ContentHash, SourceSpan};
12
/// Stable handle for a single node of a document.
///
/// Manifest §5.1 asks for IDs derived from
/// `hash(file path + syntactic position + explicit label + local structure)`
/// instead of parse order. For now the MVP 0 lowerer (`mos-eval`) receives
/// monotonically increasing IDs from `Document::alloc`; the hash-based
/// derivation is postponed to MVP 5, when stable IDs become observable
/// through the cache.
///
/// # Examples
///
/// ```
/// use mos_core::NodeId;
///
/// let root = NodeId(0);
///
/// assert_eq!(root.0, 0);
/// ```
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NodeId(pub u64);
32
/// Handle naming a resolved style bundle.
///
/// The default value is `StyleId(0)`.
///
/// # Examples
///
/// ```
/// use mos_core::StyleId;
///
/// let style = StyleId::default();
///
/// assert_eq!(style.0, 0);
/// ```
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StyleId(pub u32);
46
/// The kinds of nodes Mosaic recognises (manifest §5.1).
///
/// The enum is a plain, field-less tag: it is `Copy`, comparable with `==`,
/// and hashable, so it can be used directly as a `HashMap`/`HashSet` key
/// (for example to group or count nodes per kind).
///
/// # Examples
///
/// ```
/// use mos_core::NodeKind;
///
/// let kind = NodeKind::Paragraph;
///
/// assert_eq!(kind, NodeKind::Paragraph);
/// ```
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum NodeKind {
    /// The root of the graph. `Document::new` allocates exactly one node
    /// of this kind, as `NodeId(0)`.
    Document,
    /// A section. The resolver back-patches attributes such as `number`
    /// onto sections.
    Section,
    Paragraph,
    Text,
    Emphasis,
    Strong,
    BoldItalic,
    Math,
    Equation,
    /// A captioned container: an image plus a caption paragraph, laid
    /// out together with the caption beneath. Cross-references via
    /// `@fig:foo` will target this kind once MVP 3 lands.
    Figure,
    /// A raster image (PNG / JPEG in MVP 1.5). The decoded pixel data
    /// and natural dimensions live on the node's attributes; see the
    /// `mos-eval` resolver for the exact attribute names.
    Image,
    Table,
    Citation,
    /// A `@label` reference that resolves to a section/figure number; the
    /// resolver back-patches its `text` attribute.
    Reference,
    /// A `@page(label)` reference to the printed page number of a labelled
    /// target. Distinct from [`Reference`](Self::Reference) (which resolves to
    /// a section/figure number): a page reference resolves to where the target
    /// lands, which is only known after layout, via the resolve↔layout fixpoint
    /// (issue #72). Carries a `label` attribute and placeholder `text`; layout
    /// renders the `text` attribute like any inline run.
    PageReference,
    Theorem,
    Footnote,
    Bibliography,
    Raw,
    /// A bullet or numbered list. The `ordered` attribute distinguishes
    /// the two kinds and child nodes are [`NodeKind::ListItem`]s.
    List,
    /// One entry inside a [`NodeKind::List`]. Inline children carry the
    /// item's text; nested [`NodeKind::List`] children describe deeper
    /// levels.
    ListItem,
    /// `\\`: a forced line break inside a paragraph. Carries no
    /// attributes; layout consumes it as a `WordItem::HardBreak`
    /// sentinel in the inline word stream. A blank-line paragraph
    /// break is **not** the same node: it ends the paragraph and
    /// triggers paragraph-spacing leading, whereas `HardBreak` keeps
    /// the same paragraph and applies normal inter-line leading.
    HardBreak,
}
106
107/// A semantic document node (manifest §5.1).
108///
109/// Nodes are allocated only by [`Document::alloc`] / [`Document::alloc_child`]
110/// from a [`NodeSpec`]: the arena assigns the [`NodeId`] and owns the
111/// `content_hash`/`style_id` placeholders. Those two fields are `pub(crate)`,
112/// which makes the struct literal unconstructible outside this crate, so no
113/// caller can fabricate a node with a fake id or a hand-set hash.
114///
115/// # Examples
116///
117/// ```
118/// use std::path::PathBuf;
119///
120/// use mos_core::{Document, NodeKind, NodeSpec, SourceSpan};
121///
122/// let file = PathBuf::from("main.mos");
123/// let mut doc = Document::new(file.clone());
124/// let id = doc.alloc(NodeSpec::new(NodeKind::Paragraph, SourceSpan::placeholder(file)));
125///
126/// assert_eq!(doc.get(id).map(|node| &node.kind), Some(&NodeKind::Paragraph));
127/// ```
128#[derive(Clone, Debug)]
129pub struct Node {
130 pub id: NodeId,
131 pub kind: NodeKind,
132 pub span: SourceSpan,
133 pub children: Vec<NodeId>,
134 pub attributes: AttrMap,
135 /// Hash-derived identity placeholder (manifest §5.1); set by the arena,
136 /// always default until the MVP 5 cache work. `pub(crate)` to seal
137 /// external construction.
138 pub(crate) content_hash: ContentHash,
139 /// Resolved style slot placeholder; set by the arena, always default
140 /// until styling lands. `pub(crate)` to seal external construction.
141 pub(crate) style_id: StyleId,
142}
143
144impl Node {
145 /// The node's content hash: a hash-derived identity placeholder
146 /// (manifest §5.1), default until the MVP 5 cache work. Read-only: the
147 /// arena owns this field.
148 #[must_use]
149 pub const fn content_hash(&self) -> ContentHash {
150 self.content_hash
151 }
152
153 /// The node's resolved style slot: a placeholder, default until styling
154 /// lands. Read-only: the arena owns this field.
155 #[must_use]
156 pub const fn style_id(&self) -> StyleId {
157 self.style_id
158 }
159}
160
161/// The blueprint for a node handed to [`Document::alloc`] /
162/// [`Document::alloc_child`]. Carries only the fields a caller legitimately
163/// chooses: `kind`, `span`, and `attributes`. The arena supplies the
164/// `id`, the empty `children` list, and the `content_hash`/`style_id`
165/// placeholders, so an invalid node is unrepresentable at the call site.
166#[derive(Clone, Debug)]
167pub struct NodeSpec {
168 pub kind: NodeKind,
169 pub span: SourceSpan,
170 pub attributes: AttrMap,
171}
172
173impl NodeSpec {
174 /// A spec for a node of `kind` spanning `span`, with no attributes.
175 #[must_use]
176 pub fn new(kind: NodeKind, span: SourceSpan) -> Self {
177 Self {
178 kind,
179 span,
180 attributes: AttrMap::new(),
181 }
182 }
183
184 /// Attach `attributes` to this spec.
185 #[must_use]
186 pub fn with_attributes(mut self, attributes: AttrMap) -> Self {
187 self.attributes = attributes;
188 self
189 }
190}
191
/// Attribute map carried on each node, keyed by attribute name.
///
/// Keys are plain `String`s for now, which is sufficient for the stub;
/// switching to interned strings is planned for a later iteration.
pub type AttrMap = BTreeMap<String, AttrValue>;
195
/// A single value stored in a [`Node`]'s attribute map.
///
/// # Examples
///
/// ```
/// use mos_core::AttrValue;
///
/// let value = AttrValue::Str("intro".to_owned());
///
/// assert_eq!(value, AttrValue::Str("intro".to_owned()));
/// ```
#[derive(Debug, Clone, PartialEq)]
pub enum AttrValue {
    /// A boolean flag.
    Bool(bool),
    /// A signed integer.
    Int(i64),
    /// A floating-point number.
    Float(f64),
    /// An owned string.
    Str(String),
    /// An ordered sequence of nested values.
    List(Vec<Self>),
    /// A length that has already been converted to PDF points. Unit-tagged
    /// literals (`mm`, `pt`, `em`) exist only in the parser; the lowerer
    /// reduces them to this one canonical scalar, which keeps layout free
    /// of any unit handling.
    Length(f64),
    /// An opaque binary payload. Its current use is carrying decoded raster
    /// image pixels (RGB8) on an [`NodeKind::Image`] node, which lets the
    /// PDF backend emit an Image `XObject` without reading the source file
    /// a second time.
    ///
    /// The payload is an `Arc<[u8]>`, so cloning a node that carries decoded
    /// pixels stays cheap (e.g. across cache boundaries, or when one image
    /// would otherwise be duplicated through the document graph). Since the
    /// layout engine still dedups by resolved path, most documents end up
    /// with one buffer per image anyway; the `Arc` guards against
    /// accidental copies on the eval → layout boundary.
    Bytes(Arc<[u8]>),
}
232
233/// The lowered semantic document graph (manifest §5, §6 stage 2).
234///
235/// Owns every [`Node`] and exposes them through their stable [`NodeId`].
236/// MVP 0 stores nodes in insertion order; the manifest §5.1 hash-derived
237/// IDs land alongside the cache work in MVP 5.
238///
239/// # Examples
240///
241/// ```
242/// use std::path::PathBuf;
243///
244/// use mos_core::{Document, NodeId};
245///
246/// let doc = Document::new(PathBuf::from("main.mos"));
247///
248/// assert_eq!(doc.root, NodeId(0));
249/// ```
250#[derive(Debug)]
251pub struct Document {
252 pub root: NodeId,
253 pub file: PathBuf,
254 nodes: BTreeMap<NodeId, Node>,
255 next_id: u64,
256}
257
258impl Document {
259 /// Create an empty document rooted at `file`. Allocates the
260 /// `Document` root node (`NodeId(0)`) eagerly so callers can append
261 /// children to it immediately.
262 ///
263 /// # Examples
264 ///
265 /// ```
266 /// use std::path::PathBuf;
267 ///
268 /// use mos_core::Document;
269 ///
270 /// let doc = Document::new(PathBuf::from("main.mos"));
271 ///
272 /// assert_eq!(doc.len(), 1);
273 /// ```
274 #[must_use]
275 pub fn new(file: PathBuf) -> Self {
276 let root_id = NodeId(0);
277 let root_node = Node {
278 id: root_id,
279 kind: NodeKind::Document,
280 span: SourceSpan::placeholder(file.clone()),
281 content_hash: ContentHash::default(),
282 style_id: StyleId::default(),
283 children: Vec::new(),
284 attributes: AttrMap::new(),
285 };
286 let mut nodes = BTreeMap::new();
287 nodes.insert(root_id, root_node);
288 Self {
289 root: root_id,
290 file,
291 nodes,
292 next_id: 1,
293 }
294 }
295
296 /// Allocate a node from `spec` in the arena and return its assigned
297 /// [`NodeId`]. The arena fills in the id, an empty `children` list, and
298 /// the default `content_hash`/`style_id` placeholders.
299 ///
300 /// # Examples
301 ///
302 /// ```
303 /// use std::path::PathBuf;
304 ///
305 /// use mos_core::{Document, NodeId, NodeKind, NodeSpec, SourceSpan};
306 ///
307 /// let file = PathBuf::from("main.mos");
308 /// let mut doc = Document::new(file.clone());
309 /// let id = doc.alloc(NodeSpec::new(NodeKind::Paragraph, SourceSpan::placeholder(file)));
310 ///
311 /// assert_eq!(id, NodeId(1));
312 /// ```
313 pub fn alloc(&mut self, spec: NodeSpec) -> NodeId {
314 let id = NodeId(self.next_id);
315 self.next_id += 1;
316 self.nodes.insert(id, Self::node_from_spec(id, spec));
317 id
318 }
319
320 /// Build the arena-owned [`Node`] for `id` from a caller's [`NodeSpec`],
321 /// supplying the fields the caller does not control.
322 fn node_from_spec(id: NodeId, spec: NodeSpec) -> Node {
323 Node {
324 id,
325 kind: spec.kind,
326 span: spec.span,
327 children: Vec::new(),
328 attributes: spec.attributes,
329 content_hash: ContentHash::default(),
330 style_id: StyleId::default(),
331 }
332 }
333
334 /// Allocate a node from `spec` as a child of `parent` and return its
335 /// [`NodeId`].
336 ///
337 /// # Panics
338 ///
339 /// Panics if `parent` is not a node already allocated by this
340 /// `Document`. Silently producing detached nodes would hide lowerer
341 /// bugs in release builds, so this is intentionally a release-time
342 /// assertion rather than a `debug_assert!`.
343 ///
344 /// # Examples
345 ///
346 /// ```
347 /// use std::path::PathBuf;
348 ///
349 /// use mos_core::{Document, NodeKind, NodeSpec, SourceSpan};
350 ///
351 /// let file = PathBuf::from("main.mos");
352 /// let mut doc = Document::new(file.clone());
353 /// let child = doc.alloc_child(doc.root, NodeSpec::new(NodeKind::Paragraph, SourceSpan::placeholder(file)));
354 ///
355 /// assert_eq!(doc.get(doc.root).map(|node| node.children.as_slice()), Some(&[child][..]));
356 /// ```
357 pub fn alloc_child(&mut self, parent: NodeId, spec: NodeSpec) -> NodeId {
358 assert!(
359 self.nodes.contains_key(&parent),
360 "Document::alloc_child: unknown parent {parent:?}"
361 );
362 let child_id = self.alloc(spec);
363 // Safe to index: we just verified the key exists, and `alloc`
364 // doesn't remove existing entries.
365 if let Some(parent_node) = self.nodes.get_mut(&parent) {
366 parent_node.children.push(child_id);
367 }
368 child_id
369 }
370
371 /// Get a node by id.
372 ///
373 /// # Examples
374 ///
375 /// ```
376 /// use std::path::PathBuf;
377 ///
378 /// use mos_core::{Document, NodeKind};
379 ///
380 /// let doc = Document::new(PathBuf::from("main.mos"));
381 ///
382 /// assert_eq!(doc.get(doc.root).map(|node| node.kind), Some(NodeKind::Document));
383 /// ```
384 #[must_use]
385 pub fn get(&self, id: NodeId) -> Option<&Node> {
386 self.nodes.get(&id)
387 }
388
389 /// Mutable accessor for a single node. Used by the resolver
390 /// (manifest §6 stage 3) to back-patch attributes like `number`
391 /// onto sections and `text` onto `@label` references.
392 ///
393 /// # Examples
394 ///
395 /// ```
396 /// use std::path::PathBuf;
397 ///
398 /// use mos_core::{AttrValue, Document};
399 ///
400 /// let mut doc = Document::new(PathBuf::from("main.mos"));
401 /// if let Some(root) = doc.get_mut(doc.root) {
402 /// root.attributes.insert("title".to_owned(), AttrValue::Str("Demo".to_owned()));
403 /// }
404 ///
405 /// assert!(doc.get(doc.root).is_some_and(|node| node.attributes.contains_key("title")));
406 /// ```
407 #[must_use]
408 pub fn get_mut(&mut self, id: NodeId) -> Option<&mut Node> {
409 self.nodes.get_mut(&id)
410 }
411
412 /// Iterate over every node in the arena in insertion order.
413 ///
414 /// # Examples
415 ///
416 /// ```
417 /// use std::path::PathBuf;
418 ///
419 /// use mos_core::{Document, NodeKind};
420 ///
421 /// let doc = Document::new(PathBuf::from("main.mos"));
422 /// let kinds: Vec<NodeKind> = doc.nodes().map(|node| node.kind).collect();
423 ///
424 /// assert_eq!(kinds, vec![NodeKind::Document]);
425 /// ```
426 pub fn nodes(&self) -> impl Iterator<Item = &Node> {
427 self.nodes.values()
428 }
429
430 /// Total number of nodes including the document root.
431 ///
432 /// # Examples
433 ///
434 /// ```
435 /// use std::path::PathBuf;
436 ///
437 /// use mos_core::Document;
438 ///
439 /// let doc = Document::new(PathBuf::from("main.mos"));
440 ///
441 /// assert_eq!(doc.len(), 1);
442 /// ```
443 #[must_use]
444 pub fn len(&self) -> usize {
445 self.nodes.len()
446 }
447
448 /// Return whether the document has no semantic content beyond the root.
449 ///
450 /// # Examples
451 ///
452 /// ```
453 /// use std::path::PathBuf;
454 ///
455 /// use mos_core::Document;
456 ///
457 /// let doc = Document::new(PathBuf::from("main.mos"));
458 ///
459 /// assert!(doc.is_empty());
460 /// ```
461 #[must_use]
462 pub fn is_empty(&self) -> bool {
463 // The root always exists, so `Document` is never truly empty;
464 // expose the conventional method anyway for clippy compliance.
465 self.len() <= 1
466 }
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// File name shared by every fixture in this module.
    const TEST_FILE: &str = "test.mos";

    /// A fresh document over [`TEST_FILE`].
    fn test_doc() -> Document {
        Document::new(PathBuf::from(TEST_FILE))
    }

    /// A spec of the given `kind` with a placeholder span over [`TEST_FILE`].
    fn spec(kind: NodeKind) -> NodeSpec {
        NodeSpec::new(kind, SourceSpan::placeholder(PathBuf::from(TEST_FILE)))
    }

    #[test]
    #[should_panic(expected = "unknown parent")]
    fn alloc_child_panics_on_unknown_parent() {
        let mut doc = test_doc();
        // This id was never handed out by `doc`, so the call has to abort
        // rather than leave a detached node behind.
        let never_allocated = NodeId(9999);
        doc.alloc_child(never_allocated, spec(NodeKind::Text));
    }

    #[test]
    fn document_alloc_and_traverse() {
        let mut doc = test_doc();
        let root = doc.root;
        let para = doc.alloc_child(root, spec(NodeKind::Paragraph));
        doc.alloc_child(para, spec(NodeKind::Text));

        // Root + paragraph + text.
        assert_eq!(doc.len(), 3);
        let child_count = |id: NodeId| doc.get(id).unwrap().children.len();
        assert_eq!(child_count(root), 1);
        assert_eq!(child_count(para), 1);
    }
}
509}