Skip to main content

mos_core/
document.rs

1//! The lowered semantic document graph (manifest §5, §6 stage 2).
2//!
3//! [`Document`] owns every [`Node`] and hands them out through their stable
4//! [`NodeId`]. Each node carries a [`NodeKind`], a [`SourceSpan`], a
5//! [`ContentHash`], a [`StyleId`], and an [`AttrMap`] of [`AttrValue`]s.
6
7use std::collections::BTreeMap;
8use std::path::PathBuf;
9use std::sync::Arc;
10
11use crate::{ContentHash, SourceSpan};
12
/// Opaque handle naming one node stored in a [`Document`].
///
/// Manifest §5.1 would like IDs to come from
/// `hash(file path + syntactic position + explicit label + local structure)`
/// so they do not depend on parse order. That derivation is postponed to
/// MVP 5, when stable IDs become observable through the cache. Until then the
/// MVP 0 lowerer (`mos-eval`) receives monotonically increasing IDs from
/// `Document::alloc`.
///
/// # Examples
///
/// ```
/// use mos_core::NodeId;
///
/// let root = NodeId(0);
///
/// assert_eq!(root.0, 0);
/// ```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NodeId(pub u64);
32
/// Handle naming a resolved style bundle.
///
/// The default value is `StyleId(0)`.
///
/// # Examples
///
/// ```
/// use mos_core::StyleId;
///
/// let style = StyleId::default();
///
/// assert_eq!(style.0, 0);
/// ```
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StyleId(pub u32);
46
/// Every kind of node Mosaic recognises (manifest §5.1).
///
/// The enum is a plain, fieldless tag: it is `Copy`, comparable with `==`,
/// and hashable, so it can be used directly as a `HashMap`/`HashSet` key
/// (for example to count or dispatch on node kinds).
///
/// # Examples
///
/// ```
/// use mos_core::NodeKind;
///
/// let kind = NodeKind::Paragraph;
///
/// assert_eq!(kind, NodeKind::Paragraph);
/// ```
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum NodeKind {
    Document,
    Section,
    Paragraph,
    Text,
    Emphasis,
    Strong,
    BoldItalic,
    Math,
    Equation,
    /// A captioned container: an image together with a caption paragraph,
    /// laid out with the caption beneath the image. Cross-references via
    /// `@fig:foo` will target this kind once MVP 3 lands.
    Figure,
    /// A raster image (PNG / JPEG in MVP 1.5). The decoded pixel data and
    /// natural dimensions live on the node's attributes; see the `mos-eval`
    /// resolver for the exact attribute names.
    Image,
    Table,
    Citation,
    Reference,
    /// A `@page(label)` reference to the printed page number of a labelled
    /// target. Distinct from [`Reference`](Self::Reference) (which resolves to
    /// a section/figure number): a page reference resolves to where the target
    /// lands, which is only known after layout, via the resolve↔layout fixpoint
    /// (issue #72). Carries a `label` attribute and placeholder `text`; layout
    /// renders the `text` attribute like any inline run.
    PageReference,
    Theorem,
    Footnote,
    Bibliography,
    Raw,
    /// A bullet or numbered list. The `ordered` attribute tells the two
    /// apart, and the child nodes are [`NodeKind::ListItem`]s.
    List,
    /// One entry inside a [`NodeKind::List`]. Inline children carry the
    /// item's text; nested [`NodeKind::List`] children describe deeper
    /// levels.
    ListItem,
    /// `\\`: a forced line break inside a paragraph. Carries no
    /// attributes; layout consumes it as a `WordItem::HardBreak`
    /// sentinel in the inline word stream. A blank-line paragraph
    /// break is **not** the same node: it ends the paragraph and
    /// triggers paragraph-spacing leading, whereas `HardBreak` keeps
    /// the same paragraph and applies normal inter-line leading.
    HardBreak,
}
106
107/// A semantic document node (manifest §5.1).
108///
109/// Nodes are allocated only by [`Document::alloc`] / [`Document::alloc_child`]
110/// from a [`NodeSpec`]: the arena assigns the [`NodeId`] and owns the
111/// `content_hash`/`style_id` placeholders. Those two fields are `pub(crate)`,
112/// which makes the struct literal unconstructible outside this crate, so no
113/// caller can fabricate a node with a fake id or a hand-set hash.
114///
115/// # Examples
116///
117/// ```
118/// use std::path::PathBuf;
119///
120/// use mos_core::{Document, NodeKind, NodeSpec, SourceSpan};
121///
122/// let file = PathBuf::from("main.mos");
123/// let mut doc = Document::new(file.clone());
124/// let id = doc.alloc(NodeSpec::new(NodeKind::Paragraph, SourceSpan::placeholder(file)));
125///
126/// assert_eq!(doc.get(id).map(|node| &node.kind), Some(&NodeKind::Paragraph));
127/// ```
128#[derive(Clone, Debug)]
129pub struct Node {
130    pub id: NodeId,
131    pub kind: NodeKind,
132    pub span: SourceSpan,
133    pub children: Vec<NodeId>,
134    pub attributes: AttrMap,
135    /// Hash-derived identity placeholder (manifest §5.1); set by the arena,
136    /// always default until the MVP 5 cache work. `pub(crate)` to seal
137    /// external construction.
138    pub(crate) content_hash: ContentHash,
139    /// Resolved style slot placeholder; set by the arena, always default
140    /// until styling lands. `pub(crate)` to seal external construction.
141    pub(crate) style_id: StyleId,
142}
143
144impl Node {
145    /// The node's content hash: a hash-derived identity placeholder
146    /// (manifest §5.1), default until the MVP 5 cache work. Read-only: the
147    /// arena owns this field.
148    #[must_use]
149    pub const fn content_hash(&self) -> ContentHash {
150        self.content_hash
151    }
152
153    /// The node's resolved style slot: a placeholder, default until styling
154    /// lands. Read-only: the arena owns this field.
155    #[must_use]
156    pub const fn style_id(&self) -> StyleId {
157        self.style_id
158    }
159}
160
161/// The blueprint for a node handed to [`Document::alloc`] /
162/// [`Document::alloc_child`]. Carries only the fields a caller legitimately
163/// chooses: `kind`, `span`, and `attributes`. The arena supplies the
164/// `id`, the empty `children` list, and the `content_hash`/`style_id`
165/// placeholders, so an invalid node is unrepresentable at the call site.
166#[derive(Clone, Debug)]
167pub struct NodeSpec {
168    pub kind: NodeKind,
169    pub span: SourceSpan,
170    pub attributes: AttrMap,
171}
172
173impl NodeSpec {
174    /// A spec for a node of `kind` spanning `span`, with no attributes.
175    #[must_use]
176    pub fn new(kind: NodeKind, span: SourceSpan) -> Self {
177        Self {
178            kind,
179            span,
180            attributes: AttrMap::new(),
181        }
182    }
183
184    /// Attach `attributes` to this spec.
185    #[must_use]
186    pub fn with_attributes(mut self, attributes: AttrMap) -> Self {
187        self.attributes = attributes;
188        self
189    }
190}
191
192/// Attribute map carried on each node. Keys are interned strings in a
193/// later iteration; for now plain `String` keys are fine for the stub.
194pub type AttrMap = BTreeMap<String, AttrValue>;
195
/// A single attribute value stored on a semantic [`Node`].
///
/// # Examples
///
/// ```
/// use mos_core::AttrValue;
///
/// let value = AttrValue::Str("intro".to_owned());
///
/// assert_eq!(value, AttrValue::Str("intro".to_owned()));
/// ```
#[derive(Clone, Debug, PartialEq)]
pub enum AttrValue {
    Bool(bool),
    Int(i64),
    Float(f64),
    Str(String),
    List(Vec<Self>),
    /// A length that has already been converted to PDF points. Unit-tagged
    /// literals (`mm`, `pt`, `em`) exist only in the parser; the lowerer
    /// reduces them to this one canonical scalar so that layout never has
    /// to deal with units.
    Length(f64),
    /// Opaque binary payload. Its current use is carrying decoded raster
    /// image pixels (RGB8) on an [`NodeKind::Image`] node, so the PDF
    /// backend can emit an Image `XObject` without reading the source file
    /// a second time.
    ///
    /// The payload is an `Arc<[u8]>`, which keeps cloning a pixel-carrying
    /// node cheap (e.g. across cache boundaries, or when the same image
    /// would otherwise be duplicated through the document graph). Since the
    /// layout engine still dedups by resolved path, most documents end up
    /// with one buffer per image anyway; the `Arc` is insurance against
    /// accidental copies on the eval → layout boundary.
    Bytes(Arc<[u8]>),
}
232
233/// The lowered semantic document graph (manifest §5, §6 stage 2).
234///
235/// Owns every [`Node`] and exposes them through their stable [`NodeId`].
236/// MVP 0 stores nodes in insertion order; the manifest §5.1 hash-derived
237/// IDs land alongside the cache work in MVP 5.
238///
239/// # Examples
240///
241/// ```
242/// use std::path::PathBuf;
243///
244/// use mos_core::{Document, NodeId};
245///
246/// let doc = Document::new(PathBuf::from("main.mos"));
247///
248/// assert_eq!(doc.root, NodeId(0));
249/// ```
250#[derive(Debug)]
251pub struct Document {
252    pub root: NodeId,
253    pub file: PathBuf,
254    nodes: BTreeMap<NodeId, Node>,
255    next_id: u64,
256}
257
258impl Document {
259    /// Create an empty document rooted at `file`. Allocates the
260    /// `Document` root node (`NodeId(0)`) eagerly so callers can append
261    /// children to it immediately.
262    ///
263    /// # Examples
264    ///
265    /// ```
266    /// use std::path::PathBuf;
267    ///
268    /// use mos_core::Document;
269    ///
270    /// let doc = Document::new(PathBuf::from("main.mos"));
271    ///
272    /// assert_eq!(doc.len(), 1);
273    /// ```
274    #[must_use]
275    pub fn new(file: PathBuf) -> Self {
276        let root_id = NodeId(0);
277        let root_node = Node {
278            id: root_id,
279            kind: NodeKind::Document,
280            span: SourceSpan::placeholder(file.clone()),
281            content_hash: ContentHash::default(),
282            style_id: StyleId::default(),
283            children: Vec::new(),
284            attributes: AttrMap::new(),
285        };
286        let mut nodes = BTreeMap::new();
287        nodes.insert(root_id, root_node);
288        Self {
289            root: root_id,
290            file,
291            nodes,
292            next_id: 1,
293        }
294    }
295
296    /// Allocate a node from `spec` in the arena and return its assigned
297    /// [`NodeId`]. The arena fills in the id, an empty `children` list, and
298    /// the default `content_hash`/`style_id` placeholders.
299    ///
300    /// # Examples
301    ///
302    /// ```
303    /// use std::path::PathBuf;
304    ///
305    /// use mos_core::{Document, NodeId, NodeKind, NodeSpec, SourceSpan};
306    ///
307    /// let file = PathBuf::from("main.mos");
308    /// let mut doc = Document::new(file.clone());
309    /// let id = doc.alloc(NodeSpec::new(NodeKind::Paragraph, SourceSpan::placeholder(file)));
310    ///
311    /// assert_eq!(id, NodeId(1));
312    /// ```
313    pub fn alloc(&mut self, spec: NodeSpec) -> NodeId {
314        let id = NodeId(self.next_id);
315        self.next_id += 1;
316        self.nodes.insert(id, Self::node_from_spec(id, spec));
317        id
318    }
319
320    /// Build the arena-owned [`Node`] for `id` from a caller's [`NodeSpec`],
321    /// supplying the fields the caller does not control.
322    fn node_from_spec(id: NodeId, spec: NodeSpec) -> Node {
323        Node {
324            id,
325            kind: spec.kind,
326            span: spec.span,
327            children: Vec::new(),
328            attributes: spec.attributes,
329            content_hash: ContentHash::default(),
330            style_id: StyleId::default(),
331        }
332    }
333
334    /// Allocate a node from `spec` as a child of `parent` and return its
335    /// [`NodeId`].
336    ///
337    /// # Panics
338    ///
339    /// Panics if `parent` is not a node already allocated by this
340    /// `Document`. Silently producing detached nodes would hide lowerer
341    /// bugs in release builds, so this is intentionally a release-time
342    /// assertion rather than a `debug_assert!`.
343    ///
344    /// # Examples
345    ///
346    /// ```
347    /// use std::path::PathBuf;
348    ///
349    /// use mos_core::{Document, NodeKind, NodeSpec, SourceSpan};
350    ///
351    /// let file = PathBuf::from("main.mos");
352    /// let mut doc = Document::new(file.clone());
353    /// let child = doc.alloc_child(doc.root, NodeSpec::new(NodeKind::Paragraph, SourceSpan::placeholder(file)));
354    ///
355    /// assert_eq!(doc.get(doc.root).map(|node| node.children.as_slice()), Some(&[child][..]));
356    /// ```
357    pub fn alloc_child(&mut self, parent: NodeId, spec: NodeSpec) -> NodeId {
358        assert!(
359            self.nodes.contains_key(&parent),
360            "Document::alloc_child: unknown parent {parent:?}"
361        );
362        let child_id = self.alloc(spec);
363        // Safe to index: we just verified the key exists, and `alloc`
364        // doesn't remove existing entries.
365        if let Some(parent_node) = self.nodes.get_mut(&parent) {
366            parent_node.children.push(child_id);
367        }
368        child_id
369    }
370
371    /// Get a node by id.
372    ///
373    /// # Examples
374    ///
375    /// ```
376    /// use std::path::PathBuf;
377    ///
378    /// use mos_core::{Document, NodeKind};
379    ///
380    /// let doc = Document::new(PathBuf::from("main.mos"));
381    ///
382    /// assert_eq!(doc.get(doc.root).map(|node| node.kind), Some(NodeKind::Document));
383    /// ```
384    #[must_use]
385    pub fn get(&self, id: NodeId) -> Option<&Node> {
386        self.nodes.get(&id)
387    }
388
389    /// Mutable accessor for a single node. Used by the resolver
390    /// (manifest §6 stage 3) to back-patch attributes like `number`
391    /// onto sections and `text` onto `@label` references.
392    ///
393    /// # Examples
394    ///
395    /// ```
396    /// use std::path::PathBuf;
397    ///
398    /// use mos_core::{AttrValue, Document};
399    ///
400    /// let mut doc = Document::new(PathBuf::from("main.mos"));
401    /// if let Some(root) = doc.get_mut(doc.root) {
402    ///     root.attributes.insert("title".to_owned(), AttrValue::Str("Demo".to_owned()));
403    /// }
404    ///
405    /// assert!(doc.get(doc.root).is_some_and(|node| node.attributes.contains_key("title")));
406    /// ```
407    #[must_use]
408    pub fn get_mut(&mut self, id: NodeId) -> Option<&mut Node> {
409        self.nodes.get_mut(&id)
410    }
411
412    /// Iterate over every node in the arena in insertion order.
413    ///
414    /// # Examples
415    ///
416    /// ```
417    /// use std::path::PathBuf;
418    ///
419    /// use mos_core::{Document, NodeKind};
420    ///
421    /// let doc = Document::new(PathBuf::from("main.mos"));
422    /// let kinds: Vec<NodeKind> = doc.nodes().map(|node| node.kind).collect();
423    ///
424    /// assert_eq!(kinds, vec![NodeKind::Document]);
425    /// ```
426    pub fn nodes(&self) -> impl Iterator<Item = &Node> {
427        self.nodes.values()
428    }
429
430    /// Total number of nodes including the document root.
431    ///
432    /// # Examples
433    ///
434    /// ```
435    /// use std::path::PathBuf;
436    ///
437    /// use mos_core::Document;
438    ///
439    /// let doc = Document::new(PathBuf::from("main.mos"));
440    ///
441    /// assert_eq!(doc.len(), 1);
442    /// ```
443    #[must_use]
444    pub fn len(&self) -> usize {
445        self.nodes.len()
446    }
447
448    /// Return whether the document has no semantic content beyond the root.
449    ///
450    /// # Examples
451    ///
452    /// ```
453    /// use std::path::PathBuf;
454    ///
455    /// use mos_core::Document;
456    ///
457    /// let doc = Document::new(PathBuf::from("main.mos"));
458    ///
459    /// assert!(doc.is_empty());
460    /// ```
461    #[must_use]
462    pub fn is_empty(&self) -> bool {
463        // The root always exists, so `Document` is never truly empty;
464        // expose the conventional method anyway for clippy compliance.
465        self.len() <= 1
466    }
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// Spec for a node of `kind` with a placeholder span in `test.mos`.
    fn spec_of(kind: NodeKind) -> NodeSpec {
        NodeSpec::new(kind, SourceSpan::placeholder(PathBuf::from("test.mos")))
    }

    /// Number of children recorded on node `id`, or `None` if it is unknown.
    fn child_count(doc: &Document, id: NodeId) -> Option<usize> {
        doc.get(id).map(|node| node.children.len())
    }

    #[test]
    #[should_panic(expected = "unknown parent")]
    fn alloc_child_panics_on_unknown_parent() {
        let mut doc = Document::new(PathBuf::from("test.mos"));
        // `doc` never handed out this id, so attaching a child to it has to
        // abort; otherwise a detached node would leak into the arena.
        let never_allocated = NodeId(9999);
        doc.alloc_child(never_allocated, spec_of(NodeKind::Text));
    }

    #[test]
    fn document_alloc_and_traverse() {
        let mut doc = Document::new(PathBuf::from("test.mos"));
        let root = doc.root;

        // Build root -> paragraph -> text.
        let para = doc.alloc_child(root, spec_of(NodeKind::Paragraph));
        doc.alloc_child(para, spec_of(NodeKind::Text));

        assert_eq!(doc.len(), 3);
        assert_eq!(child_count(&doc, root), Some(1));
        assert_eq!(child_count(&doc, para), Some(1));
    }
}
509}