Skip to main content

mos_parse/
directive.rs

1use mos_core::Diagnostic;
2use mos_core::codes;
3
4use crate::parser::Parser;
5use crate::support::{
6    next_char_boundary, normalize_raw_text, skip_set_ws, skip_to_comma, strip_leading_label,
7};
8use crate::{DirectiveKind, Item, LengthUnit, RawBlockKind, SetArg, SetValue};
9
10impl Parser<'_> {
11    pub(crate) fn parse_directive_block(&mut self, kw: &'static str) {
12        if kw == "set" {
13            self.parse_set_block();
14        } else if kw == "pre" || kw == "code" {
15            self.parse_raw_block(kw);
16        } else {
17            self.parse_call_block(kw);
18        }
19    }
20
21    fn parse_raw_block(&mut self, kw: &'static str) {
22        let (line_start, _content_end, _line_end) = self.current_line_bounds();
23        let bytes = self.src.as_bytes();
24        debug_assert!(self.src[line_start + 1..].starts_with(kw));
25        let mut i = line_start + 1 + kw.len();
26        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
27            i += 1;
28        }
29        let mut args = Vec::new();
30        if i < bytes.len() && bytes[i] == b'(' {
31            let Some(args_end) = self.scan_balanced_parens(i) else {
32                self.diagnostics.push(
33                    Diagnostic::simple(
34                        &codes::MOS0016,
35                        None,
36                        format!("unterminated `#{kw}(...)` block"),
37                    )
38                    .with_span(self.span(line_start, bytes.len())),
39                );
40                self.pos = bytes.len();
41                return;
42            };
43            args = self.parse_set_body(i + 1, args_end - 1, true);
44            i = args_end;
45            while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
46                i += 1;
47            }
48        }
49        if i >= bytes.len() || bytes[i] != b'[' {
50            self.diagnostics.push(
51                Diagnostic::simple(
52                    &codes::MOS0013,
53                    None,
54                    format!(
55                        "expected long-bracket raw body after `#{kw}` (for example `#{kw}[[...]]`)"
56                    ),
57                )
58                .with_span(self.span(line_start, i)),
59            );
60            self.skip_line();
61            return;
62        }
63        let Some((body_start, eq_count)) = self.scan_long_raw_open(i) else {
64            self.diagnostics.push(
65                Diagnostic::simple(
66                    &codes::MOS0013,
67                    None,
68                    format!("raw `#{kw}` blocks require long brackets like `#{kw}[[...]]`"),
69                )
70                .with_span(self.span(line_start, i + 1)),
71            );
72            self.skip_line();
73            return;
74        };
75        if let Some((body_end, close_end)) = self.scan_long_raw_close(body_start, eq_count) {
76            let text = normalize_raw_text(&self.src[body_start..body_end]);
77            let (_, content_end, _) = self.line_bounds_from(close_end);
78            let (after_label, parsed_label) = strip_leading_label(self.src, close_end, content_end);
79            let label_span = parsed_label
80                .as_ref()
81                .map(|label| self.span(label.start, label.end));
82            let label = parsed_label.map(|label| label.text);
83            let kind = if kw == "code" {
84                RawBlockKind::Code
85            } else {
86                RawBlockKind::Pre
87            };
88            self.items.push(Item::RawBlock {
89                kind,
90                args,
91                text,
92                label,
93                label_span,
94                span: self.span(line_start, after_label),
95            });
96            self.pos = after_label;
97            while self.pos < bytes.len() && (bytes[self.pos] == b' ' || bytes[self.pos] == b'\t') {
98                self.pos += 1;
99            }
100            if self.pos >= bytes.len() {
101            } else if bytes[self.pos] == b'\n' {
102                self.pos += 1;
103            } else if bytes[self.pos] == b'\r' && bytes.get(self.pos + 1) == Some(&b'\n') {
104                self.pos += 2;
105            } else {
106                self.diagnostics.push(
107                    Diagnostic::simple(
108                        &codes::MOS0019,
109                        None,
110                        format!("unexpected trailing content after raw `#{kw}` block"),
111                    )
112                    .with_span(self.span(self.pos, content_end)),
113                );
114            }
115        } else {
116            self.diagnostics.push(
117                Diagnostic::simple(
118                    &codes::MOS0016,
119                    None,
120                    format!("unterminated raw `#{kw}` long-bracket block"),
121                )
122                .with_span(self.span(line_start, bytes.len())),
123            );
124            self.pos = bytes.len();
125        }
126    }
127
128    fn parse_set_block(&mut self) {
129        let (line_start, _content_end, _line_end) = self.current_line_bounds();
130        let bytes = self.src.as_bytes();
131        debug_assert!(self.src[line_start..].starts_with("#set"));
132        let mut i = line_start + "#set".len();
133        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
134            i += 1;
135        }
136        let name_start = i;
137        while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
138            i += 1;
139        }
140        let name = self.src[name_start..i].to_owned();
141        if name.is_empty() {
142            self.diagnostics.push(
143                Diagnostic::simple(&codes::MOS0010, None, "expected an identifier after `#set`")
144                    .with_span(self.span(line_start, line_start + "#set".len())),
145            );
146            self.skip_line();
147            return;
148        }
149        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
150            i += 1;
151        }
152        if i >= bytes.len() || bytes[i] != b'(' {
153            self.diagnostics.push(
154                Diagnostic::simple(
155                    &codes::MOS0013,
156                    None,
157                    format!("expected `(` after `#set {name}`"),
158                )
159                .with_span(self.span(name_start, i)),
160            );
161            self.skip_line();
162            return;
163        }
164        self.finish_directive_block(line_start, i, DirectiveKind::Set, name, "set", false);
165    }
166
167    fn parse_call_block(&mut self, kw: &'static str) {
168        let (line_start, _content_end, _line_end) = self.current_line_bounds();
169        let bytes = self.src.as_bytes();
170        debug_assert!(self.src[line_start + 1..].starts_with(kw));
171        let mut i = line_start + 1 + kw.len();
172        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
173            i += 1;
174        }
175        if i >= bytes.len() || bytes[i] != b'(' {
176            self.diagnostics.push(
177                Diagnostic::simple(&codes::MOS0013, None, format!("expected `(` after `#{kw}`"))
178                    .with_span(self.span(line_start, i)),
179            );
180            self.skip_line();
181            return;
182        }
183        let kind = match kw {
184            "image" => DirectiveKind::Image,
185            "figure" => DirectiveKind::Figure,
186            "bibliography" => DirectiveKind::Bibliography,
187            other => {
188                debug_assert!(false, "parse_call_block: unexpected keyword `{other}`");
189                DirectiveKind::Set
190            }
191        };
192        self.finish_directive_block(line_start, i, kind, kw.to_owned(), kw, true);
193    }
194
195    fn finish_directive_block(
196        &mut self,
197        line_start: usize,
198        paren_pos: usize,
199        kind: DirectiveKind,
200        name: String,
201        display_kw: &str,
202        allow_positional: bool,
203    ) {
204        let bytes = self.src.as_bytes();
205        if let Some(end) = self.scan_balanced_parens(paren_pos) {
206            let args = self.parse_set_body(paren_pos + 1, end - 1, allow_positional);
207            self.items.push(Item::Set {
208                kind,
209                name,
210                args,
211                span: self.span(line_start, end),
212            });
213            self.pos = end;
214            while self.pos < bytes.len() && (bytes[self.pos] == b' ' || bytes[self.pos] == b'\t') {
215                self.pos += 1;
216            }
217            if self.pos >= bytes.len() {
218            } else if bytes[self.pos] == b'\n' {
219                self.pos += 1;
220            } else if bytes[self.pos] == b'\r' && bytes.get(self.pos + 1) == Some(&b'\n') {
221                self.pos += 2;
222            } else {
223                let (_, content_end, _) = self.current_line_bounds();
224                self.diagnostics.push(
225                    Diagnostic::simple(
226                        &codes::MOS0019,
227                        None,
228                        format!("unexpected trailing content after `#{display_kw} ... )`"),
229                    )
230                    .with_span(self.span(self.pos, content_end)),
231                );
232            }
233        } else {
234            self.diagnostics.push(
235                Diagnostic::simple(
236                    &codes::MOS0016,
237                    None,
238                    format!("unterminated `#{display_kw}(...)` block"),
239                )
240                .with_span(self.span(line_start, bytes.len())),
241            );
242            self.pos = bytes.len();
243        }
244    }
245
246    fn scan_balanced_parens(&self, start: usize) -> Option<usize> {
247        let bytes = self.src.as_bytes();
248        debug_assert_eq!(bytes.get(start), Some(&b'('));
249        let mut depth: u32 = 0;
250        let mut i = start;
251        let mut in_string = false;
252        while i < bytes.len() {
253            let b = bytes[i];
254            if in_string {
255                if b == b'\\' && i + 1 < bytes.len() {
256                    i += 2;
257                    continue;
258                }
259                if b == b'"' {
260                    in_string = false;
261                }
262                i += 1;
263                continue;
264            }
265            match b {
266                b'"' => in_string = true,
267                b'(' => depth += 1,
268                b')' => {
269                    depth -= 1;
270                    if depth == 0 {
271                        return Some(i + 1);
272                    }
273                }
274                _ => {}
275            }
276            i += 1;
277        }
278        None
279    }
280
281    fn scan_long_raw_open(&self, start: usize) -> Option<(usize, usize)> {
282        let bytes = self.src.as_bytes();
283        debug_assert_eq!(bytes.get(start), Some(&b'['));
284        let mut i = start + 1;
285        while i < bytes.len() && bytes[i] == b'=' {
286            i += 1;
287        }
288        if i >= bytes.len() || bytes[i] != b'[' {
289            return None;
290        }
291        Some((i + 1, i - start - 1))
292    }
293
294    fn scan_long_raw_close(&self, start: usize, eq_count: usize) -> Option<(usize, usize)> {
295        let bytes = self.src.as_bytes();
296        let mut i = start;
297        while i < bytes.len() {
298            if bytes[i] == b']' {
299                let eq_start = i + 1;
300                let eq_end = eq_start + eq_count;
301                if eq_end < bytes.len()
302                    && bytes[eq_start..eq_end].iter().all(|b| *b == b'=')
303                    && bytes[eq_end] == b']'
304                {
305                    return Some((i, eq_end + 1));
306                }
307            }
308            i += 1;
309        }
310        None
311    }
312
313    fn parse_set_body(&mut self, start: usize, end: usize, allow_positional: bool) -> Vec<SetArg> {
314        let bytes = self.src.as_bytes();
315        let mut args: Vec<SetArg> = Vec::new();
316        let mut i = start;
317        let mut first = true;
318        loop {
319            i = skip_set_ws(bytes, i, end);
320            if i >= end {
321                break;
322            }
323            if allow_positional && first && bytes[i] == b'"' {
324                let value_start = i;
325                let parsed = self.parse_set_value(&mut i, end);
326                let value_span = self.span(value_start, i);
327                if let Some(value) = parsed {
328                    args.push(SetArg::Positional { value, value_span });
329                }
330                first = false;
331                i = self.consume_arg_separator(bytes, i, end);
332                continue;
333            }
334            first = false;
335            let key_start = i;
336            while i < end && (bytes[i].is_ascii_alphanumeric() || matches!(bytes[i], b'_' | b'-')) {
337                i += 1;
338            }
339            if i == key_start {
340                self.diagnostics.push(
341                    Diagnostic::simple(
342                        &codes::MOS0025,
343                        None,
344                        "expected `key: value` in directive arguments",
345                    )
346                    .with_span(self.span(i, (i + 1).min(end))),
347                );
348                i = skip_to_comma(bytes, i, end);
349                if i < end && bytes[i] == b',' {
350                    i += 1;
351                }
352                continue;
353            }
354            let key = self.src[key_start..i].to_owned();
355            let key_span = self.span(key_start, i);
356            i = skip_set_ws(bytes, i, end);
357            if i >= end || bytes[i] != b':' {
358                self.diagnostics.push(
359                    Diagnostic::simple(
360                        &codes::MOS0025,
361                        None,
362                        format!("expected `:` after `{key}` in directive arguments"),
363                    )
364                    .with_span(key_span.clone()),
365                );
366                i = skip_to_comma(bytes, i, end);
367                if i < end && bytes[i] == b',' {
368                    i += 1;
369                }
370                continue;
371            }
372            i += 1;
373            i = skip_set_ws(bytes, i, end);
374            let value_start = i;
375            let parsed = self.parse_set_value(&mut i, end);
376            let value_span = self.span(value_start, i);
377            if let Some(value) = parsed {
378                args.push(SetArg::Named {
379                    key,
380                    value,
381                    key_span,
382                    value_span,
383                });
384            }
385            i = self.consume_arg_separator(bytes, i, end);
386        }
387        args
388    }
389
390    fn consume_arg_separator(&mut self, bytes: &[u8], mut i: usize, end: usize) -> usize {
391        i = skip_set_ws(bytes, i, end);
392        if i < end {
393            if bytes[i] == b',' {
394                i += 1;
395            } else {
396                self.diagnostics.push(
397                    Diagnostic::simple(
398                        &codes::MOS0025,
399                        None,
400                        "expected `,` or `)` between directive arguments",
401                    )
402                    .with_span(self.span(i, (i + 1).min(end))),
403                );
404                i = skip_to_comma(bytes, i, end);
405                if i < end && bytes[i] == b',' {
406                    i += 1;
407                }
408            }
409        }
410        i
411    }
412
413    fn parse_set_value(&mut self, i: &mut usize, end: usize) -> Option<SetValue> {
414        let bytes = self.src.as_bytes();
415        if *i >= end {
416            self.diagnostics.push(
417                Diagnostic::simple(
418                    &codes::MOS0022,
419                    None,
420                    "expected a value in directive arguments",
421                )
422                .with_span(self.span(*i, *i)),
423            );
424            return None;
425        }
426        let b = bytes[*i];
427        if b == b'"' {
428            return self.parse_string_value(i, end);
429        }
430        if b == b'-' || b.is_ascii_digit() {
431            return self.parse_number_value(i, end);
432        }
433        if b.is_ascii_alphabetic() {
434            let id_start = *i;
435            while *i < end
436                && (bytes[*i].is_ascii_alphanumeric() || matches!(bytes[*i], b'_' | b'-'))
437            {
438                *i += 1;
439            }
440            return Some(SetValue::Ident(self.src[id_start..*i].to_owned()));
441        }
442        self.diagnostics.push(
443            Diagnostic::simple(
444                &codes::MOS0022,
445                None,
446                format!("unexpected character `{}` in directive value", b as char),
447            )
448            .with_span(self.span(*i, *i + 1)),
449        );
450        *i += 1;
451        None
452    }
453
454    fn parse_string_value(&mut self, i: &mut usize, end: usize) -> Option<SetValue> {
455        let bytes = self.src.as_bytes();
456        let start = *i;
457        *i += 1;
458        let mut out = String::new();
459        while *i < end {
460            let c = bytes[*i];
461            if c == b'\\' && *i + 1 < end {
462                let esc = bytes[*i + 1];
463                match esc {
464                    b'\\' => out.push('\\'),
465                    b'"' => out.push('"'),
466                    b'n' => out.push('\n'),
467                    b't' => out.push('\t'),
468                    b'r' => out.push('\r'),
469                    _ => {
470                        let esc_start = *i + 1;
471                        let esc_end = next_char_boundary(self.src, esc_start);
472                        self.diagnostics.push(
473                            Diagnostic::simple(
474                                &codes::MOS0022,
475                                None,
476                                format!(
477                                    "unknown escape sequence `\\{}` in string",
478                                    &self.src[esc_start..esc_end]
479                                ),
480                            )
481                            .with_span(self.span(*i, esc_end)),
482                        );
483                        out.push_str(&self.src[esc_start..esc_end]);
484                        *i = esc_end;
485                        continue;
486                    }
487                }
488                *i += 2;
489                continue;
490            }
491            if c == b'"' {
492                *i += 1;
493                return Some(SetValue::Str(out));
494            }
495            let ch_start = *i;
496            let ch_end = next_char_boundary(self.src, ch_start);
497            out.push_str(&self.src[ch_start..ch_end]);
498            *i = ch_end;
499        }
500        self.diagnostics.push(
501            Diagnostic::simple(&codes::MOS0022, None, "unterminated string literal")
502                .with_span(self.span(start, end)),
503        );
504        None
505    }
506
507    fn parse_number_value(&mut self, i: &mut usize, end: usize) -> Option<SetValue> {
508        let bytes = self.src.as_bytes();
509        let num_start = *i;
510        if bytes[*i] == b'-' {
511            *i += 1;
512        }
513        let int_start = *i;
514        while *i < end && bytes[*i].is_ascii_digit() {
515            *i += 1;
516        }
517        let mut is_float = false;
518        if *i < end && bytes[*i] == b'.' && *i + 1 < end && bytes[*i + 1].is_ascii_digit() {
519            is_float = true;
520            *i += 1;
521            while *i < end && bytes[*i].is_ascii_digit() {
522                *i += 1;
523            }
524        }
525        if *i == int_start {
526            self.diagnostics.push(
527                Diagnostic::simple(
528                    &codes::MOS0022,
529                    None,
530                    "expected a number after `-` in directive value",
531                )
532                .with_span(self.span(num_start, *i)),
533            );
534            return None;
535        }
536        let num_end = *i;
537        let unit_start = *i;
538        while *i < end && bytes[*i].is_ascii_alphabetic() {
539            *i += 1;
540        }
541        let unit = &self.src[unit_start..*i];
542        if unit.is_empty() {
543            let text = &self.src[num_start..num_end];
544            if is_float {
545                return text.parse::<f64>().ok().map(SetValue::Float).or_else(|| {
546                    self.diagnostics.push(
547                        Diagnostic::simple(
548                            &codes::MOS0022,
549                            None,
550                            format!("malformed number `{text}`"),
551                        )
552                        .with_span(self.span(num_start, num_end)),
553                    );
554                    None
555                });
556            }
557            return text.parse::<i64>().ok().map(SetValue::Int).or_else(|| {
558                self.diagnostics.push(
559                    Diagnostic::simple(
560                        &codes::MOS0022,
561                        None,
562                        format!("malformed integer `{text}`"),
563                    )
564                    .with_span(self.span(num_start, num_end)),
565                );
566                None
567            });
568        }
569        let length_unit = match unit {
570            "mm" => LengthUnit::Mm,
571            "pt" => LengthUnit::Pt,
572            "em" => LengthUnit::Em,
573            _ => {
574                self.diagnostics.push(
575                    Diagnostic::simple(
576                        &codes::MOS0022,
577                        None,
578                        format!("unknown length unit `{unit}` (expected mm, pt, or em)"),
579                    )
580                    .with_span(self.span(unit_start, *i)),
581                );
582                return None;
583            }
584        };
585        let value = self.src[num_start..num_end].parse::<f64>().ok();
586        value.map(|v| SetValue::Length(v, length_unit)).or_else(|| {
587            self.diagnostics.push(
588                Diagnostic::simple(
589                    &codes::MOS0022,
590                    None,
591                    format!("malformed length value `{}`", &self.src[num_start..num_end]),
592                )
593                .with_span(self.span(num_start, num_end)),
594            );
595            None
596        })
597    }
598}