1use mos_core::Diagnostic;
2use mos_core::codes;
3
4use crate::parser::Parser;
5use crate::support::{
6 next_char_boundary, normalize_raw_text, skip_set_ws, skip_to_comma, strip_leading_label,
7};
8use crate::{DirectiveKind, Item, LengthUnit, RawBlockKind, SetArg, SetValue};
9
10impl Parser<'_> {
11 pub(crate) fn parse_directive_block(&mut self, kw: &'static str) {
12 if kw == "set" {
13 self.parse_set_block();
14 } else if kw == "pre" || kw == "code" {
15 self.parse_raw_block(kw);
16 } else {
17 self.parse_call_block(kw);
18 }
19 }
20
21 fn parse_raw_block(&mut self, kw: &'static str) {
22 let (line_start, _content_end, _line_end) = self.current_line_bounds();
23 let bytes = self.src.as_bytes();
24 debug_assert!(self.src[line_start + 1..].starts_with(kw));
25 let mut i = line_start + 1 + kw.len();
26 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
27 i += 1;
28 }
29 let mut args = Vec::new();
30 if i < bytes.len() && bytes[i] == b'(' {
31 let Some(args_end) = self.scan_balanced_parens(i) else {
32 self.diagnostics.push(
33 Diagnostic::simple(
34 &codes::MOS0016,
35 None,
36 format!("unterminated `#{kw}(...)` block"),
37 )
38 .with_span(self.span(line_start, bytes.len())),
39 );
40 self.pos = bytes.len();
41 return;
42 };
43 args = self.parse_set_body(i + 1, args_end - 1, true);
44 i = args_end;
45 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
46 i += 1;
47 }
48 }
49 if i >= bytes.len() || bytes[i] != b'[' {
50 self.diagnostics.push(
51 Diagnostic::simple(
52 &codes::MOS0013,
53 None,
54 format!(
55 "expected long-bracket raw body after `#{kw}` (for example `#{kw}[[...]]`)"
56 ),
57 )
58 .with_span(self.span(line_start, i)),
59 );
60 self.skip_line();
61 return;
62 }
63 let Some((body_start, eq_count)) = self.scan_long_raw_open(i) else {
64 self.diagnostics.push(
65 Diagnostic::simple(
66 &codes::MOS0013,
67 None,
68 format!("raw `#{kw}` blocks require long brackets like `#{kw}[[...]]`"),
69 )
70 .with_span(self.span(line_start, i + 1)),
71 );
72 self.skip_line();
73 return;
74 };
75 if let Some((body_end, close_end)) = self.scan_long_raw_close(body_start, eq_count) {
76 let text = normalize_raw_text(&self.src[body_start..body_end]);
77 let (_, content_end, _) = self.line_bounds_from(close_end);
78 let (after_label, parsed_label) = strip_leading_label(self.src, close_end, content_end);
79 let label_span = parsed_label
80 .as_ref()
81 .map(|label| self.span(label.start, label.end));
82 let label = parsed_label.map(|label| label.text);
83 let kind = if kw == "code" {
84 RawBlockKind::Code
85 } else {
86 RawBlockKind::Pre
87 };
88 self.items.push(Item::RawBlock {
89 kind,
90 args,
91 text,
92 label,
93 label_span,
94 span: self.span(line_start, after_label),
95 });
96 self.pos = after_label;
97 while self.pos < bytes.len() && (bytes[self.pos] == b' ' || bytes[self.pos] == b'\t') {
98 self.pos += 1;
99 }
100 if self.pos >= bytes.len() {
101 } else if bytes[self.pos] == b'\n' {
102 self.pos += 1;
103 } else if bytes[self.pos] == b'\r' && bytes.get(self.pos + 1) == Some(&b'\n') {
104 self.pos += 2;
105 } else {
106 self.diagnostics.push(
107 Diagnostic::simple(
108 &codes::MOS0019,
109 None,
110 format!("unexpected trailing content after raw `#{kw}` block"),
111 )
112 .with_span(self.span(self.pos, content_end)),
113 );
114 }
115 } else {
116 self.diagnostics.push(
117 Diagnostic::simple(
118 &codes::MOS0016,
119 None,
120 format!("unterminated raw `#{kw}` long-bracket block"),
121 )
122 .with_span(self.span(line_start, bytes.len())),
123 );
124 self.pos = bytes.len();
125 }
126 }
127
128 fn parse_set_block(&mut self) {
129 let (line_start, _content_end, _line_end) = self.current_line_bounds();
130 let bytes = self.src.as_bytes();
131 debug_assert!(self.src[line_start..].starts_with("#set"));
132 let mut i = line_start + "#set".len();
133 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
134 i += 1;
135 }
136 let name_start = i;
137 while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
138 i += 1;
139 }
140 let name = self.src[name_start..i].to_owned();
141 if name.is_empty() {
142 self.diagnostics.push(
143 Diagnostic::simple(&codes::MOS0010, None, "expected an identifier after `#set`")
144 .with_span(self.span(line_start, line_start + "#set".len())),
145 );
146 self.skip_line();
147 return;
148 }
149 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
150 i += 1;
151 }
152 if i >= bytes.len() || bytes[i] != b'(' {
153 self.diagnostics.push(
154 Diagnostic::simple(
155 &codes::MOS0013,
156 None,
157 format!("expected `(` after `#set {name}`"),
158 )
159 .with_span(self.span(name_start, i)),
160 );
161 self.skip_line();
162 return;
163 }
164 self.finish_directive_block(line_start, i, DirectiveKind::Set, name, "set", false);
165 }
166
167 fn parse_call_block(&mut self, kw: &'static str) {
168 let (line_start, _content_end, _line_end) = self.current_line_bounds();
169 let bytes = self.src.as_bytes();
170 debug_assert!(self.src[line_start + 1..].starts_with(kw));
171 let mut i = line_start + 1 + kw.len();
172 while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
173 i += 1;
174 }
175 if i >= bytes.len() || bytes[i] != b'(' {
176 self.diagnostics.push(
177 Diagnostic::simple(&codes::MOS0013, None, format!("expected `(` after `#{kw}`"))
178 .with_span(self.span(line_start, i)),
179 );
180 self.skip_line();
181 return;
182 }
183 let kind = match kw {
184 "image" => DirectiveKind::Image,
185 "figure" => DirectiveKind::Figure,
186 "bibliography" => DirectiveKind::Bibliography,
187 other => {
188 debug_assert!(false, "parse_call_block: unexpected keyword `{other}`");
189 DirectiveKind::Set
190 }
191 };
192 self.finish_directive_block(line_start, i, kind, kw.to_owned(), kw, true);
193 }
194
195 fn finish_directive_block(
196 &mut self,
197 line_start: usize,
198 paren_pos: usize,
199 kind: DirectiveKind,
200 name: String,
201 display_kw: &str,
202 allow_positional: bool,
203 ) {
204 let bytes = self.src.as_bytes();
205 if let Some(end) = self.scan_balanced_parens(paren_pos) {
206 let args = self.parse_set_body(paren_pos + 1, end - 1, allow_positional);
207 self.items.push(Item::Set {
208 kind,
209 name,
210 args,
211 span: self.span(line_start, end),
212 });
213 self.pos = end;
214 while self.pos < bytes.len() && (bytes[self.pos] == b' ' || bytes[self.pos] == b'\t') {
215 self.pos += 1;
216 }
217 if self.pos >= bytes.len() {
218 } else if bytes[self.pos] == b'\n' {
219 self.pos += 1;
220 } else if bytes[self.pos] == b'\r' && bytes.get(self.pos + 1) == Some(&b'\n') {
221 self.pos += 2;
222 } else {
223 let (_, content_end, _) = self.current_line_bounds();
224 self.diagnostics.push(
225 Diagnostic::simple(
226 &codes::MOS0019,
227 None,
228 format!("unexpected trailing content after `#{display_kw} ... )`"),
229 )
230 .with_span(self.span(self.pos, content_end)),
231 );
232 }
233 } else {
234 self.diagnostics.push(
235 Diagnostic::simple(
236 &codes::MOS0016,
237 None,
238 format!("unterminated `#{display_kw}(...)` block"),
239 )
240 .with_span(self.span(line_start, bytes.len())),
241 );
242 self.pos = bytes.len();
243 }
244 }
245
246 fn scan_balanced_parens(&self, start: usize) -> Option<usize> {
247 let bytes = self.src.as_bytes();
248 debug_assert_eq!(bytes.get(start), Some(&b'('));
249 let mut depth: u32 = 0;
250 let mut i = start;
251 let mut in_string = false;
252 while i < bytes.len() {
253 let b = bytes[i];
254 if in_string {
255 if b == b'\\' && i + 1 < bytes.len() {
256 i += 2;
257 continue;
258 }
259 if b == b'"' {
260 in_string = false;
261 }
262 i += 1;
263 continue;
264 }
265 match b {
266 b'"' => in_string = true,
267 b'(' => depth += 1,
268 b')' => {
269 depth -= 1;
270 if depth == 0 {
271 return Some(i + 1);
272 }
273 }
274 _ => {}
275 }
276 i += 1;
277 }
278 None
279 }
280
281 fn scan_long_raw_open(&self, start: usize) -> Option<(usize, usize)> {
282 let bytes = self.src.as_bytes();
283 debug_assert_eq!(bytes.get(start), Some(&b'['));
284 let mut i = start + 1;
285 while i < bytes.len() && bytes[i] == b'=' {
286 i += 1;
287 }
288 if i >= bytes.len() || bytes[i] != b'[' {
289 return None;
290 }
291 Some((i + 1, i - start - 1))
292 }
293
294 fn scan_long_raw_close(&self, start: usize, eq_count: usize) -> Option<(usize, usize)> {
295 let bytes = self.src.as_bytes();
296 let mut i = start;
297 while i < bytes.len() {
298 if bytes[i] == b']' {
299 let eq_start = i + 1;
300 let eq_end = eq_start + eq_count;
301 if eq_end < bytes.len()
302 && bytes[eq_start..eq_end].iter().all(|b| *b == b'=')
303 && bytes[eq_end] == b']'
304 {
305 return Some((i, eq_end + 1));
306 }
307 }
308 i += 1;
309 }
310 None
311 }
312
313 fn parse_set_body(&mut self, start: usize, end: usize, allow_positional: bool) -> Vec<SetArg> {
314 let bytes = self.src.as_bytes();
315 let mut args: Vec<SetArg> = Vec::new();
316 let mut i = start;
317 let mut first = true;
318 loop {
319 i = skip_set_ws(bytes, i, end);
320 if i >= end {
321 break;
322 }
323 if allow_positional && first && bytes[i] == b'"' {
324 let value_start = i;
325 let parsed = self.parse_set_value(&mut i, end);
326 let value_span = self.span(value_start, i);
327 if let Some(value) = parsed {
328 args.push(SetArg::Positional { value, value_span });
329 }
330 first = false;
331 i = self.consume_arg_separator(bytes, i, end);
332 continue;
333 }
334 first = false;
335 let key_start = i;
336 while i < end && (bytes[i].is_ascii_alphanumeric() || matches!(bytes[i], b'_' | b'-')) {
337 i += 1;
338 }
339 if i == key_start {
340 self.diagnostics.push(
341 Diagnostic::simple(
342 &codes::MOS0025,
343 None,
344 "expected `key: value` in directive arguments",
345 )
346 .with_span(self.span(i, (i + 1).min(end))),
347 );
348 i = skip_to_comma(bytes, i, end);
349 if i < end && bytes[i] == b',' {
350 i += 1;
351 }
352 continue;
353 }
354 let key = self.src[key_start..i].to_owned();
355 let key_span = self.span(key_start, i);
356 i = skip_set_ws(bytes, i, end);
357 if i >= end || bytes[i] != b':' {
358 self.diagnostics.push(
359 Diagnostic::simple(
360 &codes::MOS0025,
361 None,
362 format!("expected `:` after `{key}` in directive arguments"),
363 )
364 .with_span(key_span.clone()),
365 );
366 i = skip_to_comma(bytes, i, end);
367 if i < end && bytes[i] == b',' {
368 i += 1;
369 }
370 continue;
371 }
372 i += 1;
373 i = skip_set_ws(bytes, i, end);
374 let value_start = i;
375 let parsed = self.parse_set_value(&mut i, end);
376 let value_span = self.span(value_start, i);
377 if let Some(value) = parsed {
378 args.push(SetArg::Named {
379 key,
380 value,
381 key_span,
382 value_span,
383 });
384 }
385 i = self.consume_arg_separator(bytes, i, end);
386 }
387 args
388 }
389
390 fn consume_arg_separator(&mut self, bytes: &[u8], mut i: usize, end: usize) -> usize {
391 i = skip_set_ws(bytes, i, end);
392 if i < end {
393 if bytes[i] == b',' {
394 i += 1;
395 } else {
396 self.diagnostics.push(
397 Diagnostic::simple(
398 &codes::MOS0025,
399 None,
400 "expected `,` or `)` between directive arguments",
401 )
402 .with_span(self.span(i, (i + 1).min(end))),
403 );
404 i = skip_to_comma(bytes, i, end);
405 if i < end && bytes[i] == b',' {
406 i += 1;
407 }
408 }
409 }
410 i
411 }
412
413 fn parse_set_value(&mut self, i: &mut usize, end: usize) -> Option<SetValue> {
414 let bytes = self.src.as_bytes();
415 if *i >= end {
416 self.diagnostics.push(
417 Diagnostic::simple(
418 &codes::MOS0022,
419 None,
420 "expected a value in directive arguments",
421 )
422 .with_span(self.span(*i, *i)),
423 );
424 return None;
425 }
426 let b = bytes[*i];
427 if b == b'"' {
428 return self.parse_string_value(i, end);
429 }
430 if b == b'-' || b.is_ascii_digit() {
431 return self.parse_number_value(i, end);
432 }
433 if b.is_ascii_alphabetic() {
434 let id_start = *i;
435 while *i < end
436 && (bytes[*i].is_ascii_alphanumeric() || matches!(bytes[*i], b'_' | b'-'))
437 {
438 *i += 1;
439 }
440 return Some(SetValue::Ident(self.src[id_start..*i].to_owned()));
441 }
442 self.diagnostics.push(
443 Diagnostic::simple(
444 &codes::MOS0022,
445 None,
446 format!("unexpected character `{}` in directive value", b as char),
447 )
448 .with_span(self.span(*i, *i + 1)),
449 );
450 *i += 1;
451 None
452 }
453
454 fn parse_string_value(&mut self, i: &mut usize, end: usize) -> Option<SetValue> {
455 let bytes = self.src.as_bytes();
456 let start = *i;
457 *i += 1;
458 let mut out = String::new();
459 while *i < end {
460 let c = bytes[*i];
461 if c == b'\\' && *i + 1 < end {
462 let esc = bytes[*i + 1];
463 match esc {
464 b'\\' => out.push('\\'),
465 b'"' => out.push('"'),
466 b'n' => out.push('\n'),
467 b't' => out.push('\t'),
468 b'r' => out.push('\r'),
469 _ => {
470 let esc_start = *i + 1;
471 let esc_end = next_char_boundary(self.src, esc_start);
472 self.diagnostics.push(
473 Diagnostic::simple(
474 &codes::MOS0022,
475 None,
476 format!(
477 "unknown escape sequence `\\{}` in string",
478 &self.src[esc_start..esc_end]
479 ),
480 )
481 .with_span(self.span(*i, esc_end)),
482 );
483 out.push_str(&self.src[esc_start..esc_end]);
484 *i = esc_end;
485 continue;
486 }
487 }
488 *i += 2;
489 continue;
490 }
491 if c == b'"' {
492 *i += 1;
493 return Some(SetValue::Str(out));
494 }
495 let ch_start = *i;
496 let ch_end = next_char_boundary(self.src, ch_start);
497 out.push_str(&self.src[ch_start..ch_end]);
498 *i = ch_end;
499 }
500 self.diagnostics.push(
501 Diagnostic::simple(&codes::MOS0022, None, "unterminated string literal")
502 .with_span(self.span(start, end)),
503 );
504 None
505 }
506
507 fn parse_number_value(&mut self, i: &mut usize, end: usize) -> Option<SetValue> {
508 let bytes = self.src.as_bytes();
509 let num_start = *i;
510 if bytes[*i] == b'-' {
511 *i += 1;
512 }
513 let int_start = *i;
514 while *i < end && bytes[*i].is_ascii_digit() {
515 *i += 1;
516 }
517 let mut is_float = false;
518 if *i < end && bytes[*i] == b'.' && *i + 1 < end && bytes[*i + 1].is_ascii_digit() {
519 is_float = true;
520 *i += 1;
521 while *i < end && bytes[*i].is_ascii_digit() {
522 *i += 1;
523 }
524 }
525 if *i == int_start {
526 self.diagnostics.push(
527 Diagnostic::simple(
528 &codes::MOS0022,
529 None,
530 "expected a number after `-` in directive value",
531 )
532 .with_span(self.span(num_start, *i)),
533 );
534 return None;
535 }
536 let num_end = *i;
537 let unit_start = *i;
538 while *i < end && bytes[*i].is_ascii_alphabetic() {
539 *i += 1;
540 }
541 let unit = &self.src[unit_start..*i];
542 if unit.is_empty() {
543 let text = &self.src[num_start..num_end];
544 if is_float {
545 return text.parse::<f64>().ok().map(SetValue::Float).or_else(|| {
546 self.diagnostics.push(
547 Diagnostic::simple(
548 &codes::MOS0022,
549 None,
550 format!("malformed number `{text}`"),
551 )
552 .with_span(self.span(num_start, num_end)),
553 );
554 None
555 });
556 }
557 return text.parse::<i64>().ok().map(SetValue::Int).or_else(|| {
558 self.diagnostics.push(
559 Diagnostic::simple(
560 &codes::MOS0022,
561 None,
562 format!("malformed integer `{text}`"),
563 )
564 .with_span(self.span(num_start, num_end)),
565 );
566 None
567 });
568 }
569 let length_unit = match unit {
570 "mm" => LengthUnit::Mm,
571 "pt" => LengthUnit::Pt,
572 "em" => LengthUnit::Em,
573 _ => {
574 self.diagnostics.push(
575 Diagnostic::simple(
576 &codes::MOS0022,
577 None,
578 format!("unknown length unit `{unit}` (expected mm, pt, or em)"),
579 )
580 .with_span(self.span(unit_start, *i)),
581 );
582 return None;
583 }
584 };
585 let value = self.src[num_start..num_end].parse::<f64>().ok();
586 value.map(|v| SetValue::Length(v, length_unit)).or_else(|| {
587 self.diagnostics.push(
588 Diagnostic::simple(
589 &codes::MOS0022,
590 None,
591 format!("malformed length value `{}`", &self.src[num_start..num_end]),
592 )
593 .with_span(self.span(num_start, num_end)),
594 );
595 None
596 })
597 }
598}