1use std::collections::{BTreeMap, BTreeSet};
22use std::fs;
23use std::path::{Path, PathBuf};
24
25use mos_bib::Bibliography;
26use mos_core::{
27 AttrMap, AttrValue, Diagnostic, Document, NodeId, NodeKind, NodeSpec, SourceSpan, Suggestion,
28 codes,
29};
30use mos_parse::{SetArg, SetValue};
31
32pub(super) fn lower_bibliography_directive(
39 document: &mut Document,
40 root: NodeId,
41 args: &[SetArg],
42 span: &SourceSpan,
43 source_file: &Path,
44 diagnostics: &mut Vec<Diagnostic>,
45) {
46 let Some(path) = bibliography_path(args, span, diagnostics) else {
47 return;
48 };
49 let resolved = match mos_core::resolve_source_path(&path, source_file) {
50 Ok(resolved) => resolved,
51 Err(err) => {
52 diagnostics.push(
53 Diagnostic::simple(
54 &codes::MOS0049,
55 None,
56 format!("cannot use bibliography path `{path}`: {err}"),
57 )
58 .with_span(span.clone()),
59 );
60 return;
61 }
62 };
63 if !resolved.is_file() {
69 diagnostics.push(
70 Diagnostic::simple(
71 &codes::MOS0041,
72 None,
73 format!(
74 "declared bibliography source `{}` was not found",
75 mos_core::display_path(&resolved)
76 ),
77 )
78 .with_span(span.clone()),
79 );
80 }
81 let mut attributes: AttrMap = BTreeMap::new();
82 attributes.insert("src".to_owned(), AttrValue::Str(path));
83 attributes.insert(
84 "resolved_path".to_owned(),
85 AttrValue::Str(resolved.to_string_lossy().into_owned()),
86 );
87 document.alloc_child(
88 root,
89 NodeSpec::new(NodeKind::Bibliography, span.clone()).with_attributes(attributes),
90 );
91}
92
93fn bibliography_path(
99 args: &[SetArg],
100 span: &SourceSpan,
101 diagnostics: &mut Vec<Diagnostic>,
102) -> Option<String> {
103 let mut path: Option<String> = None;
104 let mut invalid_path_arg = false;
105 for arg in args {
106 match arg {
107 SetArg::Positional { value, value_span } => {
110 if let SetValue::Str(s) = value {
111 if path.is_some() {
112 diagnostics.push(
113 Diagnostic::simple(
114 &codes::MOS0042,
115 None,
116 "duplicate path argument for `#bibliography`",
117 )
118 .with_span(value_span.clone()),
119 );
120 } else {
121 path = Some(s.clone());
122 }
123 } else {
124 invalid_path_arg = true;
125 diagnostics.push(
126 Diagnostic::simple(
127 &codes::MOS0020,
128 None,
129 "`#bibliography(...)` expects a string path",
130 )
131 .with_span(value_span.clone()),
132 );
133 }
134 }
135 SetArg::Named {
136 key,
137 value,
138 key_span,
139 value_span,
140 } => match key.as_str() {
141 "src" | "path" => {
142 if let SetValue::Str(s) = value {
143 if path.is_some() {
144 diagnostics.push(
145 Diagnostic::simple(
146 &codes::MOS0042,
147 None,
148 "duplicate path argument for `#bibliography`",
149 )
150 .with_span(value_span.clone()),
151 );
152 } else {
153 path = Some(s.clone());
154 }
155 } else {
156 invalid_path_arg = true;
157 diagnostics.push(
158 Diagnostic::simple(
159 &codes::MOS0020,
160 None,
161 "`#bibliography(...)` expects a string path",
162 )
163 .with_span(value_span.clone()),
164 );
165 }
166 }
167 _ => diagnostics.push(
168 Diagnostic::simple(
169 &codes::MOS0015,
170 None,
171 format!("unknown argument `{key}` for `#bibliography` (valid: src/path)"),
172 )
173 .with_span(key_span.clone()),
174 ),
175 },
176 }
177 }
178 let Some(path) = path else {
179 if invalid_path_arg {
180 return None;
181 }
182 diagnostics.push(
183 Diagnostic::simple(
184 &codes::MOS0040,
185 None,
186 "`#bibliography(...)` requires a path (e.g. `#bibliography(\"refs.bib\")`)",
187 )
188 .with_span(span.clone()),
189 );
190 return None;
191 };
192 if path.trim().is_empty() {
195 diagnostics.push(
196 Diagnostic::simple(
197 &codes::MOS0040,
198 None,
199 "`#bibliography(...)` requires a non-empty path (e.g. `#bibliography(\"refs.bib\")`)",
200 )
201 .with_span(span.clone()),
202 );
203 return None;
204 }
205 Some(path)
206}
207
208pub(super) fn resolve_citations(
225 document: &mut Document,
226 diagnostics: &mut Vec<Diagnostic>,
227) -> BTreeSet<String> {
228 let bibliography = load_bibliography(document, diagnostics);
229 let citation_ids: Vec<NodeId> = document
230 .nodes()
231 .filter(|node| node.kind == NodeKind::Citation)
232 .map(|node| node.id)
233 .collect();
234
235 let mut numbers: BTreeMap<String, usize> = BTreeMap::new();
240
241 for citation_id in citation_ids {
242 let Some(node) = document.get(citation_id) else {
243 continue;
244 };
245 let Some(AttrValue::Str(key)) = node.attributes.get("key").cloned() else {
246 continue;
247 };
248 if bibliography.records.entries.contains_key(&key) {
249 let next_number = numbers.len() + 1;
250 let number = *numbers.entry(key.clone()).or_insert(next_number);
251 if let Some(node) = document.get_mut(citation_id) {
252 node.attributes
253 .insert("resolved".to_owned(), AttrValue::Bool(true));
254 node.attributes
255 .insert("text".to_owned(), AttrValue::Str(format!("[{number}]")));
256 if let Some(origin) = bibliography.origins.get(&key) {
257 node.attributes.insert(
258 "target_path".to_owned(),
259 AttrValue::Str(origin.path.to_string_lossy().into_owned()),
260 );
261 if let (Ok(start), Ok(end)) = (
262 i64::try_from(origin.key_span.start()),
263 i64::try_from(origin.key_span.end()),
264 ) {
265 node.attributes
266 .insert("target_span.start".to_owned(), AttrValue::Int(start));
267 node.attributes
268 .insert("target_span.end".to_owned(), AttrValue::Int(end));
269 }
270 }
271 }
272 continue;
273 }
274 if !bibliography.complete {
275 continue;
276 }
277 let mut diagnostic = Diagnostic::simple(
278 &codes::MOS0045,
279 Some(node.span.clone()),
280 format!("unknown citation key `{key}` in bibliography records"),
281 )
282 .with_annotation(mos_core::DiagnosticAnnotation::Hint(
283 "declare the key in a `#bibliography(...)` BibTeX source".to_owned(),
284 ));
285 if let Some(candidate) = nearest_citation_key(&key, &bibliography.records.entries)
286 && let Some(span) = citation_key_span(node, &key)
287 {
288 diagnostic = diagnostic.with_suggestion(Suggestion::new(span, candidate));
289 }
290 diagnostics.push(diagnostic);
291 }
292
293 bibliography.records.entries.keys().cloned().collect()
294}
295
296fn citation_key_span(node: &mos_core::Node, key: &str) -> Option<SourceSpan> {
297 let start = node.span.start().checked_add(2)?;
298 let end = start.checked_add(key.len())?;
299 (end < node.span.end()).then(|| SourceSpan::new(node.span.file.clone(), start, end))
300}
301
/// Returns `true` when `key` is non-empty and consists solely of ASCII letters,
/// ASCII digits, and the characters `_`, `-`, `:` and `.`.
fn is_citation_key(key: &str) -> bool {
    if key.is_empty() {
        return false;
    }
    key.bytes().all(|byte| match byte {
        b'_' | b'-' | b':' | b'.' => true,
        other => other.is_ascii_alphanumeric(),
    })
}
308
/// Levenshtein distance between `a` and `b`, computed over their UTF-8 bytes.
///
/// Insertions, deletions and substitutions each cost 1. Because the comparison is
/// byte-wise, a multi-byte character counts as several units.
fn edit_distance(a: &str, b: &str) -> usize {
    let target = b.as_bytes();
    // `previous[j]` holds the distance between the already-processed prefix of `a`
    // and the first `j` bytes of `b`.
    let mut previous: Vec<usize> = (0..=target.len()).collect();
    let mut current: Vec<usize> = vec![0; target.len() + 1];

    for (consumed, &source_byte) in a.as_bytes().iter().enumerate() {
        current[0] = consumed + 1;
        for (column, &target_byte) in target.iter().enumerate() {
            let substitute = previous[column] + usize::from(source_byte != target_byte);
            let delete = previous[column + 1] + 1;
            let insert = current[column] + 1;
            current[column + 1] = substitute.min(delete).min(insert);
        }
        std::mem::swap(&mut previous, &mut current);
    }

    previous[target.len()]
}
324
325fn nearest_citation_key(
326 unknown: &str,
327 records: &BTreeMap<String, mos_bib::BibEntry>,
328) -> Option<String> {
329 if unknown.len() < 3 {
330 return None;
331 }
332 let max_distance = unknown.len() / 3;
333 let mut best: Option<(usize, &str)> = None;
334 let mut tied = false;
335 for key in records.keys().filter(|key| is_citation_key(key)) {
336 let distance = edit_distance(unknown, key);
337 if distance > max_distance {
338 continue;
339 }
340 match best {
341 None => {
342 best = Some((distance, key.as_str()));
343 tied = false;
344 }
345 Some((best_distance, _)) if distance < best_distance => {
346 best = Some((distance, key.as_str()));
347 tied = false;
348 }
349 Some((best_distance, _)) if distance == best_distance => tied = true,
350 Some(_) => {}
351 }
352 }
353 let (_, key) = best?;
354 (!tied).then(|| key.to_owned())
355}
356
357struct LoadedBibliography {
358 records: Bibliography,
359 origins: BTreeMap<String, BibliographyOrigin>,
360 complete: bool,
361}
362
363struct BibliographyOrigin {
364 path: PathBuf,
365 key_span: SourceSpan,
366}
367
368fn load_bibliography(document: &Document, diagnostics: &mut Vec<Diagnostic>) -> LoadedBibliography {
369 let mut merged = Bibliography::default();
370 let mut origins: BTreeMap<String, BibliographyOrigin> = BTreeMap::new();
371 let mut complete = true;
372 for node in document
373 .nodes()
374 .filter(|node| node.kind == NodeKind::Bibliography)
375 {
376 let Some(AttrValue::Str(path)) = node.attributes.get("resolved_path") else {
377 complete = false;
378 continue;
379 };
380 let path_buf = PathBuf::from(path);
381 if !path_buf.is_file() {
382 complete = false;
383 continue;
384 }
385 let source = match fs::read_to_string(&path_buf) {
386 Ok(source) => source,
387 Err(err) => {
388 complete = false;
389 diagnostics.push(Diagnostic::simple(
390 &codes::MOS0041,
391 Some(node.span.clone()),
392 format!(
393 "declared bibliography source `{}` could not be read: {err}",
394 mos_core::display_path(&path_buf)
395 ),
396 ));
397 continue;
398 }
399 };
400 match mos_bib::parse_bibtex(&source) {
401 Ok(parsed) => {
402 for (key, entry) in parsed.entries {
403 let key_span =
404 SourceSpan::new(path_buf.clone(), entry.key_span.start, entry.key_span.end);
405 if let Some(first) = origins.get(&key) {
406 diagnostics.push(
407 Diagnostic::simple(
408 &codes::MOS0046,
409 Some(node.span.clone()),
410 format!(
411 "duplicate citation key `{key}` in bibliography source `{}`",
412 mos_core::display_path(&path_buf)
413 ),
414 )
415 .with_annotation(mos_core::DiagnosticAnnotation::Related {
416 span: first.key_span.clone(),
417 message: format!(
418 "first bibliography source for `{key}` was `{}`",
419 mos_core::display_path(&first.path)
420 ),
421 })
422 .with_annotation(mos_core::DiagnosticAnnotation::Hint(
423 "keep citation keys unique across all declared bibliography sources"
424 .to_owned(),
425 )),
426 );
427 } else {
428 origins.insert(
429 key.clone(),
430 BibliographyOrigin {
431 path: path_buf.clone(),
432 key_span,
433 },
434 );
435 merged.entries.insert(key, entry);
436 }
437 }
438 }
439 Err(err) => {
440 complete = false;
441 diagnostics.push(err.to_diagnostic(path_buf));
442 }
443 }
444 }
445 LoadedBibliography {
446 records: merged,
447 origins,
448 complete,
449 }
450}