Skip to main content

mos_csl/
from_bibtex.rs

1//! Map parsed BibTeX records ([`mos_bib::BibEntry`]) into CSL [`Item`]s.
2//!
3//! This is a best-effort, infallible mapping: BibTeX entry types become the
4//! closest CSL [`ItemType`] (unknown → [`ItemType::Document`]), and recognised
5//! BibTeX fields become CSL variables. Unrecognised fields are dropped, as CSL
6//! processors do.
7//!
8//! Name handling is intentionally minimal: `author`/`editor` are split on
9//! whitespace-delimited `and` tokens, and per name a `Last, First` comma form
10//! or `First Last` form becomes family/given. Single-token names are kept as a
11//! [`literal`](Name::literal). Full BibTeX name parsing (protected institutional
12//! names, von/Jr particles) and `month` handling are future refinements.
13
14use std::collections::BTreeMap;
15
16use mos_bib::{BibEntry, Bibliography};
17
18use crate::item::{
19    Date, DateVariable, Item, ItemType, Name, NameVariable, NumberVariable, StandardVariable,
20};
21
22/// Map one BibTeX entry to a CSL [`Item`].
23#[must_use]
24pub fn item_from_bib_entry(entry: &BibEntry) -> Item {
25    let mut item = Item::new(entry.key.clone(), item_type_for(&entry.entry_type));
26    for (field, value) in &entry.fields {
27        apply_field(&mut item, &entry.entry_type, field, value);
28    }
29    item
30}
31
32/// Map a whole [`Bibliography`] to CSL items keyed by citation key.
33#[must_use]
34pub fn library_from_bibliography(bibliography: &Bibliography) -> BTreeMap<String, Item> {
35    bibliography
36        .entries
37        .iter()
38        .map(|(key, entry)| (key.clone(), item_from_bib_entry(entry)))
39        .collect()
40}
41
42/// BibTeX entry type (already lowercased by `mos-bib`) → closest CSL type.
43fn item_type_for(entry_type: &str) -> ItemType {
44    match entry_type {
45        "article" => ItemType::ArticleJournal,
46        "book" | "proceedings" => ItemType::Book,
47        "booklet" => ItemType::Pamphlet,
48        "inbook" | "incollection" => ItemType::Chapter,
49        "conference" | "inproceedings" => ItemType::PaperConference,
50        "manual" | "techreport" => ItemType::Report,
51        "mastersthesis" | "phdthesis" | "thesis" => ItemType::Thesis,
52        "unpublished" => ItemType::Manuscript,
53        "online" | "electronic" => ItemType::Webpage,
54        _ => ItemType::Document,
55    }
56}
57
58/// Place one recognised BibTeX field onto the item; drop unknown fields.
59fn apply_field(item: &mut Item, entry_type: &str, field: &str, value: &str) {
60    // Recognised string ("standard") fields, grouped by their CSL target.
61    let standard = match field {
62        "title" => Some(StandardVariable::Title),
63        "journal" | "booktitle" => Some(StandardVariable::ContainerTitle),
64        "publisher" | "school" | "institution" => Some(StandardVariable::Publisher),
65        "address" if is_conference_entry(entry_type) => Some(StandardVariable::EventPlace),
66        "address" => Some(StandardVariable::PublisherPlace),
67        "series" => Some(StandardVariable::CollectionTitle),
68        "note" => Some(StandardVariable::Note),
69        "abstract" => Some(StandardVariable::Abstract),
70        "keywords" => Some(StandardVariable::Keyword),
71        "doi" => Some(StandardVariable::Doi),
72        "url" => Some(StandardVariable::Url),
73        "isbn" => Some(StandardVariable::Isbn),
74        "issn" => Some(StandardVariable::Issn),
75        "language" => Some(StandardVariable::Language),
76        _ => None,
77    };
78    if let Some(variable) = standard {
79        item.standard.insert(variable, value.to_owned());
80        return;
81    }
82
83    // Recognised number fields.
84    let number = match field {
85        "volume" => Some(NumberVariable::Volume),
86        "number" if is_report_entry(entry_type) => Some(NumberVariable::Number),
87        "number" => Some(NumberVariable::Issue),
88        "pages" => Some(NumberVariable::Page),
89        "edition" => Some(NumberVariable::Edition),
90        "chapter" => Some(NumberVariable::ChapterNumber),
91        _ => None,
92    };
93    if let Some(variable) = number {
94        item.number.insert(variable, value.to_owned());
95        return;
96    }
97
98    // Name and date fields; anything else is dropped, as CSL processors do.
99    match field {
100        "author" => {
101            item.name.insert(NameVariable::Author, parse_names(value));
102        }
103        "editor" => {
104            item.name.insert(NameVariable::Editor, parse_names(value));
105        }
106        "year" => {
107            item.date.insert(DateVariable::Issued, parse_year(value));
108        }
109        _ => {}
110    }
111}
112
/// Whether the BibTeX entry type is one of the conference-paper types.
fn is_conference_entry(entry_type: &str) -> bool {
    ["conference", "inproceedings"].contains(&entry_type)
}
116
/// Whether the BibTeX entry type is one of the report-like types.
fn is_report_entry(entry_type: &str) -> bool {
    entry_type == "manual" || entry_type == "techreport"
}
120
121/// Split a BibTeX name list on whitespace-delimited `and` tokens.
122fn parse_names(value: &str) -> Vec<Name> {
123    let mut names = Vec::new();
124    let mut token_start = 0;
125    let mut search_start = 0;
126
127    while let Some(relative_start) = value[search_start..].find("and") {
128        let and_start = search_start + relative_start;
129        let and_end = and_start + "and".len();
130        if is_name_separator(value, and_start, and_end) {
131            push_name(&mut names, &value[token_start..and_start]);
132            token_start = and_end;
133        }
134        search_start = and_end;
135    }
136
137    push_name(&mut names, &value[token_start..]);
138    names
139}
140
/// Whether the byte range `start..end` of `value` has a whitespace character
/// directly before it and directly after it.
///
/// A range touching either end of `value` is never a separator, because
/// there is no character on that side.
fn is_name_separator(value: &str, start: usize, end: usize) -> bool {
    let (head, tail) = (&value[..start], &value[end..]);
    head.ends_with(char::is_whitespace) && tail.starts_with(char::is_whitespace)
}
146
147fn push_name(names: &mut Vec<Name>, token: &str) {
148    let trimmed = token.trim();
149    if !trimmed.is_empty() {
150        names.push(parse_one_name(trimmed));
151    }
152}
153
154/// `Last, First` and `First Last` forms become family/given; single-token names
155/// stay literal because `mos-bib` does not preserve institutional bracing yet.
156fn parse_one_name(token: &str) -> Name {
157    match token.split_once(',') {
158        Some((family, given)) => Name::person(family.trim(), given.trim()),
159        None => parse_name_without_comma(token),
160    }
161}
162
163fn parse_name_without_comma(token: &str) -> Name {
164    match token.rsplit_once(char::is_whitespace) {
165        Some((given, family)) => Name::person(family.trim(), given.trim()),
166        None => Name::literal(token),
167    }
168}
169
170/// Parse a BibTeX `year` into an `issued` [`Date`]; a non-numeric year is kept
171/// as a literal.
172fn parse_year(value: &str) -> Date {
173    match value.trim().parse::<i32>() {
174        Ok(year) => Date::year(year),
175        Err(_) => Date::literal(value),
176    }
177}
178
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a [`BibEntry`] fixture from borrowed `(field, value)` pairs.
    fn entry(entry_type: &str, key: &str, fields: &[(&str, &str)]) -> BibEntry {
        BibEntry {
            entry_type: entry_type.to_owned(),
            key: key.to_owned(),
            key_span: 0..key.len(),
            fields: fields
                .iter()
                .map(|&(name, value)| (name.to_owned(), value.to_owned()))
                .collect(),
        }
    }

    /// Look up a string variable on a mapped item.
    fn standard_value(item: &Item, variable: StandardVariable) -> Option<&str> {
        item.standard.get(&variable).map(String::as_str)
    }

    /// Look up a number variable on a mapped item.
    fn number_value(item: &Item, variable: NumberVariable) -> Option<&str> {
        item.number.get(&variable).map(String::as_str)
    }

    /// The parsed `author` names of a mapped item, if any.
    fn authors_of(item: &Item) -> Option<&Vec<Name>> {
        item.name.get(&NameVariable::Author)
    }

    /// Map a book whose `author` field is `author_field` and check that it
    /// yields exactly Knuth followed by Lovelace.
    fn assert_knuth_and_lovelace(author_field: &str) {
        let item = item_from_bib_entry(&entry("book", "k", &[("author", author_field)]));
        let authors = authors_of(&item).expect("authors present");
        assert_eq!(authors.len(), 2);
        assert_eq!(authors[0], Name::person("Knuth", "Donald E."));
        assert_eq!(authors[1], Name::person("Lovelace", "Ada"));
    }

    #[test]
    fn maps_article_type_and_core_fields() {
        let fields = [
            ("title", "Literate Programming"),
            ("year", "1984"),
            ("journal", "The Computer Journal"),
        ];
        let item = item_from_bib_entry(&entry("article", "knuth1984", &fields));

        assert_eq!(item.id, "knuth1984");
        assert_eq!(item.item_type, ItemType::ArticleJournal);
        assert_eq!(
            standard_value(&item, StandardVariable::Title),
            Some("Literate Programming")
        );
        assert_eq!(
            standard_value(&item, StandardVariable::ContainerTitle),
            Some("The Computer Journal")
        );
        assert_eq!(
            item.date.get(&DateVariable::Issued),
            Some(&Date::year(1984))
        );
    }

    #[test]
    fn splits_authors_on_and_and_comma() {
        assert_knuth_and_lovelace("Knuth, Donald E. and Ada Lovelace");
    }

    #[test]
    fn splits_names_on_whitespace_delimited_and_tokens() {
        assert_knuth_and_lovelace("Knuth, Donald E.  and\n  Ada Lovelace");
    }

    #[test]
    fn unknown_type_is_document_and_unknown_fields_drop() {
        let item = item_from_bib_entry(&entry(
            "flibble",
            "k",
            &[("title", "T"), ("nonsense", "x")],
        ));
        assert_eq!(item.item_type, ItemType::Document);
        assert!(item.standard.contains_key(&StandardVariable::Title));
        assert_eq!(item.standard.len(), 1, "unknown field should be dropped");
    }

    #[test]
    fn non_numeric_year_becomes_a_literal_date() {
        let item = item_from_bib_entry(&entry("misc", "k", &[("year", "in press")]));
        assert_eq!(
            item.date.get(&DateVariable::Issued),
            Some(&Date::literal("in press"))
        );
    }

    #[test]
    fn maps_bibtex_entry_type_groups() {
        let cases = [
            ("article", ItemType::ArticleJournal),
            ("book", ItemType::Book),
            ("proceedings", ItemType::Book),
            ("booklet", ItemType::Pamphlet),
            ("inbook", ItemType::Chapter),
            ("incollection", ItemType::Chapter),
            ("conference", ItemType::PaperConference),
            ("inproceedings", ItemType::PaperConference),
            ("manual", ItemType::Report),
            ("techreport", ItemType::Report),
            ("mastersthesis", ItemType::Thesis),
            ("phdthesis", ItemType::Thesis),
            ("thesis", ItemType::Thesis),
            ("unpublished", ItemType::Manuscript),
            ("online", ItemType::Webpage),
            ("electronic", ItemType::Webpage),
            ("misc", ItemType::Document),
        ];

        for (entry_type, expected) in cases {
            let item = item_from_bib_entry(&entry(entry_type, "k", &[]));
            assert_eq!(item.item_type, expected, "entry type: {entry_type}");
        }
    }

    #[test]
    fn maps_standard_and_number_field_groups() {
        let fields = [
            ("title", "Title"),
            ("booktitle", "Container"),
            ("publisher", "Publisher"),
            ("school", "School"),
            ("institution", "Institution"),
            ("address", "Place"),
            ("series", "Series"),
            ("note", "Note"),
            ("abstract", "Abstract"),
            ("keywords", "Keywords"),
            ("doi", "10.0/demo"),
            ("url", "https://example.invalid"),
            ("isbn", "ISBN"),
            ("issn", "ISSN"),
            ("language", "en"),
            ("volume", "2"),
            ("number", "4"),
            ("pages", "10-20"),
            ("edition", "3"),
            ("chapter", "7"),
        ];
        let item = item_from_bib_entry(&entry("book", "k", &fields));

        let expected_standard = [
            (StandardVariable::Title, "Title"),
            (StandardVariable::ContainerTitle, "Container"),
            (StandardVariable::Publisher, "School"),
            (StandardVariable::PublisherPlace, "Place"),
            (StandardVariable::CollectionTitle, "Series"),
            (StandardVariable::Note, "Note"),
            (StandardVariable::Abstract, "Abstract"),
            (StandardVariable::Keyword, "Keywords"),
            (StandardVariable::Doi, "10.0/demo"),
            (StandardVariable::Url, "https://example.invalid"),
            (StandardVariable::Isbn, "ISBN"),
            (StandardVariable::Issn, "ISSN"),
            (StandardVariable::Language, "en"),
        ];
        for (variable, expected) in expected_standard {
            assert_eq!(standard_value(&item, variable), Some(expected));
        }

        let expected_number = [
            (NumberVariable::Volume, "2"),
            (NumberVariable::Issue, "4"),
            (NumberVariable::Page, "10-20"),
            (NumberVariable::Edition, "3"),
            (NumberVariable::ChapterNumber, "7"),
        ];
        for (variable, expected) in expected_number {
            assert_eq!(number_value(&item, variable), Some(expected));
        }
    }

    #[test]
    fn maps_report_number_to_number_not_issue() {
        let item = item_from_bib_entry(&entry("techreport", "k", &[("number", "TR-7")]));
        assert_eq!(number_value(&item, NumberVariable::Number), Some("TR-7"));
        assert!(!item.number.contains_key(&NumberVariable::Issue));
    }

    #[test]
    fn maps_conference_address_to_event_place() {
        let item = item_from_bib_entry(&entry("inproceedings", "k", &[("address", "Paris")]));
        assert_eq!(
            standard_value(&item, StandardVariable::EventPlace),
            Some("Paris")
        );
        assert!(!item
            .standard
            .contains_key(&StandardVariable::PublisherPlace));
    }

    #[test]
    fn maps_editors_and_skips_empty_name_tokens() {
        let fields = [
            ("author", " Ada Lovelace and  and Turing, Alan "),
            ("editor", "Knuth, Donald"),
        ];
        let item = item_from_bib_entry(&entry("book", "k", &fields));

        let expected_authors = vec![
            Name::person("Lovelace", "Ada"),
            Name::person("Turing", "Alan"),
        ];
        assert_eq!(authors_of(&item), Some(&expected_authors));

        let expected_editors = vec![Name::person("Knuth", "Donald")];
        assert_eq!(
            item.name.get(&NameVariable::Editor),
            Some(&expected_editors)
        );
    }

    #[test]
    fn maps_whole_bibliography_by_key() {
        let bibliography = Bibliography {
            entries: [
                ("a".to_owned(), entry("article", "a", &[("title", "First")])),
                ("b".to_owned(), entry("book", "b", &[("title", "Second")])),
            ]
            .into_iter()
            .collect(),
        };

        let library = library_from_bibliography(&bibliography);
        assert_eq!(library.len(), 2);

        let type_of = |key: &str| library.get(key).map(|item| item.item_type);
        assert_eq!(type_of("a"), Some(ItemType::ArticleJournal));
        assert_eq!(type_of("b"), Some(ItemType::Book));
    }

    #[test]
    fn keeps_single_token_names_literal() {
        let item = item_from_bib_entry(&entry("book", "k", &[("author", "Plato")]));
        let expected = vec![Name::literal("Plato")];
        assert_eq!(authors_of(&item), Some(&expected));
    }
}