1use std::collections::BTreeMap;
15
16use mos_bib::{BibEntry, Bibliography};
17
18use crate::item::{
19 Date, DateVariable, Item, ItemType, Name, NameVariable, NumberVariable, StandardVariable,
20};
21
22#[must_use]
24pub fn item_from_bib_entry(entry: &BibEntry) -> Item {
25 let mut item = Item::new(entry.key.clone(), item_type_for(&entry.entry_type));
26 for (field, value) in &entry.fields {
27 apply_field(&mut item, &entry.entry_type, field, value);
28 }
29 item
30}
31
32#[must_use]
34pub fn library_from_bibliography(bibliography: &Bibliography) -> BTreeMap<String, Item> {
35 bibliography
36 .entries
37 .iter()
38 .map(|(key, entry)| (key.clone(), item_from_bib_entry(entry)))
39 .collect()
40}
41
42fn item_type_for(entry_type: &str) -> ItemType {
44 match entry_type {
45 "article" => ItemType::ArticleJournal,
46 "book" | "proceedings" => ItemType::Book,
47 "booklet" => ItemType::Pamphlet,
48 "inbook" | "incollection" => ItemType::Chapter,
49 "conference" | "inproceedings" => ItemType::PaperConference,
50 "manual" | "techreport" => ItemType::Report,
51 "mastersthesis" | "phdthesis" | "thesis" => ItemType::Thesis,
52 "unpublished" => ItemType::Manuscript,
53 "online" | "electronic" => ItemType::Webpage,
54 _ => ItemType::Document,
55 }
56}
57
58fn apply_field(item: &mut Item, entry_type: &str, field: &str, value: &str) {
60 let standard = match field {
62 "title" => Some(StandardVariable::Title),
63 "journal" | "booktitle" => Some(StandardVariable::ContainerTitle),
64 "publisher" | "school" | "institution" => Some(StandardVariable::Publisher),
65 "address" if is_conference_entry(entry_type) => Some(StandardVariable::EventPlace),
66 "address" => Some(StandardVariable::PublisherPlace),
67 "series" => Some(StandardVariable::CollectionTitle),
68 "note" => Some(StandardVariable::Note),
69 "abstract" => Some(StandardVariable::Abstract),
70 "keywords" => Some(StandardVariable::Keyword),
71 "doi" => Some(StandardVariable::Doi),
72 "url" => Some(StandardVariable::Url),
73 "isbn" => Some(StandardVariable::Isbn),
74 "issn" => Some(StandardVariable::Issn),
75 "language" => Some(StandardVariable::Language),
76 _ => None,
77 };
78 if let Some(variable) = standard {
79 item.standard.insert(variable, value.to_owned());
80 return;
81 }
82
83 let number = match field {
85 "volume" => Some(NumberVariable::Volume),
86 "number" if is_report_entry(entry_type) => Some(NumberVariable::Number),
87 "number" => Some(NumberVariable::Issue),
88 "pages" => Some(NumberVariable::Page),
89 "edition" => Some(NumberVariable::Edition),
90 "chapter" => Some(NumberVariable::ChapterNumber),
91 _ => None,
92 };
93 if let Some(variable) = number {
94 item.number.insert(variable, value.to_owned());
95 return;
96 }
97
98 match field {
100 "author" => {
101 item.name.insert(NameVariable::Author, parse_names(value));
102 }
103 "editor" => {
104 item.name.insert(NameVariable::Editor, parse_names(value));
105 }
106 "year" => {
107 item.date.insert(DateVariable::Issued, parse_year(value));
108 }
109 _ => {}
110 }
111}
112
/// Returns `true` when `entry_type` is exactly `conference` or `inproceedings`.
fn is_conference_entry(entry_type: &str) -> bool {
    ["conference", "inproceedings"].contains(&entry_type)
}
116
/// Returns `true` when `entry_type` is exactly `manual` or `techreport`.
fn is_report_entry(entry_type: &str) -> bool {
    ["manual", "techreport"].contains(&entry_type)
}
120
121fn parse_names(value: &str) -> Vec<Name> {
123 let mut names = Vec::new();
124 let mut token_start = 0;
125 let mut search_start = 0;
126
127 while let Some(relative_start) = value[search_start..].find("and") {
128 let and_start = search_start + relative_start;
129 let and_end = and_start + "and".len();
130 if is_name_separator(value, and_start, and_end) {
131 push_name(&mut names, &value[token_start..and_start]);
132 token_start = and_end;
133 }
134 search_start = and_end;
135 }
136
137 push_name(&mut names, &value[token_start..]);
138 names
139}
140
/// Reports whether the byte range `start..end` of `value` is delimited by
/// whitespace on both sides.
///
/// Returns `false` when the range touches the beginning or the end of
/// `value`, because there is then no character on that side.
fn is_name_separator(value: &str, start: usize, end: usize) -> bool {
    let (head, tail) = (&value[..start], &value[end..]);
    head.ends_with(char::is_whitespace) && tail.starts_with(char::is_whitespace)
}
146
147fn push_name(names: &mut Vec<Name>, token: &str) {
148 let trimmed = token.trim();
149 if !trimmed.is_empty() {
150 names.push(parse_one_name(trimmed));
151 }
152}
153
154fn parse_one_name(token: &str) -> Name {
157 match token.split_once(',') {
158 Some((family, given)) => Name::person(family.trim(), given.trim()),
159 None => parse_name_without_comma(token),
160 }
161}
162
163fn parse_name_without_comma(token: &str) -> Name {
164 match token.rsplit_once(char::is_whitespace) {
165 Some((given, family)) => Name::person(family.trim(), given.trim()),
166 None => Name::literal(token),
167 }
168}
169
170fn parse_year(value: &str) -> Date {
173 match value.trim().parse::<i32>() {
174 Ok(year) => Date::year(year),
175 Err(_) => Date::literal(value),
176 }
177}
178
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `BibEntry` fixture from an entry type, a key and field pairs.
    fn entry(entry_type: &str, key: &str, fields: &[(&str, &str)]) -> BibEntry {
        let fields = fields
            .iter()
            .map(|&(name, text)| (name.to_owned(), text.to_owned()))
            .collect();
        BibEntry {
            entry_type: entry_type.to_owned(),
            key: key.to_owned(),
            key_span: 0..key.len(),
            fields,
        }
    }

    /// Looks up a text variable on `item` as a borrowed string.
    fn standard(item: &Item, variable: StandardVariable) -> Option<&str> {
        item.standard.get(&variable).map(String::as_str)
    }

    /// Looks up a number variable on `item` as a borrowed string.
    fn number(item: &Item, variable: NumberVariable) -> Option<&str> {
        item.number.get(&variable).map(String::as_str)
    }

    /// Converts a book whose only field is `author` and checks that it yields
    /// exactly Knuth followed by Lovelace.
    fn assert_knuth_then_lovelace(author_field: &str) {
        let item = item_from_bib_entry(&entry("book", "k", &[("author", author_field)]));
        let authors = item
            .name
            .get(&NameVariable::Author)
            .expect("authors present");
        assert_eq!(authors.len(), 2);
        assert_eq!(authors[0], Name::person("Knuth", "Donald E."));
        assert_eq!(authors[1], Name::person("Lovelace", "Ada"));
    }

    #[test]
    fn maps_article_type_and_core_fields() {
        let fields = [
            ("title", "Literate Programming"),
            ("year", "1984"),
            ("journal", "The Computer Journal"),
        ];
        let item = item_from_bib_entry(&entry("article", "knuth1984", &fields));

        assert_eq!(item.id, "knuth1984");
        assert_eq!(item.item_type, ItemType::ArticleJournal);
        assert_eq!(
            standard(&item, StandardVariable::Title),
            Some("Literate Programming")
        );
        assert_eq!(
            standard(&item, StandardVariable::ContainerTitle),
            Some("The Computer Journal")
        );
        assert_eq!(
            item.date.get(&DateVariable::Issued),
            Some(&Date::year(1984))
        );
    }

    #[test]
    fn splits_authors_on_and_and_comma() {
        assert_knuth_then_lovelace("Knuth, Donald E. and Ada Lovelace");
    }

    #[test]
    fn splits_names_on_whitespace_delimited_and_tokens() {
        assert_knuth_then_lovelace("Knuth, Donald E. and\n Ada Lovelace");
    }

    #[test]
    fn unknown_type_is_document_and_unknown_fields_drop() {
        let item = item_from_bib_entry(&entry(
            "flibble",
            "k",
            &[("title", "T"), ("nonsense", "x")],
        ));
        assert_eq!(item.item_type, ItemType::Document);
        assert!(item.standard.contains_key(&StandardVariable::Title));
        assert_eq!(item.standard.len(), 1, "unknown field should be dropped");
    }

    #[test]
    fn non_numeric_year_becomes_a_literal_date() {
        let item = item_from_bib_entry(&entry("misc", "k", &[("year", "in press")]));
        assert_eq!(
            item.date.get(&DateVariable::Issued),
            Some(&Date::literal("in press"))
        );
    }

    #[test]
    fn maps_bibtex_entry_type_groups() {
        let cases = [
            ("article", ItemType::ArticleJournal),
            ("book", ItemType::Book),
            ("proceedings", ItemType::Book),
            ("booklet", ItemType::Pamphlet),
            ("inbook", ItemType::Chapter),
            ("incollection", ItemType::Chapter),
            ("conference", ItemType::PaperConference),
            ("inproceedings", ItemType::PaperConference),
            ("manual", ItemType::Report),
            ("techreport", ItemType::Report),
            ("mastersthesis", ItemType::Thesis),
            ("phdthesis", ItemType::Thesis),
            ("thesis", ItemType::Thesis),
            ("unpublished", ItemType::Manuscript),
            ("online", ItemType::Webpage),
            ("electronic", ItemType::Webpage),
            ("misc", ItemType::Document),
        ];

        for (entry_type, expected) in cases {
            let converted = item_from_bib_entry(&entry(entry_type, "k", &[]));
            assert_eq!(converted.item_type, expected, "entry type: {entry_type}");
        }
    }

    #[test]
    fn maps_standard_and_number_field_groups() {
        let fields = [
            ("title", "Title"),
            ("booktitle", "Container"),
            ("publisher", "Publisher"),
            ("school", "School"),
            ("institution", "Institution"),
            ("address", "Place"),
            ("series", "Series"),
            ("note", "Note"),
            ("abstract", "Abstract"),
            ("keywords", "Keywords"),
            ("doi", "10.0/demo"),
            ("url", "https://example.invalid"),
            ("isbn", "ISBN"),
            ("issn", "ISSN"),
            ("language", "en"),
            ("volume", "2"),
            ("number", "4"),
            ("pages", "10-20"),
            ("edition", "3"),
            ("chapter", "7"),
        ];
        let item = item_from_bib_entry(&entry("book", "k", &fields));

        let expected_standard = [
            (StandardVariable::Title, "Title"),
            (StandardVariable::ContainerTitle, "Container"),
            (StandardVariable::Publisher, "School"),
            (StandardVariable::PublisherPlace, "Place"),
            (StandardVariable::CollectionTitle, "Series"),
            (StandardVariable::Note, "Note"),
            (StandardVariable::Abstract, "Abstract"),
            (StandardVariable::Keyword, "Keywords"),
            (StandardVariable::Doi, "10.0/demo"),
            (StandardVariable::Url, "https://example.invalid"),
            (StandardVariable::Isbn, "ISBN"),
            (StandardVariable::Issn, "ISSN"),
            (StandardVariable::Language, "en"),
        ];
        for (variable, text) in expected_standard {
            assert_eq!(standard(&item, variable), Some(text));
        }

        let expected_number = [
            (NumberVariable::Volume, "2"),
            (NumberVariable::Issue, "4"),
            (NumberVariable::Page, "10-20"),
            (NumberVariable::Edition, "3"),
            (NumberVariable::ChapterNumber, "7"),
        ];
        for (variable, text) in expected_number {
            assert_eq!(number(&item, variable), Some(text));
        }
    }

    #[test]
    fn maps_report_number_to_number_not_issue() {
        let item = item_from_bib_entry(&entry("techreport", "k", &[("number", "TR-7")]));
        assert_eq!(number(&item, NumberVariable::Number), Some("TR-7"));
        assert!(!item.number.contains_key(&NumberVariable::Issue));
    }

    #[test]
    fn maps_conference_address_to_event_place() {
        let item = item_from_bib_entry(&entry("inproceedings", "k", &[("address", "Paris")]));
        assert_eq!(standard(&item, StandardVariable::EventPlace), Some("Paris"));
        assert!(
            !item
                .standard
                .contains_key(&StandardVariable::PublisherPlace)
        );
    }

    #[test]
    fn maps_editors_and_skips_empty_name_tokens() {
        let fields = [
            ("author", " Ada Lovelace and and Turing, Alan "),
            ("editor", "Knuth, Donald"),
        ];
        let item = item_from_bib_entry(&entry("book", "k", &fields));

        let expected_authors = vec![
            Name::person("Lovelace", "Ada"),
            Name::person("Turing", "Alan"),
        ];
        assert_eq!(
            item.name.get(&NameVariable::Author),
            Some(&expected_authors)
        );

        let expected_editors = vec![Name::person("Knuth", "Donald")];
        assert_eq!(
            item.name.get(&NameVariable::Editor),
            Some(&expected_editors)
        );
    }

    #[test]
    fn maps_whole_bibliography_by_key() {
        let bibliography = Bibliography {
            entries: [("a", "article", "First"), ("b", "book", "Second")]
                .into_iter()
                .map(|(key, kind, title)| (key.to_owned(), entry(kind, key, &[("title", title)])))
                .collect(),
        };

        let library = library_from_bibliography(&bibliography);
        assert_eq!(library.len(), 2);
        for (key, expected) in [("a", ItemType::ArticleJournal), ("b", ItemType::Book)] {
            assert_eq!(library.get(key).map(|item| item.item_type), Some(expected));
        }
    }

    #[test]
    fn keeps_single_token_names_literal() {
        let item = item_from_bib_entry(&entry("book", "k", &[("author", "Plato")]));
        let expected = vec![Name::literal("Plato")];
        assert_eq!(item.name.get(&NameVariable::Author), Some(&expected));
    }
}