1use std::collections::BTreeMap;
12
13use roxmltree::{Document, Node};
14
15use crate::error::{CslParseError, CslParseErrorKind};
16use crate::style::{
17 Bibliography, BibliographyOptions, Branch, Choose, Citation, CitationOptions, Common,
18 Conditions, DateElement, DatePart, Element, EtAl, Group, Info, InfoCategory, InfoContributor,
19 InfoLink, InheritableNameOptions, Label, Layout, LocaleBlock, Match, NameElement, NamePart,
20 Names, Number, SortKey, SortKeyOptions, SortTarget, Style, StyleClass, StyleOptions, Text,
21 TextSource,
22};
23
/// Namespace URI that `parse_style` accepts on a namespaced root element; any other
/// root namespace is reported as `CslParseErrorKind::ForeignNamespace`.
const CSL_NAMESPACE: &str = "http://purl.org/net/xbiblio/csl";
26
27pub fn parse_style(input: &str) -> Result<Style, CslParseError> {
54 let document = Document::parse(input).map_err(|error| {
55 let offset = text_pos_to_byte_offset(input, error.pos()).unwrap_or(0);
56 CslParseError::new(CslParseErrorKind::MalformedXml(error.to_string()), offset)
57 })?;
58 let root = document.root_element();
59 if root.tag_name().name() != "style" {
60 let name = root.tag_name().name().to_owned();
61 return Err(err_at(root, CslParseErrorKind::UnexpectedRoot(name)));
62 }
63 if let Some(namespace) = root.tag_name().namespace()
67 && namespace != CSL_NAMESPACE
68 {
69 return Err(err_at(
70 root,
71 CslParseErrorKind::ForeignNamespace(namespace.to_owned()),
72 ));
73 }
74
75 let version = root
76 .attribute("version")
77 .ok_or_else(|| err_at(root, CslParseErrorKind::MissingVersion))?;
78 if version != "1.0" && !version.starts_with("1.0.") {
82 return Err(err_at(
83 root,
84 CslParseErrorKind::UnsupportedVersion(version.to_owned()),
85 ));
86 }
87 let version = version.to_owned();
88 let class = match root.attribute("class") {
89 Some("in-text") => StyleClass::InText,
90 Some("note") => StyleClass::Note,
91 Some(other) => {
92 return Err(err_at(
93 root,
94 CslParseErrorKind::UnknownClass(other.to_owned()),
95 ));
96 }
97 None => return Err(err_at(root, CslParseErrorKind::MissingClass)),
98 };
99 let default_locale = attr(root, "default-locale");
100
101 let mut info = Info::default();
102 let mut citation = None;
103 let mut bibliography = None;
104 let mut macros = BTreeMap::new();
105 let mut locales = Vec::new();
106
107 for child in child_elements(root) {
108 match child.tag_name().name() {
109 "info" => info = parse_info(child),
110 "citation" => citation = Some(parse_citation(child)?),
111 "bibliography" => bibliography = Some(parse_bibliography(child)?),
112 "macro" => {
113 let name = child
114 .attribute("name")
115 .ok_or_else(|| err_at(child, CslParseErrorKind::MissingMacroName))?;
116 macros.insert(name.to_owned(), parse_elements(child)?);
117 }
118 "locale" => locales.push(parse_locale(child, input)),
119 other => {
120 return Err(err_at(
121 child,
122 CslParseErrorKind::UnsupportedElement(other.to_owned()),
123 ));
124 }
125 }
126 }
127
128 Ok(Style {
129 class,
130 version,
131 default_locale,
132 options: parse_style_options(root),
133 info,
134 citation,
135 bibliography,
136 macros,
137 locales,
138 })
139}
140
141fn parse_info(node: Node<'_, '_>) -> Info {
142 let mut info = Info::default();
143 for child in child_elements(node) {
144 match child.tag_name().name() {
145 "id" => info.id = child.text().map(str::to_owned),
146 "title" => info.title = child.text().map(str::to_owned),
147 "link" => info.links.push(parse_info_link(child)),
148 "category" => info.categories.push(parse_info_category(child)),
149 "author" => info.authors.push(parse_info_contributor(child)),
150 "contributor" => info.contributors.push(parse_info_contributor(child)),
151 "updated" => info.updated = child.text().map(str::to_owned),
152 "issn" => {
153 if let Some(text) = child.text() {
154 info.issn.push(text.to_owned());
155 }
156 }
157 _ => {}
159 }
160 }
161 info
162}
163
164fn parse_info_link(node: Node<'_, '_>) -> InfoLink {
165 InfoLink {
166 rel: attr(node, "rel"),
167 href: attr(node, "href"),
168 media_type: attr(node, "type"),
169 }
170}
171
172fn parse_info_category(node: Node<'_, '_>) -> InfoCategory {
173 InfoCategory {
174 citation_format: attr(node, "citation-format"),
175 field: attr(node, "field"),
176 }
177}
178
179fn parse_info_contributor(node: Node<'_, '_>) -> InfoContributor {
180 let mut contributor = InfoContributor::default();
181 for child in child_elements(node) {
182 match child.tag_name().name() {
183 "name" => contributor.name = child.text().map(str::to_owned),
184 "uri" => contributor.uri = child.text().map(str::to_owned),
185 "email" => contributor.email = child.text().map(str::to_owned),
186 _ => {}
187 }
188 }
189 contributor
190}
191
192fn parse_locale(node: Node<'_, '_>, input: &str) -> LocaleBlock {
193 let xml = match input.get(node.range()) {
194 Some(text) => text.to_owned(),
195 None => String::new(),
196 };
197 LocaleBlock { xml }
198}
199
200fn parse_citation(node: Node<'_, '_>) -> Result<Citation, CslParseError> {
201 let (layout, sort) = parse_layout_and_sort(node)?;
202 Ok(Citation {
203 layout,
204 sort,
205 options: parse_citation_options(node),
206 })
207}
208
209fn parse_bibliography(node: Node<'_, '_>) -> Result<Bibliography, CslParseError> {
210 let (layout, sort) = parse_layout_and_sort(node)?;
211 Ok(Bibliography {
212 layout,
213 sort,
214 options: parse_bibliography_options(node),
215 })
216}
217
218fn parse_layout_and_sort(node: Node<'_, '_>) -> Result<(Layout, Vec<SortKey>), CslParseError> {
219 let mut layout = None;
220 let mut sort = Vec::new();
221 for child in child_elements(node) {
222 match child.tag_name().name() {
223 "layout" => layout = Some(parse_layout(child)?),
224 "sort" => sort = parse_sort(child),
225 other => {
226 return Err(err_at(
227 child,
228 CslParseErrorKind::UnsupportedElement(other.to_owned()),
229 ));
230 }
231 }
232 }
233 let layout = layout.ok_or_else(|| err_at(node, CslParseErrorKind::MissingLayout))?;
234 Ok((layout, sort))
235}
236
237fn parse_layout(node: Node<'_, '_>) -> Result<Layout, CslParseError> {
238 Ok(Layout {
239 elements: parse_elements(node)?,
240 common: parse_common(node),
241 })
242}
243
244fn parse_sort(node: Node<'_, '_>) -> Vec<SortKey> {
245 let mut keys = Vec::new();
246 for child in child_elements(node) {
247 if child.tag_name().name() == "key" {
248 let target = child.attribute("macro").map_or_else(
249 || SortTarget::Variable(attr(child, "variable").unwrap_or_default()),
250 |name| SortTarget::Macro(name.to_owned()),
251 );
252 keys.push(SortKey {
253 target,
254 descending: child.attribute("sort") == Some("descending"),
255 options: parse_sort_key_options(child),
256 });
257 }
258 }
259 keys
260}
261
262fn parse_elements(node: Node<'_, '_>) -> Result<Vec<Element>, CslParseError> {
263 let mut elements = Vec::new();
264 for child in child_elements(node) {
265 elements.push(parse_element(child)?);
266 }
267 Ok(elements)
268}
269
270fn parse_element(node: Node<'_, '_>) -> Result<Element, CslParseError> {
271 let element = match node.tag_name().name() {
272 "text" => Element::Text(parse_text(node)?),
273 "number" => Element::Number(parse_number(node)),
274 "date" => Element::Date(parse_date(node)),
275 "names" => Element::Names(Box::new(parse_names(node)?)),
276 "label" => Element::Label(parse_label(node)),
277 "group" => Element::Group(parse_group(node)?),
278 "choose" => Element::Choose(parse_choose(node)?),
279 other => {
280 return Err(err_at(
281 node,
282 CslParseErrorKind::UnsupportedElement(other.to_owned()),
283 ));
284 }
285 };
286 Ok(element)
287}
288
289fn parse_text(node: Node<'_, '_>) -> Result<Text, CslParseError> {
290 let source_count = [
291 node.attribute("variable"),
292 node.attribute("macro"),
293 node.attribute("term"),
294 node.attribute("value"),
295 ]
296 .into_iter()
297 .flatten()
298 .count();
299 if source_count > 1 {
300 return Err(err_at(node, CslParseErrorKind::TextWithMultipleSources));
301 }
302
303 let source = if let Some(variable) = node.attribute("variable") {
304 TextSource::Variable {
305 name: variable.to_owned(),
306 form: attr(node, "form"),
307 }
308 } else if let Some(name) = node.attribute("macro") {
309 TextSource::Macro(name.to_owned())
310 } else if let Some(term) = node.attribute("term") {
311 TextSource::Term {
312 name: term.to_owned(),
313 form: attr(node, "form"),
314 plural: bool_attr(node, "plural"),
315 }
316 } else if let Some(value) = node.attribute("value") {
317 TextSource::Value(value.to_owned())
318 } else {
319 return Err(err_at(node, CslParseErrorKind::TextWithoutSource));
320 };
321 Ok(Text {
322 source,
323 quotes: bool_attr(node, "quotes"),
324 strip_periods: bool_attr(node, "strip-periods"),
325 common: parse_common(node),
326 })
327}
328
329fn parse_number(node: Node<'_, '_>) -> Number {
330 Number {
331 variable: attr(node, "variable").unwrap_or_default(),
332 form: attr(node, "form"),
333 common: parse_common(node),
334 }
335}
336
337fn parse_date(node: Node<'_, '_>) -> DateElement {
338 let mut parts = Vec::new();
339 for child in child_elements(node) {
340 if child.tag_name().name() == "date-part" {
341 parts.push(DatePart {
342 name: attr(child, "name").unwrap_or_default(),
343 form: attr(child, "form"),
344 range_delimiter: attr(child, "range-delimiter"),
345 strip_periods: attr(child, "strip-periods"),
346 common: parse_common(child),
347 });
348 }
349 }
350 DateElement {
351 variable: attr(node, "variable").unwrap_or_default(),
352 form: attr(node, "form"),
353 date_parts: attr(node, "date-parts"),
354 parts,
355 common: parse_common(node),
356 }
357}
358
359fn parse_names(node: Node<'_, '_>) -> Result<Names, CslParseError> {
360 let variables = attr(node, "variable")
361 .unwrap_or_default()
362 .split_whitespace()
363 .map(str::to_owned)
364 .collect();
365 let mut name = None;
366 let mut et_al = None;
367 let mut label = None;
368 let mut substitute = Vec::new();
369 for child in child_elements(node) {
370 match child.tag_name().name() {
371 "name" => {
372 name = Some(parse_name_element(child));
373 }
374 "et-al" => {
375 et_al = Some(EtAl {
376 term: attr(child, "term"),
377 common: parse_common(child),
378 });
379 }
380 "label" => label = Some(parse_label(child)),
381 "substitute" => substitute = parse_elements(child)?,
382 other => {
383 return Err(err_at(
384 child,
385 CslParseErrorKind::UnsupportedElement(other.to_owned()),
386 ));
387 }
388 }
389 }
390 Ok(Names {
391 variables,
392 name,
393 et_al,
394 label,
395 substitute,
396 common: parse_common(node),
397 })
398}
399
400fn parse_name_element(node: Node<'_, '_>) -> NameElement {
401 let mut parts = Vec::new();
402 for child in child_elements(node) {
403 if child.tag_name().name() == "name-part" {
404 parts.push(NamePart {
405 name: attr(child, "name"),
406 common: parse_common(child),
407 });
408 }
409 }
410
411 NameElement {
412 form: attr(node, "form"),
413 options: parse_inheritable_name_options(node),
414 parts,
415 common: parse_common(node),
416 }
417}
418
419fn parse_label(node: Node<'_, '_>) -> Label {
420 Label {
421 variable: attr(node, "variable"),
422 form: attr(node, "form"),
423 plural: attr(node, "plural"),
424 strip_periods: attr(node, "strip-periods"),
425 common: parse_common(node),
426 }
427}
428
429fn parse_group(node: Node<'_, '_>) -> Result<Group, CslParseError> {
430 Ok(Group {
431 children: parse_elements(node)?,
432 common: parse_common(node),
433 })
434}
435
436fn parse_choose(node: Node<'_, '_>) -> Result<Choose, CslParseError> {
437 let mut branches = Vec::new();
438 let mut otherwise = Vec::new();
439 let mut seen_if = false;
440 let mut seen_else = false;
441 for child in child_elements(node) {
442 match child.tag_name().name() {
443 "if" => {
444 if seen_if || seen_else {
445 return Err(err_at(child, CslParseErrorKind::InvalidChooseOrder));
446 }
447 seen_if = true;
448 branches.push(Branch {
449 conditions: parse_conditions(child),
450 children: parse_elements(child)?,
451 });
452 }
453 "else-if" => {
454 if !seen_if || seen_else {
455 return Err(err_at(child, CslParseErrorKind::InvalidChooseOrder));
456 }
457 branches.push(Branch {
458 conditions: parse_conditions(child),
459 children: parse_elements(child)?,
460 });
461 }
462 "else" => {
463 if !seen_if || seen_else {
464 return Err(err_at(child, CslParseErrorKind::InvalidChooseOrder));
465 }
466 seen_else = true;
467 otherwise = parse_elements(child)?;
468 }
469 other => {
470 return Err(err_at(
471 child,
472 CslParseErrorKind::UnsupportedElement(other.to_owned()),
473 ));
474 }
475 }
476 }
477 if !seen_if {
478 return Err(err_at(node, CslParseErrorKind::InvalidChooseOrder));
479 }
480 Ok(Choose {
481 branches,
482 otherwise,
483 })
484}
485
486fn parse_conditions(node: Node<'_, '_>) -> Conditions {
487 let match_mode = match node.attribute("match") {
488 Some("any") => Match::Any,
489 Some("none") => Match::None,
490 _ => Match::All,
491 };
492 Conditions {
493 match_mode,
494 kind: tokens(node, "type"),
495 variable: tokens(node, "variable"),
496 is_numeric: tokens(node, "is-numeric"),
497 is_uncertain_date: tokens(node, "is-uncertain-date"),
498 locator: tokens(node, "locator"),
499 position: tokens(node, "position"),
500 disambiguate: bool_attr(node, "disambiguate"),
501 }
502}
503
504fn parse_common(node: Node<'_, '_>) -> Common {
505 Common {
506 prefix: attr(node, "prefix"),
507 suffix: attr(node, "suffix"),
508 delimiter: attr(node, "delimiter"),
509 font_style: attr(node, "font-style"),
510 font_variant: attr(node, "font-variant"),
511 font_weight: attr(node, "font-weight"),
512 text_decoration: attr(node, "text-decoration"),
513 vertical_align: attr(node, "vertical-align"),
514 text_case: attr(node, "text-case"),
515 display: attr(node, "display"),
516 }
517}
518
519fn parse_style_options(node: Node<'_, '_>) -> StyleOptions {
520 StyleOptions {
521 page_range_format: attr(node, "page-range-format"),
522 demote_non_dropping_particle: attr(node, "demote-non-dropping-particle"),
523 initialize_with_hyphen: attr(node, "initialize-with-hyphen"),
524 names: parse_inheritable_name_options(node),
525 }
526}
527
528fn parse_citation_options(node: Node<'_, '_>) -> CitationOptions {
529 CitationOptions {
530 collapse: attr(node, "collapse"),
531 cite_group_delimiter: attr(node, "cite-group-delimiter"),
532 year_suffix_delimiter: attr(node, "year-suffix-delimiter"),
533 after_collapse_delimiter: attr(node, "after-collapse-delimiter"),
534 disambiguate_add_names: attr(node, "disambiguate-add-names"),
535 disambiguate_add_givenname: attr(node, "disambiguate-add-givenname"),
536 disambiguate_add_year_suffix: attr(node, "disambiguate-add-year-suffix"),
537 givenname_disambiguation_rule: attr(node, "givenname-disambiguation-rule"),
538 near_note_distance: attr(node, "near-note-distance"),
539 names: parse_inheritable_name_options(node),
540 }
541}
542
543fn parse_bibliography_options(node: Node<'_, '_>) -> BibliographyOptions {
544 BibliographyOptions {
545 hanging_indent: attr(node, "hanging-indent"),
546 second_field_align: attr(node, "second-field-align"),
547 line_spacing: attr(node, "line-spacing"),
548 entry_spacing: attr(node, "entry-spacing"),
549 subsequent_author_substitute: attr(node, "subsequent-author-substitute"),
550 subsequent_author_substitute_rule: attr(node, "subsequent-author-substitute-rule"),
551 names: parse_inheritable_name_options(node),
552 }
553}
554
555fn parse_sort_key_options(node: Node<'_, '_>) -> SortKeyOptions {
556 SortKeyOptions {
557 names_min: attr(node, "names-min"),
558 names_use_first: attr(node, "names-use-first"),
559 names_use_last: attr(node, "names-use-last"),
560 }
561}
562
563fn parse_inheritable_name_options(node: Node<'_, '_>) -> InheritableNameOptions {
564 InheritableNameOptions {
565 et_al_min: attr(node, "et-al-min"),
566 et_al_use_first: attr(node, "et-al-use-first"),
567 et_al_subsequent_min: attr(node, "et-al-subsequent-min"),
568 et_al_subsequent_use_first: attr(node, "et-al-subsequent-use-first"),
569 et_al_use_last: attr(node, "et-al-use-last"),
570 and: attr(node, "and"),
571 delimiter_precedes_et_al: attr(node, "delimiter-precedes-et-al"),
572 delimiter_precedes_last: attr(node, "delimiter-precedes-last"),
573 initialize: attr(node, "initialize"),
574 initialize_with: attr(node, "initialize-with"),
575 name_as_sort_order: attr(node, "name-as-sort-order"),
576 sort_separator: attr(node, "sort-separator"),
577 }
578}
579
580fn child_elements<'a, 'input>(node: Node<'a, 'input>) -> impl Iterator<Item = Node<'a, 'input>> {
582 node.children().filter(Node::is_element)
583}
584
585fn attr(node: Node<'_, '_>, name: &str) -> Option<String> {
587 node.attribute(name).map(str::to_owned)
588}
589
590fn bool_attr(node: Node<'_, '_>, name: &str) -> bool {
592 node.attribute(name) == Some("true")
593}
594
595fn tokens(node: Node<'_, '_>, name: &str) -> Vec<String> {
597 node.attribute(name)
598 .map(|value| value.split_whitespace().map(str::to_owned).collect())
599 .unwrap_or_default()
600}
601
602fn text_pos_to_byte_offset(input: &str, position: roxmltree::TextPos) -> Option<usize> {
603 let row = usize::try_from(position.row).ok()?;
604 let col = usize::try_from(position.col).ok()?;
605 if row == 0 || col == 0 {
606 return None;
607 }
608
609 let (line_start, line) = line_at(input, row)?;
610 let col_offset = column_to_byte_offset(line, col)?;
611 Some(line_start + col_offset)
612}
613
/// Locates the 1-based line `row` in `input`.
///
/// Returns the byte offset at which the line starts together with the line's text,
/// without its terminating `\n` (a preceding `\r`, if any, is kept).
///
/// When `input` is empty or ends with `\n`, the row just past the last terminated line
/// is treated as an empty line starting at `input.len()`. Positions at end of input
/// fall on that row, so they resolve to a real offset rather than `None`.
///
/// Returns `None` for row `0` and for rows beyond the end of `input`.
fn line_at(input: &str, row: usize) -> Option<(usize, &str)> {
    let mut line_start = 0;
    let mut line_count = 0;
    for line in input.split_inclusive('\n') {
        line_count += 1;
        if line_count == row {
            return Some((line_start, line.strip_suffix('\n').unwrap_or(line)));
        }
        line_start += line.len();
    }

    // `split_inclusive` yields nothing for the empty remainder after a final newline
    // (or for empty input), yet that empty line is still addressable. At this point
    // `line_start` equals `input.len()`.
    let has_trailing_empty_line = input.is_empty() || input.ends_with('\n');
    (has_trailing_empty_line && row == line_count + 1).then_some((line_start, ""))
}
632
/// Converts a 1-based column, counted in characters, into a byte offset within `line`.
///
/// The column one past the last character maps to `line.len()`. Column `0` and columns
/// further past the end yield `None`.
fn column_to_byte_offset(line: &str, col: usize) -> Option<usize> {
    let chars_to_skip = col.checked_sub(1)?;
    line.char_indices()
        .map(|(byte_offset, _)| byte_offset)
        // The end of the line is addressable as one extra position.
        .chain(std::iter::once(line.len()))
        .nth(chars_to_skip)
}
649
650fn err_at(node: Node<'_, '_>, kind: CslParseErrorKind) -> CslParseError {
652 CslParseError::new(kind, node.range().start)
653}