1use std::collections::BTreeMap;
20
21use crate::error::{BibParseError, BibParseErrorKind};
22use crate::record::{BibEntry, Bibliography};
23
24pub fn parse_bibtex(input: &str) -> Result<Bibliography, BibParseError> {
51 let mut parser = Parser::new(input);
52 let mut entries = BTreeMap::new();
53 parser.skip_whitespace();
54 while !parser.at_end() {
55 let parsed = parser.parse_entry()?;
56 if entries.contains_key(&parsed.entry.key) {
57 return Err(BibParseError::new(
58 BibParseErrorKind::DuplicateKey,
59 parsed.key_offset,
60 ));
61 }
62 entries.insert(parsed.entry.key.clone(), parsed.entry);
63 parser.skip_whitespace();
64 }
65 Ok(Bibliography { entries })
66}
67
68struct ParsedEntry {
69 entry: BibEntry,
70 key_offset: usize,
71}
72
/// A citation key and the place in the source where it was found.
struct ParsedKey {
    /// Byte offset in the source of the key's first byte.
    offset: usize,
    /// The key text exactly as written in the source.
    text: String,
}
77
/// Cursor over BibTeX source text.
///
/// Scanning is done byte by byte through `bytes`, while `src` is kept so
/// that sub-slices can be copied out as `str` data.
struct Parser<'a> {
    /// The full source text being parsed.
    src: &'a str,
    /// Byte view used for scanning.
    bytes: &'a [u8],
    /// Index into `bytes` of the next unread byte.
    pos: usize,
}
87
88impl<'a> Parser<'a> {
89 fn new(src: &'a str) -> Self {
90 Self {
91 src,
92 bytes: src.as_bytes(),
93 pos: 0,
94 }
95 }
96
97 fn at_end(&self) -> bool {
98 self.pos >= self.bytes.len()
99 }
100
101 fn peek(&self) -> Option<u8> {
102 self.bytes.get(self.pos).copied()
103 }
104
105 fn bump(&mut self) {
106 self.pos += 1;
107 }
108
109 fn skip_whitespace(&mut self) {
110 while let Some(b) = self.peek() {
111 if b.is_ascii_whitespace() {
112 self.bump();
113 } else {
114 break;
115 }
116 }
117 }
118
119 fn error_here(&self, kind: BibParseErrorKind) -> BibParseError {
120 BibParseError::new(kind, self.pos)
121 }
122
123 fn error_at(&self, offset: usize, kind: BibParseErrorKind) -> BibParseError {
124 BibParseError::new(kind, offset)
125 }
126
127 fn expect_byte(&mut self, byte: u8, kind: BibParseErrorKind) -> Result<(), BibParseError> {
129 if self.peek() == Some(byte) {
130 self.bump();
131 Ok(())
132 } else {
133 Err(self.error_here(kind))
134 }
135 }
136
137 fn take_identifier(&mut self) -> Option<String> {
140 let start = self.pos;
141 while let Some(b) = self.peek() {
142 if is_identifier_byte(b) {
143 self.bump();
144 } else {
145 break;
146 }
147 }
148 if self.pos == start {
149 None
150 } else {
151 Some(self.src[start..self.pos].to_ascii_lowercase())
152 }
153 }
154
155 fn parse_entry(&mut self) -> Result<ParsedEntry, BibParseError> {
156 self.expect_byte(b'@', BibParseErrorKind::ExpectedAt)?;
157 self.skip_whitespace();
158 let entry_type = self
159 .take_identifier()
160 .ok_or_else(|| self.error_here(BibParseErrorKind::ExpectedEntryType))?;
161 self.skip_whitespace();
162 self.expect_byte(b'{', BibParseErrorKind::ExpectedOpenBrace)?;
163 self.skip_whitespace();
164 let key = self.parse_key()?;
165 self.skip_whitespace();
166 let mut fields = BTreeMap::new();
167 match self.peek() {
168 Some(b'}') => self.bump(),
169 Some(b',') => {
170 self.bump();
171 self.parse_fields(&mut fields)?;
172 }
173 Some(_) => return Err(self.error_here(BibParseErrorKind::ExpectedCommaOrCloseBrace)),
174 None => return Err(self.error_here(BibParseErrorKind::UnterminatedEntry)),
175 }
176 let key_span = key.offset..key.offset + key.text.len();
177 Ok(ParsedEntry {
178 entry: BibEntry {
179 entry_type,
180 key: key.text,
181 key_span,
182 fields,
183 },
184 key_offset: key.offset,
185 })
186 }
187
188 fn parse_key(&mut self) -> Result<ParsedKey, BibParseError> {
191 let start = self.pos;
192 while let Some(b) = self.peek() {
193 if is_key_byte(b) {
194 self.bump();
195 } else {
196 break;
197 }
198 }
199 if self.pos == start {
200 return Err(self.error_here(BibParseErrorKind::ExpectedKey));
201 }
202 Ok(ParsedKey {
203 text: self.src[start..self.pos].to_owned(),
204 offset: start,
205 })
206 }
207
208 fn parse_fields(&mut self, fields: &mut BTreeMap<String, String>) -> Result<(), BibParseError> {
212 let mut saw_field = false;
213 loop {
214 self.skip_whitespace();
215 match self.peek() {
216 Some(b'}') if saw_field => {
220 self.bump();
221 return Ok(());
222 }
223 Some(b'}') => return Err(self.error_here(BibParseErrorKind::ExpectedFieldName)),
224 None => return Err(self.error_here(BibParseErrorKind::UnterminatedEntry)),
225 _ => {}
226 }
227 let name = self
228 .take_identifier()
229 .ok_or_else(|| self.error_here(BibParseErrorKind::ExpectedFieldName))?;
230 self.skip_whitespace();
231 self.expect_byte(b'=', BibParseErrorKind::ExpectedEquals)?;
232 self.skip_whitespace();
233 let value = self.parse_value()?;
234 fields.insert(name, value);
236 saw_field = true;
237 self.skip_whitespace();
238 match self.peek() {
239 Some(b',') => self.bump(),
240 Some(b'}') => {
241 self.bump();
242 return Ok(());
243 }
244 None => return Err(self.error_here(BibParseErrorKind::UnterminatedEntry)),
245 Some(_) => {
246 return Err(self.error_here(BibParseErrorKind::ExpectedCommaOrCloseBrace));
247 }
248 }
249 }
250 }
251
252 fn parse_value(&mut self) -> Result<String, BibParseError> {
253 match self.peek() {
254 Some(b'{') => self.parse_braced(),
255 Some(b'"') => self.parse_quoted(),
256 Some(b) if is_bare_value_byte(b) => Ok(self.take_bare_value()),
257 _ => Err(self.error_here(BibParseErrorKind::ExpectedValue)),
258 }
259 }
260
261 fn parse_braced(&mut self) -> Result<String, BibParseError> {
264 let open_offset = self.pos;
265 self.bump(); let content_start = self.pos;
267 let mut depth = 1_usize;
268 while let Some(b) = self.peek() {
269 match b {
270 b'{' => depth += 1,
271 b'}' => {
272 depth -= 1;
273 if depth == 0 {
274 let value = self.src[content_start..self.pos].to_owned();
275 self.bump(); return Ok(value);
277 }
278 }
279 _ => {}
280 }
281 self.bump();
282 }
283 Err(self.error_at(open_offset, BibParseErrorKind::UnterminatedValue))
284 }
285
286 fn parse_quoted(&mut self) -> Result<String, BibParseError> {
290 let open_offset = self.pos;
291 self.bump(); let content_start = self.pos;
293 let mut depth = 0_usize;
294 while let Some(b) = self.peek() {
295 match b {
296 b'\\' => {
297 self.bump();
298 if !self.at_end() {
299 self.bump();
300 }
301 continue;
302 }
303 b'{' => depth += 1,
304 b'}' if depth > 0 => depth -= 1,
305 b'"' if depth == 0 => {
306 let value = self.src[content_start..self.pos].to_owned();
307 self.bump(); return Ok(value);
309 }
310 _ => {}
311 }
312 self.bump();
313 }
314 Err(self.error_at(open_offset, BibParseErrorKind::UnterminatedValue))
315 }
316
317 fn take_bare_value(&mut self) -> String {
321 let start = self.pos;
322 while let Some(b) = self.peek() {
323 if is_bare_value_byte(b) {
324 self.bump();
325 } else {
326 break;
327 }
328 }
329 self.src[start..self.pos].to_owned()
330 }
331}
332
/// Returns `true` for bytes allowed in entry types and field names: ASCII
/// letters and digits plus `_`, `-`, `+`, `.`, `:` and `/`.
fn is_identifier_byte(b: u8) -> bool {
    match b {
        b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => true,
        b'_' | b'-' | b'+' | b'.' | b':' | b'/' => true,
        _ => false,
    }
}
337
/// Returns `true` for bytes allowed in a citation key: anything except ASCII
/// whitespace and the structural characters `,`, `{`, `}`, `"`, `=` and `@`.
/// Non-ASCII bytes are therefore accepted.
fn is_key_byte(b: u8) -> bool {
    match b {
        b',' | b'{' | b'}' | b'"' | b'=' | b'@' => false,
        other => !other.is_ascii_whitespace(),
    }
}
343
/// Returns `true` for bytes allowed in an undelimited field value: ASCII
/// letters and digits plus `_`, `-`, `+`, `.`, `:` and `/`.
fn is_bare_value_byte(b: u8) -> bool {
    const EXTRA: &[u8] = b"_-+.:/";
    if b.is_ascii_alphanumeric() {
        return true;
    }
    EXTRA.contains(&b)
}