mos_core/span.rs
1//! Source locations.
2//!
3//! A [`SourceSpan`] is a byte range in a named source file; [`linecol`]
4//! converts a byte offset within source text into a 1-based `(line, column)`
5//! pair for rendering.
6
7use std::path::PathBuf;
8
/// A byte-range location in a source file (manifest §6 stage 1).
///
/// `start` and `end` are private so the `start <= end` invariant cannot be
/// violated: [`SourceSpan::new`], [`SourceSpan::set_start`], and
/// [`SourceSpan::set_end`] each check it in every build profile. Read the
/// offsets through [`SourceSpan::start`], [`SourceSpan::end`], or
/// [`SourceSpan::range`].
///
/// # Examples
///
/// ```
/// use std::path::PathBuf;
///
/// use mos_core::SourceSpan;
///
/// let span = SourceSpan::new(PathBuf::from("main.mos"), 2, 8);
///
/// assert_eq!(span.start(), 2);
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SourceSpan {
    /// The source file this range points into.
    pub file: PathBuf,
    /// Byte offset of the first covered byte (inclusive).
    start: usize,
    /// Byte offset one past the last covered byte (exclusive); always
    /// `>= start`.
    end: usize,
}

impl SourceSpan {
    /// Construct a span covering `start..end` in `file`.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mos_core::SourceSpan;
    ///
    /// let span = SourceSpan::new(PathBuf::from("main.mos"), 4, 9);
    ///
    /// assert_eq!(span.end(), 9);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if `start > end`; a backwards span is a programmer error,
    /// never user input. Enforced in all builds, exactly like
    /// [`SourceSpan::set_start`] and [`SourceSpan::set_end`], so an inverted
    /// span can never escape into release.
    #[must_use]
    pub fn new(file: PathBuf, start: usize, end: usize) -> Self {
        // `assert!` rather than `debug_assert!`: the setters already enforce
        // the invariant unconditionally, and the constructor is the only
        // other place the private offsets can be written with caller input.
        assert!(
            start <= end,
            "SourceSpan start ({start}) must not exceed end ({end})"
        );
        Self { file, start, end }
    }

    /// Byte offset of the first covered byte (inclusive).
    #[must_use]
    pub const fn start(&self) -> usize {
        self.start
    }

    /// Byte offset one past the last covered byte (exclusive); always
    /// `>= start`.
    #[must_use]
    pub const fn end(&self) -> usize {
        self.end
    }

    /// The covered byte range, ready to slice the source text it points into.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mos_core::SourceSpan;
    ///
    /// let src = "let x = 1;";
    /// let span = SourceSpan::new(PathBuf::from("main.mos"), 4, 5);
    ///
    /// assert_eq!(&src[span.range()], "x");
    /// ```
    #[must_use]
    pub fn range(&self) -> std::ops::Range<usize> {
        self.start..self.end
    }

    /// Move the start of the span to `start`, preserving `start <= end`.
    ///
    /// # Panics
    ///
    /// Panics if `start` would exceed the current `end`. Enforced in all
    /// builds so an inverted span can never escape into release.
    pub fn set_start(&mut self, start: usize) {
        assert!(
            start <= self.end,
            "SourceSpan start ({start}) must not exceed end ({})",
            self.end
        );
        self.start = start;
    }

    /// Move the end of the span to `end`, preserving `start <= end`.
    ///
    /// # Panics
    ///
    /// Panics if `end` would fall below the current `start`. Enforced in all
    /// builds so an inverted span can never escape into release.
    pub fn set_end(&mut self, end: usize) {
        assert!(
            self.start <= end,
            "SourceSpan end ({end}) must not fall below start ({})",
            self.start
        );
        self.end = end;
    }

    /// A zero-length placeholder span anchored at the start of `file`.
    ///
    /// Both offsets are `0`, so the `start <= end` invariant holds trivially.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mos_core::SourceSpan;
    ///
    /// let span = SourceSpan::placeholder(PathBuf::from("main.mos"));
    ///
    /// assert_eq!((span.start(), span.end()), (0, 0));
    /// ```
    #[must_use]
    pub fn placeholder(file: PathBuf) -> Self {
        Self {
            file,
            start: 0,
            end: 0,
        }
    }
}
149
/// Translate a byte offset in `src` into a 1-based `(line, column)` pair.
///
/// Columns count *Unicode scalar values* (`char`s) rather than bytes, so the
/// offset just past a two-byte `µ` at the start of a line is column 2, not
/// column 3. Lines are delimited by `\n` only. Both components of the result
/// are at least 1.
///
/// Out-of-range and misaligned offsets are tolerated rather than rejected:
/// an offset beyond `src.len()` is clamped to the end of the text, and an
/// offset that lands inside a multi-byte code point is rounded down to the
/// first byte of that code point.
///
/// # Examples
///
/// ```
/// use mos_core::linecol;
///
/// assert_eq!(linecol("a\nb", 2), (2, 1));
/// ```
#[must_use]
pub fn linecol(src: &str, byte_offset: usize) -> (usize, usize) {
    // Clamp into the text, then walk back to the nearest char boundary.
    // Offset 0 is always a boundary, so the search cannot come up empty.
    let limit = byte_offset.min(src.len());
    let offset = (0..=limit)
        .rev()
        .find(|&idx| src.is_char_boundary(idx))
        .unwrap_or(0);

    // Everything that precedes the (adjusted) offset.
    let before = &src[..offset];

    // One line per newline already passed, plus the line we are standing on.
    let line = before.bytes().filter(|&byte| byte == b'\n').count() + 1;

    // The current line begins right after the last newline seen, if any.
    let line_start = before.rfind('\n').map_or(0, |newline| newline + 1);
    let column = before[line_start..].chars().count() + 1;

    (line, column)
}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `linecol(src, offset)` for every `(offset, expected)` pair and
    /// compare against `expected`, naming the offending offset on failure.
    #[track_caller]
    fn assert_linecols(src: &str, cases: &[(usize, (usize, usize))]) {
        for &(offset, expected) in cases {
            assert_eq!(linecol(src, offset), expected, "byte offset {offset}");
        }
    }

    #[test]
    fn linecol_handles_ascii_offsets() {
        assert_linecols(
            "ab\ncd\nef",
            &[
                (0, (1, 1)),
                (1, (1, 2)),
                (2, (1, 3)),
                (3, (2, 1)),
                (6, (3, 1)),
                (7, (3, 2)),
                // An offset past the end clamps to the end of the text.
                (9999, (3, 3)),
            ],
        );
    }

    #[test]
    fn linecol_counts_chars_not_bytes() {
        // `µ` occupies 2 bytes in UTF-8 and `字` occupies 3, yet the column
        // just past either one must be 2 (one char), not 3 or 4 (bytes).
        assert_linecols(
            "µx\n字y\n",
            &[
                (0, (1, 1)),
                (2, (1, 2)), // after `µ`
                (3, (1, 3)), // after `µx`
                (4, (2, 1)), // start of line 2
                (7, (2, 2)), // after `字`
            ],
        );
    }

    #[test]
    fn linecol_offsets_inside_codepoints_round_down() {
        // Offset 1 is the second byte of `µ`; it must resolve to line 1,
        // column 1 instead of panicking on a non-boundary slice.
        assert_linecols("µ", &[(1, (1, 1))]);
    }
}