Skip to main content

mos_core/
span.rs

1//! Source locations.
2//!
3//! A [`SourceSpan`] is a byte range in a named source file; [`linecol`]
4//! converts a byte offset within source text into a 1-based `(line, column)`
5//! pair for rendering.
6
7use std::path::PathBuf;
8
/// A byte-range location in a source file (manifest §6 stage 1).
///
/// The range is half-open: `start` is the first covered byte and `end` is one
/// past the last, so `start == end` denotes an empty span.
///
/// `start` and `end` are private so the `start <= end` invariant cannot be
/// broken from outside this module: [`SourceSpan::new`],
/// [`SourceSpan::set_start`] and [`SourceSpan::set_end`] all reject an
/// inverted range in every build profile. Read the bounds through
/// [`SourceSpan::start`], [`SourceSpan::end`], or [`SourceSpan::range`].
///
/// # Examples
///
/// ```
/// use std::path::PathBuf;
///
/// use mos_core::SourceSpan;
///
/// let span = SourceSpan::new(PathBuf::from("main.mos"), 2, 8);
///
/// assert_eq!(span.start(), 2);
/// assert_eq!(span.range(), 2..8);
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SourceSpan {
    /// The source file this range points into.
    ///
    /// Public because it takes no part in the `start <= end` invariant.
    pub file: PathBuf,
    /// Byte offset of the first covered byte (inclusive).
    start: usize,
    /// Byte offset one past the last covered byte (exclusive); always
    /// `>= start`.
    end: usize,
}

impl SourceSpan {
    /// Construct a span covering `start..end` in `file`.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mos_core::SourceSpan;
    ///
    /// let span = SourceSpan::new(PathBuf::from("main.mos"), 4, 9);
    ///
    /// assert_eq!(span.end(), 9);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if `start > end`; a backwards span is a programmer error,
    /// never user input. Enforced in all builds, like [`SourceSpan::set_start`]
    /// and [`SourceSpan::set_end`], so an inverted span can never escape into
    /// release.
    #[must_use]
    pub fn new(file: PathBuf, start: usize, end: usize) -> Self {
        // `assert!` rather than `debug_assert!`: with the check compiled out,
        // a release build would hand back an inverted span that only blows up
        // later, wherever `range()` is used to slice the source text.
        assert!(
            start <= end,
            "SourceSpan start ({start}) must not exceed end ({end})"
        );
        Self { file, start, end }
    }

    /// Byte offset of the first covered byte (inclusive).
    #[must_use]
    pub const fn start(&self) -> usize {
        self.start
    }

    /// Byte offset one past the last covered byte (exclusive); always
    /// `>= start`.
    #[must_use]
    pub const fn end(&self) -> usize {
        self.end
    }

    /// The covered byte range, ready to slice the source text it points into.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mos_core::SourceSpan;
    ///
    /// let src = "let x = 1;";
    /// let span = SourceSpan::new(PathBuf::from("main.mos"), 4, 5);
    ///
    /// assert_eq!(&src[span.range()], "x");
    /// ```
    #[must_use]
    pub const fn range(&self) -> std::ops::Range<usize> {
        self.start..self.end
    }

    /// Move the start of the span to `start`, preserving `start <= end`.
    ///
    /// Setting `start` equal to the current `end` is allowed and yields an
    /// empty span.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mos_core::SourceSpan;
    ///
    /// let mut span = SourceSpan::new(PathBuf::from("main.mos"), 2, 8);
    /// span.set_start(5);
    ///
    /// assert_eq!(span.range(), 5..8);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if `start` would exceed the current `end`. Enforced in all
    /// builds so an inverted span can never escape into release.
    pub fn set_start(&mut self, start: usize) {
        assert!(
            start <= self.end,
            "SourceSpan start ({start}) must not exceed end ({})",
            self.end
        );
        self.start = start;
    }

    /// Move the end of the span to `end`, preserving `start <= end`.
    ///
    /// Setting `end` equal to the current `start` is allowed and yields an
    /// empty span.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mos_core::SourceSpan;
    ///
    /// let mut span = SourceSpan::new(PathBuf::from("main.mos"), 2, 8);
    /// span.set_end(12);
    ///
    /// assert_eq!(span.range(), 2..12);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if `end` would fall below the current `start`. Enforced in all
    /// builds so an inverted span can never escape into release.
    pub fn set_end(&mut self, end: usize) {
        assert!(
            self.start <= end,
            "SourceSpan end ({end}) must not fall below start ({})",
            self.start
        );
        self.end = end;
    }

    /// A zero-length placeholder span anchored at the start of `file`.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    ///
    /// use mos_core::SourceSpan;
    ///
    /// let span = SourceSpan::placeholder(PathBuf::from("main.mos"));
    ///
    /// assert_eq!((span.start(), span.end()), (0, 0));
    /// ```
    #[must_use]
    pub fn placeholder(file: PathBuf) -> Self {
        Self {
            file,
            start: 0,
            end: 0,
        }
    }
}
149
/// Translate a byte offset in `src` into the 1-based `(line, column)` pair
/// used when rendering a location.
///
/// Lines are separated by `\n`. The column counts *Unicode scalar values*
/// (`char`s) from the start of the line, not bytes, so the byte just after
/// `µ` is column 2 rather than 3. Both components are at least 1.
///
/// Out-of-range input is tolerated rather than rejected: an offset beyond
/// the end of `src` is treated as the end, and an offset that lands inside a
/// multi-byte UTF-8 sequence is moved back to the first byte of that
/// sequence.
///
/// # Examples
///
/// ```
/// use mos_core::linecol;
///
/// assert_eq!(linecol("a\nb", 2), (2, 1));
/// ```
#[must_use]
pub fn linecol(src: &str, byte_offset: usize) -> (usize, usize) {
    // Nearest char boundary at or before the (clamped) offset. Offset 0 is
    // always a boundary, so the search cannot come up empty in practice.
    let limit = byte_offset.min(src.len());
    let cut = (0..=limit)
        .rev()
        .find(|&idx| src.is_char_boundary(idx))
        .unwrap_or(0);

    // Everything strictly before the target position.
    let before = &src[..cut];

    // One line per newline already passed, plus the first line.
    let line = before.bytes().filter(|&byte| byte == b'\n').count() + 1;

    // The current line begins right after the last newline seen, if any.
    let line_begin = before.rfind('\n').map_or(0, |newline| newline + 1);
    let column = before[line_begin..].chars().count() + 1;

    (line, column)
}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a span in a fixed file so each test only states the bounds it
    /// cares about.
    fn span(start: usize, end: usize) -> SourceSpan {
        SourceSpan::new("main.mos".into(), start, end)
    }

    #[test]
    fn linecol_handles_ascii_offsets() {
        let src = "ab\ncd\nef";
        assert_eq!(linecol(src, 0), (1, 1));
        assert_eq!(linecol(src, 1), (1, 2));
        assert_eq!(linecol(src, 2), (1, 3));
        assert_eq!(linecol(src, 3), (2, 1));
        assert_eq!(linecol(src, 6), (3, 1));
        assert_eq!(linecol(src, 7), (3, 2));
        // Past the end clamps.
        assert_eq!(linecol(src, 9999), (3, 3));
    }

    #[test]
    fn linecol_counts_chars_not_bytes() {
        // `µ` is 2 bytes in UTF-8, `字` is 3 bytes. The column for the
        // byte after them should still be 2, not 3 / 4.
        let src = "µx\n字y\n";
        assert_eq!(linecol(src, 0), (1, 1));
        assert_eq!(linecol(src, 2), (1, 2)); // after `µ`
        assert_eq!(linecol(src, 3), (1, 3)); // after `µx`
        assert_eq!(linecol(src, 4), (2, 1)); // start of line 2
        assert_eq!(linecol(src, 7), (2, 2)); // after `字`
    }

    #[test]
    fn linecol_offsets_inside_codepoints_round_down() {
        // Pointing at the second byte of `µ` should still report
        // column 1 of line 1, not panic.
        let src = "µ";
        assert_eq!(linecol(src, 1), (1, 1));
    }

    #[test]
    fn linecol_handles_empty_source() {
        // With no text at all, every offset clamps to the very start.
        assert_eq!(linecol("", 0), (1, 1));
        assert_eq!(linecol("", 5), (1, 1));
    }

    #[test]
    fn span_accessors_report_constructed_bounds() {
        let s = span(2, 8);
        assert_eq!(s.start(), 2);
        assert_eq!(s.end(), 8);
        assert_eq!(s.range(), 2..8);
        assert_eq!(s.file.to_str(), Some("main.mos"));
    }

    #[test]
    fn span_range_slices_source_text() {
        let src = "let x = 1;";
        assert_eq!(&src[span(4, 5).range()], "x");
    }

    #[test]
    fn span_setters_accept_moves_that_keep_order() {
        let mut s = span(2, 8);
        // Collapsing to an empty span is allowed: start == end.
        s.set_start(8);
        assert_eq!(s.range(), 8..8);
        s.set_end(10);
        s.set_start(0);
        assert_eq!(s.range(), 0..10);
    }

    #[test]
    #[should_panic(expected = "SourceSpan start (9) must not exceed end (8)")]
    fn span_set_start_rejects_inversion() {
        span(2, 8).set_start(9);
    }

    #[test]
    #[should_panic(expected = "SourceSpan end (1) must not fall below start (2)")]
    fn span_set_end_rejects_inversion() {
        span(2, 8).set_end(1);
    }

    #[test]
    fn span_placeholder_is_empty_at_origin() {
        let s = SourceSpan::placeholder("main.mos".into());
        assert_eq!((s.start(), s.end()), (0, 0));
        assert_eq!(s.file.to_str(), Some("main.mos"));
    }
}
220}