1use mos_fonts::{EmbeddedFontId, Font, ShapedGlyph, WordSubRun, shape_with_fallback, text_width};
2
3#[derive(Clone, Debug)]
4pub(crate) struct Word {
5 pub(crate) text: String,
6 pub(crate) actual_text: Option<String>,
7 pub(crate) font: Font,
13 pub(crate) size_pt: f32,
14 pub(crate) width_pt: f32,
18 pub(crate) subruns: Vec<WordSubRun>,
28 pub(crate) shy_break_offsets: Vec<usize>,
38}
39
40#[derive(Clone, Debug)]
47pub(crate) struct ShyBreak {
48 pub(crate) prefix: Word,
49 pub(crate) suffix: Word,
50}
51
52pub(crate) fn try_shy_break(
68 word: &Word,
69 max_prefix_width: f32,
70 fallbacks: &[EmbeddedFontId],
71) -> Option<ShyBreak> {
72 if word.shy_break_offsets.is_empty() {
73 return None;
74 }
75 let text_len = word.text.len();
76 let mut seen: Option<usize> = None;
79 for &off in word.shy_break_offsets.iter().rev() {
80 if off == 0 || off >= text_len {
81 continue;
82 }
83 if seen == Some(off) {
84 continue;
85 }
86 seen = Some(off);
87 let Some(prefix_src) = word.text.get(..off) else {
88 continue;
89 };
90 let mut prefix_text = String::with_capacity(prefix_src.len() + 1);
91 prefix_text.push_str(prefix_src);
92 prefix_text.push('-');
93 let prefix_subruns = shape_with_fallback(word.font, fallbacks, word.size_pt, &prefix_text);
94 let prefix_width: f32 = prefix_subruns.iter().map(|s| s.advance_pt).sum();
95 if prefix_width > max_prefix_width {
96 continue;
99 }
100 let Some(suffix_src) = word.text.get(off..) else {
101 continue;
102 };
103 let suffix_text = suffix_src.to_owned();
104 let suffix_len = suffix_text.len();
105 let suffix_offsets: Vec<usize> = word
106 .shy_break_offsets
107 .iter()
108 .filter_map(|&o| {
109 if o > off {
110 let rebased = o - off;
111 if rebased > 0 && rebased < suffix_len {
112 Some(rebased)
113 } else {
114 None
115 }
116 } else {
117 None
118 }
119 })
120 .collect();
121 let suffix_subruns = shape_with_fallback(word.font, fallbacks, word.size_pt, &suffix_text);
122 let suffix_width: f32 = suffix_subruns.iter().map(|s| s.advance_pt).sum();
123 let prefix = Word {
124 text: prefix_text,
125 actual_text: None,
126 font: word.font,
127 size_pt: word.size_pt,
128 width_pt: prefix_width,
129 subruns: prefix_subruns,
130 shy_break_offsets: Vec::new(),
133 };
134 let suffix = Word {
135 text: suffix_text,
136 actual_text: None,
137 font: word.font,
138 size_pt: word.size_pt,
139 width_pt: suffix_width,
140 subruns: suffix_subruns,
141 shy_break_offsets: suffix_offsets,
142 };
143 return Some(ShyBreak { prefix, suffix });
144 }
145 None
146}
147
148#[derive(Clone, Debug)]
153pub(crate) enum WordItem {
154 Word(Word),
155 HardBreak,
156}
157
/// Removes every soft hyphen (U+00AD) from `text` and reports where they were.
///
/// Returns the stripped text together with one entry per removed soft hyphen:
/// the byte offset, within the *stripped* text, at which it stood. Consecutive
/// soft hyphens therefore yield repeated offsets, a leading one yields `0`, and
/// a trailing one yields the stripped length. Text without soft hyphens comes
/// back unchanged with an empty offset list.
pub(crate) fn split_soft_hyphens(text: &str) -> (String, Vec<usize>) {
    const SOFT_HYPHEN: char = '\u{AD}';

    let mut stripped = String::with_capacity(text.len());
    let mut offsets = Vec::new();

    // `split` always yields at least one piece; every further piece was
    // preceded by exactly one soft hyphen.
    let mut pieces = text.split(SOFT_HYPHEN);
    if let Some(first) = pieces.next() {
        stripped.push_str(first);
    }
    for piece in pieces {
        offsets.push(stripped.len());
        stripped.push_str(piece);
    }

    (stripped, offsets)
}
187
188pub(crate) fn word_clusters(word: &Word) -> Vec<WordSubRun> {
189 let mut clusters = Vec::new();
190 for sub in &word.subruns {
191 if sub.glyphs.is_empty() {
192 for ch in sub.text.chars() {
193 let mut text = String::new();
194 text.push(ch);
195 clusters.push(WordSubRun {
196 font: sub.font,
197 advance_pt: text_width(sub.font, word.size_pt, &text),
198 text,
199 glyphs: Vec::new(),
200 });
201 }
202 continue;
203 }
204
205 let mut i = 0;
206 while i < sub.glyphs.len() {
207 let cluster = sub.glyphs[i].cluster;
208 let mut j = i + 1;
209 while j < sub.glyphs.len() && sub.glyphs[j].cluster == cluster {
210 j += 1;
211 }
212 let start = usize::try_from(cluster).unwrap_or(usize::MAX);
213 let end = if j < sub.glyphs.len() {
214 usize::try_from(sub.glyphs[j].cluster).unwrap_or(usize::MAX)
215 } else {
216 sub.text.len()
217 };
218 debug_assert!(start <= end && end <= sub.text.len());
219 let Some(text) = sub.text.get(start..end) else {
220 i = j;
221 continue;
222 };
223 let shift = u32::try_from(start).unwrap_or(u32::MAX);
224 let glyphs: Vec<_> = sub.glyphs[i..j]
225 .iter()
226 .map(|g| ShapedGlyph {
227 cluster: g.cluster.saturating_sub(shift),
228 ..*g
229 })
230 .collect();
231 clusters.push(WordSubRun {
232 font: sub.font,
233 text: text.to_owned(),
234 advance_pt: glyphs_advance_pt(sub.font, word.size_pt, &glyphs),
235 glyphs,
236 });
237 i = j;
238 }
239 }
240 clusters
241}
242
243fn glyphs_advance_pt(font: Font, size_pt: f32, glyphs: &[ShapedGlyph]) -> f32 {
244 let upem = match font {
245 Font::Embedded(id) => f32::from(id.data().units_per_em),
246 Font::Base14(_) => 1000.0,
247 };
248 glyphs
251 .iter()
252 .map(|g| mos_fonts::advance_units_to_pt(g.advance_units, size_pt, upem))
253 .sum()
254}
255
#[cfg(test)]
mod tests {
    use super::{Word, split_soft_hyphens, try_shy_break};
    use mos_fonts::{Base14Font, Font, WordSubRun, shape_with_fallback, text_width};

    /// Width limit large enough that every candidate prefix fits.
    const ROOMY: f32 = 1000.0;

    /// Builds a 12pt Helvetica word shaped without fallbacks, carrying the given
    /// soft-hyphen break offsets.
    fn make_shy_word(text: &str, offsets: Vec<usize>) -> Word {
        let font = Font::Base14(Base14Font::Helvetica);
        let size_pt = 12.0;
        let subruns: Vec<WordSubRun> = shape_with_fallback(font, &[], size_pt, text);
        let width_pt: f32 = subruns.iter().map(|run| run.advance_pt).sum();
        Word {
            text: text.to_owned(),
            actual_text: None,
            font,
            size_pt,
            width_pt,
            subruns,
            shy_break_offsets: offsets,
        }
    }

    // Text without soft hyphens comes back untouched.
    #[test]
    fn split_soft_hyphens_no_op_when_absent() {
        let (plain, breaks) = split_soft_hyphens("hello");
        assert_eq!(plain, "hello");
        assert!(breaks.is_empty());
    }

    // Offsets refer to the stripped text, not the input.
    #[test]
    fn split_soft_hyphens_records_offsets_in_stripped_text() {
        let (plain, breaks) = split_soft_hyphens("super\u{AD}cali\u{AD}fragil");
        assert_eq!(plain, "supercalifragil");
        assert_eq!(breaks, vec![5, 9]);
    }

    // Back-to-back soft hyphens produce the same offset twice.
    #[test]
    fn split_soft_hyphens_handles_consecutive_shy() {
        let (plain, breaks) = split_soft_hyphens("a\u{AD}\u{AD}b");
        assert_eq!(plain, "ab");
        assert_eq!(breaks, vec![1, 1]);
    }

    #[test]
    fn try_shy_break_returns_none_when_no_offsets() {
        let plain = make_shy_word("hello", Vec::new());
        assert!(try_shy_break(&plain, ROOMY, &[]).is_none());
    }

    // With plenty of room the last offset wins and nothing is left for the suffix.
    #[test]
    fn try_shy_break_picks_latest_offset_that_fits() {
        let long_word = make_shy_word("supercalifragil", vec![5, 9]);
        let split = try_shy_break(&long_word, ROOMY, &[]).expect("must split");
        assert_eq!(split.prefix.text, "supercali-");
        assert_eq!(split.suffix.text, "fragil");
        assert!(split.suffix.shy_break_offsets.is_empty());
    }

    // Only "super-" fits, so the later offset survives in the suffix, rebased.
    #[test]
    fn try_shy_break_falls_back_to_earlier_offset_when_latest_overflows() {
        let long_word = make_shy_word("supercalifragil", vec![5, 9]);
        let limit = text_width(long_word.font, long_word.size_pt, "super-") + 0.5;
        let split = try_shy_break(&long_word, limit, &[]).expect("must split");
        assert_eq!(split.prefix.text, "super-");
        assert_eq!(split.suffix.text, "califragil");
        assert_eq!(split.suffix.shy_break_offsets, vec![4]);
    }

    // Offsets at the very start or end never produce a split.
    #[test]
    fn try_shy_break_ignores_leading_and_trailing_offsets() {
        let edge_only = make_shy_word("foo", vec![0, 3]);
        assert!(try_shy_break(&edge_only, ROOMY, &[]).is_none());
    }

    #[test]
    fn try_shy_break_returns_none_when_no_break_fits() {
        let long_word = make_shy_word("supercalifragil", vec![5, 9]);
        assert!(try_shy_break(&long_word, 1.0, &[]).is_none());
    }

    // Duplicate offsets still yield a single, valid split.
    #[test]
    fn try_shy_break_dedupes_consecutive_duplicate_offsets() {
        let doubled = make_shy_word("ab", vec![1, 1]);
        let split = try_shy_break(&doubled, ROOMY, &[]).expect("must split");
        assert_eq!(split.prefix.text, "a-");
        assert_eq!(split.suffix.text, "b");
    }
}
358}