Skip to main content

mos_pdf/
content.rs

1//! Per-page PDF content stream emission.
2
3use std::collections::HashMap;
4
5use mos_core::{CoreError, Diagnostic, DiagnosticAnnotation, Result, codes};
6use mos_fonts::EmbeddedFontId;
7use mos_layout::{Font, TextRun};
8use pdf_writer::{Content, Name, Str, TextStr};
9
10use crate::embedded::{self, ContentOp, EmbeddedFontPlan};
11use crate::encoding::DocEncoding;
12use crate::images;
13
14/// Build the per-page content stream. The layout engine measures
15/// baselines from the **top** of the page; PDF's coordinate system is
16/// bottom-origin, so we flip once here.
17///
18/// Base14 runs encode through the planner's per-face `DocEncoding`
19/// (`WinAnsi` byte + `/Differences` remap; characters outside both
20/// tiers silently render as `?`). Embedded-font runs encode the
21/// shaped glyph stream as big-endian `u16` CIDs.
22///
23/// Image placements (raster `XObject`s) emit *outside* the text object
24///: `BT/ET` brackets only permit text operators, so each image
25/// placement is wrapped in its own `q ... Q` save/restore pair before
26/// the text block starts. Putting images first means subsequent text
27/// can overlay (e.g. a caption beneath the image is unaffected, but
28/// in-line annotations atop a figure would land on top).
29pub(crate) fn build_content_stream(
30    page_height_pt: f32,
31    page: &mos_layout::Page,
32    encodings: &HashMap<Font, DocEncoding>,
33    embedded_by_id: &HashMap<EmbeddedFontId, &EmbeddedFontPlan>,
34) -> Result<Vec<u8>> {
35    let mut content = Content::new();
36    for placement in &page.images {
37        images::emit_placement(&mut content, page_height_pt, placement);
38    }
39    if page.runs.is_empty() {
40        return Ok(content.finish().to_vec());
41    }
42    content.begin_text();
43    let mut i = 0;
44    while i < page.runs.len() {
45        let run = &page.runs[i];
46        if let Some(actual_text) = run.actual_text.as_deref() {
47            {
48                let mut marked = content.begin_marked_content_with_properties(Name(b"Span"));
49                marked.properties().actual_text(TextStr(actual_text));
50            }
51            while i < page.runs.len() && page.runs[i].actual_text.as_deref() == Some(actual_text) {
52                emit_text_run(
53                    &mut content,
54                    page_height_pt,
55                    &page.runs[i],
56                    encodings,
57                    embedded_by_id,
58                )?;
59                i += 1;
60            }
61            content.end_marked_content();
62        } else {
63            emit_text_run(&mut content, page_height_pt, run, encodings, embedded_by_id)?;
64            i += 1;
65        }
66    }
67    content.end_text();
68    Ok(content.finish().to_vec())
69}
70
71fn emit_text_run(
72    content: &mut Content,
73    page_height_pt: f32,
74    run: &TextRun,
75    encodings: &HashMap<Font, DocEncoding>,
76    embedded_by_id: &HashMap<EmbeddedFontId, &EmbeddedFontPlan>,
77) -> Result<()> {
78    content.set_font(Name(run.font.pdf_resource_name()), run.size_pt);
79    let y_from_bottom = page_height_pt - run.baseline_from_top_pt;
80    match run.font {
81        Font::Base14(_) => {
82            content.set_text_matrix([1.0, 0.0, 0.0, 1.0, run.x_pt, y_from_bottom]);
83            let bytes = encode_base14_run(&run.text, run.font, encodings);
84            content.show(Str(&bytes));
85        }
86        Font::Embedded(id) => {
87            let plan = embedded_by_id.get(&id).ok_or_else(|| {
88                CoreError::Diagnostic(Box::new(
89                    Diagnostic::simple(
90                        &codes::MOS0021,
91                        None,
92                        format!("missing embedded font plan for {:?} (id {id:?})", run.font),
93                    )
94                    .with_annotation(DiagnosticAnnotation::Note(
95                        "PDF emission expected an embedded plan for every embedded text run"
96                            .to_owned(),
97                    )),
98                ))
99            })?;
100            emit_embedded_glyph_run(content, plan, run, y_from_bottom);
101        }
102    }
103    Ok(())
104}
105
106fn emit_embedded_glyph_run(
107    content: &mut Content,
108    plan: &EmbeddedFontPlan,
109    run: &TextRun,
110    y_from_bottom: f32,
111) {
112    let ops = embedded::encode_glyph_run(plan, &run.glyphs, run.size_pt, run.x_pt, y_from_bottom);
113    let mut pending: Vec<PositionedItem> = Vec::new();
114    for op in ops {
115        match op {
116            ContentOp::SetTextMatrix(matrix) => {
117                emit_text_items(content, &mut pending);
118                content.set_text_matrix(matrix);
119            }
120            ContentOp::ShowCids(cids) => {
121                pending.push(PositionedItem::Cids(cids));
122            }
123            ContentOp::AdjustText(amount) => {
124                pending.push(PositionedItem::Adjust(amount));
125            }
126        }
127    }
128    emit_text_items(content, &mut pending);
129}
130
/// One buffered element of an embedded glyph run, queued until the next
/// text-matrix change (or the end of the run) so the group can be emitted
/// together.
enum PositionedItem {
    /// CIDs to show; converted to bytes with `embedded::cids_to_bytes` when
    /// emitted.
    Cids(Vec<u16>),
    /// Adjustment amount, forwarded unchanged to the positioned-show item
    /// list.
    Adjust(f32),
}
135
136fn emit_text_items(content: &mut Content, pending: &mut Vec<PositionedItem>) {
137    if pending.is_empty() {
138        return;
139    }
140
141    if pending
142        .iter()
143        .any(|item| matches!(item, PositionedItem::Adjust(_)))
144    {
145        emit_positioned_text(content, pending);
146    } else {
147        emit_simple_text(content, pending);
148    }
149}
150
151fn emit_positioned_text(content: &mut Content, pending: &mut Vec<PositionedItem>) {
152    let mut show = content.show_positioned();
153    let mut items = show.items();
154    for item in pending.drain(..) {
155        match item {
156            PositionedItem::Cids(cids) => {
157                let bytes = embedded::cids_to_bytes(&cids);
158                items.show(Str(&bytes));
159            }
160            PositionedItem::Adjust(amount) => {
161                items.adjust(amount);
162            }
163        }
164    }
165}
166
167fn emit_simple_text(content: &mut Content, pending: &mut Vec<PositionedItem>) {
168    for item in pending.drain(..) {
169        if let PositionedItem::Cids(cids) = item {
170            let bytes = embedded::cids_to_bytes(&cids);
171            content.show(Str(&bytes));
172        }
173    }
174}
175
176/// Encode `text` against a Base14 face's `DocEncoding`. The planner
177/// guarantees `byte_for_char` covers every `WinAnsi` native and every
178/// extended Latin char that fit into the 256-slot budget; any char
179/// outside both: Cyrillic, CJK, emoji: renders as `?`. Documents
180/// that need real coverage should pick the bundled Noto Sans family
181/// (the default; users hit this Base14 path only by explicitly asking
182/// for `Helvetica`/`Times`/`Courier` via `#set text(font: ...)`).
183fn encode_base14_run(text: &str, font: Font, encodings: &HashMap<Font, DocEncoding>) -> Vec<u8> {
184    let map = encodings.get(&font).map(|e| &e.byte_for_char);
185    let mut out = Vec::with_capacity(text.len());
186    for ch in text.chars() {
187        let byte = map
188            .and_then(|m| m.get(&ch).copied())
189            .or_else(|| mos_fonts::winansi_byte(ch))
190            .unwrap_or(b'?');
191        out.push(byte);
192    }
193    out
194}
195
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::error::Error;

    use mos_layout::{Page, TextRun};

    use super::*;

    /// Tests return `Err` with a message instead of panicking.
    type TestResult = std::result::Result<(), Box<dyn Error>>;

    /// Return early from the test with a formatted error when `$cond` is
    /// false.
    macro_rules! ensure {
        ($cond:expr, $($arg:tt)*) => {
            if !$cond {
                return Err(format!($($arg)*).into());
            }
        };
    }

    /// An embedded-font run with no registered plan must surface a
    /// `MOS0021` diagnostic whose message names both the font and its id.
    #[test]
    fn missing_embedded_plan_returns_diagnostic() -> TestResult {
        let face = EmbeddedFontId::Regular;
        let page = Page {
            number: 1,
            width_pt: 595.276_f32,
            height_pt: 841.89_f32,
            runs: vec![TextRun {
                x_pt: 68.0,
                baseline_from_top_pt: 100.0,
                size_pt: 12.0,
                font: Font::Embedded(face),
                text: "Body".to_owned(),
                actual_text: None,
                glyphs: mos_fonts::shape(face.data(), "Body"),
            }],
            images: Vec::new(),
        };

        let err = build_content_stream(
            page.height_pt,
            &page,
            &HashMap::new(),
            &HashMap::<EmbeddedFontId, &EmbeddedFontPlan>::new(),
        )
        .err()
        .ok_or("missing embedded plan unexpectedly succeeded")?;
        let diagnostic = match err {
            CoreError::Diagnostic(diagnostic) => diagnostic,
            other => return Err(format!("expected diagnostic error, got {other:?}").into()),
        };
        ensure!(
            diagnostic.def().code() == codes::MOS0021.code(),
            "wrong code: {:?}",
            diagnostic.def().code()
        );
        // Check the font and the id separately: a bare "Regular" substring
        // is already implied by "Embedded(Regular)" and would not prove the
        // `(id …)` part of the message is present.
        ensure!(
            diagnostic.message().contains("Embedded(Regular)")
                && diagnostic.message().contains("(id Regular)"),
            "missing context in message: {:?}",
            diagnostic.message()
        );
        Ok(())
    }
}