// pdf_base14_metrics/winansi_table.rs
//
// PDF WinAnsiEncoding byte → PostScript glyph name mapping.
//
// Source: PDF 1.7 Annex D.2, Table D.2 ("Latin Character Set and
// Encodings"), column "WIN". This is NOT Microsoft CP1252 as decoded
// by Windows: codes 0x7F, 0x81, 0x8D, 0x8F, 0x90, 0x9D are unmapped
// gaps in this table, whereas CP1252 treats 0x7F as DEL and leaves
// the other five unassigned (no printable glyph in either case). The
// PDF WinAnsi table is what every conformant PDF reader uses when a
// font's /Encoding is /WinAnsiEncoding, so it's what the Mosaic PDF
// backend must agree with.
//
// This file is shared with build.rs via `include!` and consumed by
// `src/lib.rs` via `mod winansi_table;`: single source of truth.
// Use regular `//` comments only: inner doc comments (`//!`) would
// break the `include!` consumer because they would land mid-file in
// build.rs rather than at the top of a module.
//
// Per PDF 1.7 Annex D.2 the encoding pins two aliasing rules:
// - 0xA0 (non-breaking space) renders with the same glyph as 0x20
//   (`space`).
// - 0xAD (soft hyphen) renders with the same glyph as 0x2D
//   (`hyphen`).
// Both aliases are represented by listing the same glyph name at
// both codes, so byte-indexed lookups need no special casing.

// 256-entry table indexed by byte value. Each slot holds the
// PostScript glyph name for that byte, or `None` when the byte is
// unmapped: the 32 C0 control codes 0x00..=0x1F plus the six gaps
// 0x7F, 0x81, 0x8D, 0x8F, 0x90 and 0x9D (38 `None` slots in total).
//
// Two codes deliberately repeat a glyph name that already appears
// earlier in the table: 0xA0 repeats `space` (0x20) and 0xAD repeats
// `hyphen` (0x2D), so indexing by byte needs no alias handling.
pub(crate) const WINANSI_TABLE: [Option<&str>; 256] = [
    // 0x00..=0x1F: C0 control characters, all unmapped.
    None, // 0x00
    None, // 0x01
    None, // 0x02
    None, // 0x03
    None, // 0x04
    None, // 0x05
    None, // 0x06
    None, // 0x07
    None, // 0x08
    None, // 0x09
    None, // 0x0A
    None, // 0x0B
    None, // 0x0C
    None, // 0x0D
    None, // 0x0E
    None, // 0x0F
    None, // 0x10
    None, // 0x11
    None, // 0x12
    None, // 0x13
    None, // 0x14
    None, // 0x15
    None, // 0x16
    None, // 0x17
    None, // 0x18
    None, // 0x19
    None, // 0x1A
    None, // 0x1B
    None, // 0x1C
    None, // 0x1D
    None, // 0x1E
    None, // 0x1F
    // 0x20..=0x2F: space and ASCII punctuation.
    Some("space"),       // 0x20
    Some("exclam"),      // 0x21
    Some("quotedbl"),    // 0x22
    Some("numbersign"),  // 0x23
    Some("dollar"),      // 0x24
    Some("percent"),     // 0x25
    Some("ampersand"),   // 0x26
    Some("quotesingle"), // 0x27: `quotesingle`, not `quoteright`
    Some("parenleft"),   // 0x28
    Some("parenright"),  // 0x29
    Some("asterisk"),    // 0x2A
    Some("plus"),        // 0x2B
    Some("comma"),       // 0x2C
    Some("hyphen"),      // 0x2D
    Some("period"),      // 0x2E
    Some("slash"),       // 0x2F
    // 0x30..=0x39: digits.
    Some("zero"),  // 0x30
    Some("one"),   // 0x31
    Some("two"),   // 0x32
    Some("three"), // 0x33
    Some("four"),  // 0x34
    Some("five"),  // 0x35
    Some("six"),   // 0x36
    Some("seven"), // 0x37
    Some("eight"), // 0x38
    Some("nine"),  // 0x39
    // 0x3A..=0x40: punctuation.
    Some("colon"),     // 0x3A
    Some("semicolon"), // 0x3B
    Some("less"),      // 0x3C
    Some("equal"),     // 0x3D
    Some("greater"),   // 0x3E
    Some("question"),  // 0x3F
    Some("at"),        // 0x40
    // 0x41..=0x5A: uppercase A..Z.
    Some("A"), // 0x41
    Some("B"), // 0x42
    Some("C"), // 0x43
    Some("D"), // 0x44
    Some("E"), // 0x45
    Some("F"), // 0x46
    Some("G"), // 0x47
    Some("H"), // 0x48
    Some("I"), // 0x49
    Some("J"), // 0x4A
    Some("K"), // 0x4B
    Some("L"), // 0x4C
    Some("M"), // 0x4D
    Some("N"), // 0x4E
    Some("O"), // 0x4F
    Some("P"), // 0x50
    Some("Q"), // 0x51
    Some("R"), // 0x52
    Some("S"), // 0x53
    Some("T"), // 0x54
    Some("U"), // 0x55
    Some("V"), // 0x56
    Some("W"), // 0x57
    Some("X"), // 0x58
    Some("Y"), // 0x59
    Some("Z"), // 0x5A
    // 0x5B..=0x60: punctuation.
    Some("bracketleft"),  // 0x5B
    Some("backslash"),    // 0x5C
    Some("bracketright"), // 0x5D
    Some("asciicircum"),  // 0x5E
    Some("underscore"),   // 0x5F
    Some("grave"),        // 0x60: `grave`, not `quoteleft`
    // 0x61..=0x7A: lowercase a..z.
    Some("a"), // 0x61
    Some("b"), // 0x62
    Some("c"), // 0x63
    Some("d"), // 0x64
    Some("e"), // 0x65
    Some("f"), // 0x66
    Some("g"), // 0x67
    Some("h"), // 0x68
    Some("i"), // 0x69
    Some("j"), // 0x6A
    Some("k"), // 0x6B
    Some("l"), // 0x6C
    Some("m"), // 0x6D
    Some("n"), // 0x6E
    Some("o"), // 0x6F
    Some("p"), // 0x70
    Some("q"), // 0x71
    Some("r"), // 0x72
    Some("s"), // 0x73
    Some("t"), // 0x74
    Some("u"), // 0x75
    Some("v"), // 0x76
    Some("w"), // 0x77
    Some("x"), // 0x78
    Some("y"), // 0x79
    Some("z"), // 0x7A
    // 0x7B..=0x7E: closing punctuation.
    Some("braceleft"),  // 0x7B
    Some("bar"),        // 0x7C
    Some("braceright"), // 0x7D
    Some("asciitilde"), // 0x7E
    // 0x7F: gap (DEL in CP1252 as well).
    None, // 0x7F
    // 0x80..=0x9F: Windows-extended block, with five gaps.
    Some("Euro"),           // 0x80
    None,                   // 0x81: gap
    Some("quotesinglbase"), // 0x82
    Some("florin"),         // 0x83
    Some("quotedblbase"),   // 0x84
    Some("ellipsis"),       // 0x85
    Some("dagger"),         // 0x86
    Some("daggerdbl"),      // 0x87
    Some("circumflex"),     // 0x88
    Some("perthousand"),    // 0x89
    Some("Scaron"),         // 0x8A
    Some("guilsinglleft"),  // 0x8B
    Some("OE"),             // 0x8C
    None,                   // 0x8D: gap
    Some("Zcaron"),         // 0x8E
    None,                   // 0x8F: gap
    None,                   // 0x90: gap
    Some("quoteleft"),      // 0x91
    Some("quoteright"),     // 0x92
    Some("quotedblleft"),   // 0x93
    Some("quotedblright"),  // 0x94
    Some("bullet"),         // 0x95
    Some("endash"),         // 0x96
    Some("emdash"),         // 0x97
    Some("tilde"),          // 0x98
    Some("trademark"),      // 0x99
    Some("scaron"),         // 0x9A
    Some("guilsinglright"), // 0x9B
    Some("oe"),             // 0x9C
    None,                   // 0x9D: gap
    Some("zcaron"),         // 0x9E
    Some("Ydieresis"),      // 0x9F
    // 0xA0..=0xAF: Latin-1 punctuation. 0xA0 repeats `space`; 0xAD repeats `hyphen`.
    Some("space"),         // 0xA0: same glyph name as 0x20
    Some("exclamdown"),    // 0xA1
    Some("cent"),          // 0xA2
    Some("sterling"),      // 0xA3
    Some("currency"),      // 0xA4
    Some("yen"),           // 0xA5
    Some("brokenbar"),     // 0xA6
    Some("section"),       // 0xA7
    Some("dieresis"),      // 0xA8
    Some("copyright"),     // 0xA9
    Some("ordfeminine"),   // 0xAA
    Some("guillemotleft"), // 0xAB
    Some("logicalnot"),    // 0xAC
    Some("hyphen"),        // 0xAD: same glyph name as 0x2D
    Some("registered"),    // 0xAE
    Some("macron"),        // 0xAF
    // 0xB0..=0xBF: Latin-1 symbols.
    Some("degree"),         // 0xB0
    Some("plusminus"),      // 0xB1
    Some("twosuperior"),    // 0xB2
    Some("threesuperior"),  // 0xB3
    Some("acute"),          // 0xB4
    Some("mu"),             // 0xB5
    Some("paragraph"),      // 0xB6
    Some("periodcentered"), // 0xB7
    Some("cedilla"),        // 0xB8
    Some("onesuperior"),    // 0xB9
    Some("ordmasculine"),   // 0xBA
    Some("guillemotright"), // 0xBB
    Some("onequarter"),     // 0xBC
    Some("onehalf"),        // 0xBD
    Some("threequarters"),  // 0xBE
    Some("questiondown"),   // 0xBF
    // 0xC0..=0xDF: uppercase accented Latin (plus `multiply` and `germandbls`).
    Some("Agrave"),      // 0xC0
    Some("Aacute"),      // 0xC1
    Some("Acircumflex"), // 0xC2
    Some("Atilde"),      // 0xC3
    Some("Adieresis"),   // 0xC4
    Some("Aring"),       // 0xC5
    Some("AE"),          // 0xC6
    Some("Ccedilla"),    // 0xC7
    Some("Egrave"),      // 0xC8
    Some("Eacute"),      // 0xC9
    Some("Ecircumflex"), // 0xCA
    Some("Edieresis"),   // 0xCB
    Some("Igrave"),      // 0xCC
    Some("Iacute"),      // 0xCD
    Some("Icircumflex"), // 0xCE
    Some("Idieresis"),   // 0xCF
    Some("Eth"),         // 0xD0
    Some("Ntilde"),      // 0xD1
    Some("Ograve"),      // 0xD2
    Some("Oacute"),      // 0xD3
    Some("Ocircumflex"), // 0xD4
    Some("Otilde"),      // 0xD5
    Some("Odieresis"),   // 0xD6
    Some("multiply"),    // 0xD7
    Some("Oslash"),      // 0xD8
    Some("Ugrave"),      // 0xD9
    Some("Uacute"),      // 0xDA
    Some("Ucircumflex"), // 0xDB
    Some("Udieresis"),   // 0xDC
    Some("Yacute"),      // 0xDD
    Some("Thorn"),       // 0xDE
    Some("germandbls"),  // 0xDF
    // 0xE0..=0xFF: lowercase accented Latin (plus `divide`).
    Some("agrave"),      // 0xE0
    Some("aacute"),      // 0xE1
    Some("acircumflex"), // 0xE2
    Some("atilde"),      // 0xE3
    Some("adieresis"),   // 0xE4
    Some("aring"),       // 0xE5
    Some("ae"),          // 0xE6
    Some("ccedilla"),    // 0xE7
    Some("egrave"),      // 0xE8
    Some("eacute"),      // 0xE9
    Some("ecircumflex"), // 0xEA
    Some("edieresis"),   // 0xEB
    Some("igrave"),      // 0xEC
    Some("iacute"),      // 0xED
    Some("icircumflex"), // 0xEE
    Some("idieresis"),   // 0xEF
    Some("eth"),         // 0xF0
    Some("ntilde"),      // 0xF1
    Some("ograve"),      // 0xF2
    Some("oacute"),      // 0xF3
    Some("ocircumflex"), // 0xF4
    Some("otilde"),      // 0xF5
    Some("odieresis"),   // 0xF6
    Some("divide"),      // 0xF7
    Some("oslash"),      // 0xF8
    Some("ugrave"),      // 0xF9
    Some("uacute"),      // 0xFA
    Some("ucircumflex"), // 0xFB
    Some("udieresis"),   // 0xFC
    Some("yacute"),      // 0xFD
    Some("thorn"),       // 0xFE
    Some("ydieresis"),   // 0xFF
];