Skip to main content

pdf_base14_metrics/
winansi_table.rs

1// PDF WinAnsiEncoding byte → PostScript glyph name mapping.
2//
3// Source: PDF 1.7 Annex D.2, Table D.2 ("Latin Character Set and
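// Encodings"), column "WIN". This is NOT simply Microsoft CP1252:
// PDF assigns no glyph name to codes 0x7F, 0x81, 0x8D, 0x8F, 0x90,
// 0x9D (CP1252 has DEL at 0x7F and leaves the other five
// unassigned), and 0xA0 / 0xAD reuse the `space` / `hyphen` glyph
// names (see the aliasing rules below) instead of naming distinct
// characters. The PDF WinAnsi table is what every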
7// conformant PDF reader uses when a font's /Encoding is
8// /WinAnsiEncoding, so it's what the Mosaic PDF backend must agree
9// with.
10//
11// This file is shared with build.rs via `include!` and consumed by
12// `src/lib.rs` via `mod winansi_table;`: single source of truth.
// Regular `//` comments only: inner doc comments (`//!`) are only
// legal at the start of their enclosing item, so they would break
// the `include!` consumer, where this file's contents land
// mid-file in build.rs.
16//
17// Per PDF 1.7 Annex D.2 the encoding pins two aliasing rules:
18//   - 0xA0 (non-breaking space) renders with the same glyph as 0x20
19//     (`space`).
20//   - 0xAD (soft hyphen) renders with the same glyph as 0x2D
21//     (`hyphen`).
22// Both aliases are represented by listing the same glyph name at
23// both codes, so byte-indexed lookups Just Work.
24
// Byte-indexed lookup: `WINANSI_TABLE[code]` is the PostScript glyph
// name for `code`, or `None` when the slot carries no name here.
// The unnamed slots are the C0 controls 0x00..=0x1F plus the six
// gaps 0x7F, 0x81, 0x8D, 0x8F, 0x90 and 0x9D.
//
// Layout: a grid of four consecutive codes per row; the trailing
// comment on each row is the code of the row's FIRST entry, so the
// entries on a row labelled 0xNN are 0xNN, 0xNN+1, 0xNN+2, 0xNN+3.
//
// `rustfmt::skip` keeps the grid intact; the default formatter
// would reflow the rows and detach the address labels.
#[rustfmt::skip]
pub(crate) const WINANSI_TABLE: [Option<&str>; 256] = [
    // ---- 0x00..=0x1F: C0 control codes, all unnamed. ----
    None, None, None, None, // 0x00
    None, None, None, None, // 0x04
    None, None, None, None, // 0x08
    None, None, None, None, // 0x0C
    None, None, None, None, // 0x10
    None, None, None, None, // 0x14
    None, None, None, None, // 0x18
    None, None, None, None, // 0x1C

    // ---- 0x20..=0x7E: printable ASCII. ----
    Some("space"), Some("exclam"), Some("quotedbl"), Some("numbersign"), // 0x20
    // 0x27 is `quotesingle` in this table, not `quoteright`.
    Some("dollar"), Some("percent"), Some("ampersand"), Some("quotesingle"), // 0x24
    Some("parenleft"), Some("parenright"), Some("asterisk"), Some("plus"), // 0x28
    Some("comma"), Some("hyphen"), Some("period"), Some("slash"), // 0x2C
    Some("zero"), Some("one"), Some("two"), Some("three"), // 0x30
    Some("four"), Some("five"), Some("six"), Some("seven"), // 0x34
    Some("eight"), Some("nine"), Some("colon"), Some("semicolon"), // 0x38
    Some("less"), Some("equal"), Some("greater"), Some("question"), // 0x3C
    Some("at"), Some("A"), Some("B"), Some("C"), // 0x40
    Some("D"), Some("E"), Some("F"), Some("G"), // 0x44
    Some("H"), Some("I"), Some("J"), Some("K"), // 0x48
    Some("L"), Some("M"), Some("N"), Some("O"), // 0x4C
    Some("P"), Some("Q"), Some("R"), Some("S"), // 0x50
    Some("T"), Some("U"), Some("V"), Some("W"), // 0x54
    Some("X"), Some("Y"), Some("Z"), Some("bracketleft"), // 0x58
    Some("backslash"), Some("bracketright"), Some("asciicircum"), Some("underscore"), // 0x5C
    // 0x60 is `grave` in this table, not `quoteleft`.
    Some("grave"), Some("a"), Some("b"), Some("c"), // 0x60
    Some("d"), Some("e"), Some("f"), Some("g"), // 0x64
    Some("h"), Some("i"), Some("j"), Some("k"), // 0x68
    Some("l"), Some("m"), Some("n"), Some("o"), // 0x6C
    Some("p"), Some("q"), Some("r"), Some("s"), // 0x70
    Some("t"), Some("u"), Some("v"), Some("w"), // 0x74
    Some("x"), Some("y"), Some("z"), Some("braceleft"), // 0x78
    // Last slot of this row (0x7F) is a gap.
    Some("bar"), Some("braceright"), Some("asciitilde"), None, // 0x7C

    // ---- 0x80..=0x9F: Windows-extended block. ----
    // Gaps in this block: 0x81, 0x8D, 0x8F, 0x90, 0x9D.
    Some("Euro"), None, Some("quotesinglbase"), Some("florin"), // 0x80
    Some("quotedblbase"), Some("ellipsis"), Some("dagger"), Some("daggerdbl"), // 0x84
    Some("circumflex"), Some("perthousand"), Some("Scaron"), Some("guilsinglleft"), // 0x88
    Some("OE"), None, Some("Zcaron"), None, // 0x8C
    None, Some("quoteleft"), Some("quoteright"), Some("quotedblleft"), // 0x90
    Some("quotedblright"), Some("bullet"), Some("endash"), Some("emdash"), // 0x94
    Some("tilde"), Some("trademark"), Some("scaron"), Some("guilsinglright"), // 0x98
    Some("oe"), None, Some("zcaron"), Some("Ydieresis"), // 0x9C

    // ---- 0xA0..=0xBF: Latin-1 punctuation and symbols. ----
    // 0xA0 repeats the glyph name of 0x20 (`space`).
    Some("space"), Some("exclamdown"), Some("cent"), Some("sterling"), // 0xA0
    Some("currency"), Some("yen"), Some("brokenbar"), Some("section"), // 0xA4
    Some("dieresis"), Some("copyright"), Some("ordfeminine"), Some("guillemotleft"), // 0xA8
    // 0xAD repeats the glyph name of 0x2D (`hyphen`).
    Some("logicalnot"), Some("hyphen"), Some("registered"), Some("macron"), // 0xAC
    Some("degree"), Some("plusminus"), Some("twosuperior"), Some("threesuperior"), // 0xB0
    Some("acute"), Some("mu"), Some("paragraph"), Some("periodcentered"), // 0xB4
    Some("cedilla"), Some("onesuperior"), Some("ordmasculine"), Some("guillemotright"), // 0xB8
    Some("onequarter"), Some("onehalf"), Some("threequarters"), Some("questiondown"), // 0xBC

    // ---- 0xC0..=0xDF: uppercase accented Latin (plus `multiply`, `germandbls`). ----
    Some("Agrave"), Some("Aacute"), Some("Acircumflex"), Some("Atilde"), // 0xC0
    Some("Adieresis"), Some("Aring"), Some("AE"), Some("Ccedilla"), // 0xC4
    Some("Egrave"), Some("Eacute"), Some("Ecircumflex"), Some("Edieresis"), // 0xC8
    Some("Igrave"), Some("Iacute"), Some("Icircumflex"), Some("Idieresis"), // 0xCC
    Some("Eth"), Some("Ntilde"), Some("Ograve"), Some("Oacute"), // 0xD0
    Some("Ocircumflex"), Some("Otilde"), Some("Odieresis"), Some("multiply"), // 0xD4
    Some("Oslash"), Some("Ugrave"), Some("Uacute"), Some("Ucircumflex"), // 0xD8
    Some("Udieresis"), Some("Yacute"), Some("Thorn"), Some("germandbls"), // 0xDC

    // ---- 0xE0..=0xFF: lowercase accented Latin (plus `divide`). ----
    Some("agrave"), Some("aacute"), Some("acircumflex"), Some("atilde"), // 0xE0
    Some("adieresis"), Some("aring"), Some("ae"), Some("ccedilla"), // 0xE4
    Some("egrave"), Some("eacute"), Some("ecircumflex"), Some("edieresis"), // 0xE8
    Some("igrave"), Some("iacute"), Some("icircumflex"), Some("idieresis"), // 0xEC
    Some("eth"), Some("ntilde"), Some("ograve"), Some("oacute"), // 0xF0
    Some("ocircumflex"), Some("otilde"), Some("odieresis"), Some("divide"), // 0xF4
    Some("oslash"), Some("ugrave"), Some("uacute"), Some("ucircumflex"), // 0xF8
    Some("udieresis"), Some("yacute"), Some("thorn"), Some("ydieresis"), // 0xFC
];