pdf_base14_metrics/winansi_char_map.rs
1// PDF `WinAnsiEncoding` byte → Unicode `char` mapping, transcribed
2// directly from PDF 1.7 Annex D.2 Table D.2 (column "WIN"). This is
3// the source of truth scanned by `winansi_byte` to find the
4// `WinAnsi` byte for a Unicode `char`.
5//
6// Why a hand-written table rather than deriving from the Adobe Glyph
7// List at build time: the AGL data is BSD-3-Clause and would force
8// that leg onto the crate's SPDX expression. Transcribing the 256
9// slots from PDF 1.7 (a normative spec, not someone else's data)
10// keeps the published artifact MIT + APAFML only. The
11// `winansi_vendor` integration test re-derives the same map from AGL
12// at test time and asserts byte-for-byte equality, so any
13// transcription error here is caught by CI before it can ship.
14//
15// This file is `mod`-included by `src/lib.rs` only. It is deliberately
16// NOT pulled into `build.rs` (unlike its sibling `winansi_table.rs`)
17// because the build script doesn't need it: keeping it out of build.rs
18// avoids a `dead_code` warning in the build-script binary.
19//
20// Per PDF 1.7 Annex D.2 the encoding pins two aliasing rules:
21// - 0xA0 (non-breaking space) renders with the `space` glyph
22// ⇒ Unicode U+0020 (regular ASCII space, not U+00A0 NBSP).
23// - 0xAD (soft hyphen) renders with the `hyphen` glyph
24// ⇒ Unicode U+002D (regular ASCII hyphen-minus, not U+00AD SHY).
25// This matches how PDF readers actually paint these bytes; it is NOT
26// the same as Latin-1 / CP1252 round-tripping.
27
/// `WinAnsiEncoding` byte → Unicode `char` table, indexed by byte value.
///
/// `None` marks a byte that has no assignment in the encoding. Two slots
/// deliberately alias an ASCII character instead of their Latin-1 code
/// point: 0xA0 yields U+0020 (space) and 0xAD yields U+002D (hyphen-minus),
/// so `' '` and `'-'` each appear twice in the table.
pub(crate) const WINANSI_CHAR_MAP: [Option<char>; 256] = {
    // Start with every byte unmapped. This already covers the C0 controls
    // (0x00..=0x1F), 0x7F, and the unassigned holes in 0x80..=0x9F.
    let mut map: [Option<char>; 256] = [None; 256];

    // 0x20..=0x7E: printable ASCII (identity mapping).
    let mut code: usize = 0x20;
    while code <= 0x7E {
        map[code] = Some(code as u8 as char);
        code += 1;
    }

    // 0x80..=0x9F: Windows-1252 extensions (with WinAnsi-specific gaps).
    map[0x80] = Some('\u{20AC}'); // Euro
    // 0x81 unassigned
    map[0x82] = Some('\u{201A}'); // quotesinglbase
    map[0x83] = Some('\u{0192}'); // florin
    map[0x84] = Some('\u{201E}'); // quotedblbase
    map[0x85] = Some('\u{2026}'); // ellipsis
    map[0x86] = Some('\u{2020}'); // dagger
    map[0x87] = Some('\u{2021}'); // daggerdbl
    map[0x88] = Some('\u{02C6}'); // circumflex
    map[0x89] = Some('\u{2030}'); // perthousand
    map[0x8A] = Some('\u{0160}'); // Scaron
    map[0x8B] = Some('\u{2039}'); // guilsinglleft
    map[0x8C] = Some('\u{0152}'); // OE
    // 0x8D unassigned
    map[0x8E] = Some('\u{017D}'); // Zcaron
    // 0x8F unassigned
    // 0x90 unassigned
    map[0x91] = Some('\u{2018}'); // quoteleft
    map[0x92] = Some('\u{2019}'); // quoteright
    map[0x93] = Some('\u{201C}'); // quotedblleft
    map[0x94] = Some('\u{201D}'); // quotedblright
    map[0x95] = Some('\u{2022}'); // bullet
    map[0x96] = Some('\u{2013}'); // endash
    map[0x97] = Some('\u{2014}'); // emdash
    map[0x98] = Some('\u{02DC}'); // tilde
    map[0x99] = Some('\u{2122}'); // trademark
    map[0x9A] = Some('\u{0161}'); // scaron
    map[0x9B] = Some('\u{203A}'); // guilsinglright
    map[0x9C] = Some('\u{0153}'); // oe
    // 0x9D unassigned
    map[0x9E] = Some('\u{017E}'); // zcaron
    map[0x9F] = Some('\u{0178}'); // Ydieresis

    // 0xA0..=0xFF: Latin-1 punctuation and supplement. A `u8 as char` cast
    // yields U+0000..=U+00FF, i.e. the identity mapping for this range.
    let mut code: usize = 0xA0;
    while code <= 0xFF {
        map[code] = Some(code as u8 as char);
        code += 1;
    }

    // The two aliasing exceptions override the identity fill above.
    map[0xA0] = Some(' '); // nbspace → space glyph (U+0020)
    map[0xAD] = Some('-'); // sfthyphen → hyphen glyph (U+002D)

    map
};