Skip to main content

pdf_base14_metrics/
winansi_char_map.rs

// PDF `WinAnsiEncoding` byte → Unicode `char` mapping, transcribed
// directly from PDF 1.7 Annex D.2 Table D.2 (column "WIN"). This is
// the source of truth scanned by `winansi_byte` to find the
// `WinAnsi` byte for a Unicode `char`.
//
// Why a hand-written table rather than deriving from the Adobe Glyph
// List at build time: the AGL data is BSD-3-Clause and would force
// that leg onto the crate's SPDX expression. Transcribing the 256
// slots from PDF 1.7 (a normative spec, not someone else's data)
// keeps the published artifact MIT + APAFML only. The
// `winansi_vendor` integration test re-derives the same map from AGL
// at test time and asserts byte-for-byte equality, so any
// transcription error here is caught by CI before it can ship.
//
// This file is `mod`-included by `src/lib.rs` only. It is deliberately
// NOT pulled into `build.rs` (unlike its sibling `winansi_table.rs`)
// because the build script doesn't need it; keeping it out of build.rs
// avoids a `dead_code` warning in the build-script binary.
//
// Per PDF 1.7 Annex D.2 the encoding pins two aliasing rules:
//   - 0xA0 (non-breaking space) renders with the `space` glyph
//     ⇒ Unicode U+0020 (regular ASCII space, not U+00A0 NBSP).
//   - 0xAD (soft hyphen) renders with the `hyphen` glyph
//     ⇒ Unicode U+002D (regular ASCII hyphen-minus, not U+00AD SHY).
// This matches how PDF readers actually paint these bytes; it is NOT
// the same as Latin-1 / CP1252 round-tripping.

/// Decode table for PDF `WinAnsiEncoding`: slot `b` holds the Unicode
/// `char` for byte `b`, or `None` when the byte has no mapping.
///
/// The table is assembled at compile time. The two identity runs are
/// filled by loops, the irregular 0x80..=0x9F window is spelled out
/// entry by entry, and the two aliased slots are patched last.
pub(crate) const WINANSI_CHAR_MAP: [Option<char>; 256] = {
    // Every slot starts unmapped. That already settles the C0 control
    // range 0x00..=0x1F and 0x7F, none of which is assigned below.
    let mut slots: [Option<char>; 256] = [None; 256];

    // 0x20..=0x7E: printable ASCII, each byte maps to the same code point.
    let mut byte = 0x20_usize;
    while byte <= 0x7E {
        slots[byte] = Some(byte as u8 as char);
        byte += 1;
    }

    // 0x80..=0x9F: Windows-1252 extensions, including the unassigned gaps.
    let extensions: [Option<char>; 32] = [
        Some('\u{20AC}'), // 0x80 Euro
        None,             // 0x81 unassigned
        Some('\u{201A}'), // 0x82 quotesinglbase
        Some('\u{0192}'), // 0x83 florin
        Some('\u{201E}'), // 0x84 quotedblbase
        Some('\u{2026}'), // 0x85 ellipsis
        Some('\u{2020}'), // 0x86 dagger
        Some('\u{2021}'), // 0x87 daggerdbl
        Some('\u{02C6}'), // 0x88 circumflex
        Some('\u{2030}'), // 0x89 perthousand
        Some('\u{0160}'), // 0x8A Scaron
        Some('\u{2039}'), // 0x8B guilsinglleft
        Some('\u{0152}'), // 0x8C OE
        None,             // 0x8D unassigned
        Some('\u{017D}'), // 0x8E Zcaron
        None,             // 0x8F unassigned
        None,             // 0x90 unassigned
        Some('\u{2018}'), // 0x91 quoteleft
        Some('\u{2019}'), // 0x92 quoteright
        Some('\u{201C}'), // 0x93 quotedblleft
        Some('\u{201D}'), // 0x94 quotedblright
        Some('\u{2022}'), // 0x95 bullet
        Some('\u{2013}'), // 0x96 endash
        Some('\u{2014}'), // 0x97 emdash
        Some('\u{02DC}'), // 0x98 tilde
        Some('\u{2122}'), // 0x99 trademark
        Some('\u{0161}'), // 0x9A scaron
        Some('\u{203A}'), // 0x9B guilsinglright
        Some('\u{0153}'), // 0x9C oe
        None,             // 0x9D unassigned
        Some('\u{017E}'), // 0x9E zcaron
        Some('\u{0178}'), // 0x9F Ydieresis
    ];
    let mut offset = 0_usize;
    while offset < extensions.len() {
        slots[0x80 + offset] = extensions[offset];
        offset += 1;
    }

    // 0xA0..=0xFF: identity with U+00A0..=U+00FF. The two exceptions
    // are overwritten immediately after the loop.
    let mut byte = 0xA0_usize;
    while byte <= 0xFF {
        slots[byte] = Some(byte as u8 as char);
        byte += 1;
    }
    slots[0xA0] = Some(' '); // 0xA0 nbspace → space glyph (U+0020)
    slots[0xAD] = Some('-'); // 0xAD sfthyphen → hyphen glyph (U+002D)

    slots
};