handles_utils/constants.rs
1// We need the ALLOWED_UNICODE_CHARACTER_RANGES for the build as well as in the source, so here it is.
2
3use core::ops::RangeInclusive;
4
/// Builds a compacted copy of `ALLOWED_UNICODE_CHARACTER_RANGES` in which directly
/// adjacent ranges are merged into one.
///
/// Two consecutive ranges are merged when the second one starts exactly one past the
/// end of the first. The input is assumed to be sorted by start and free of overlaps;
/// overlapping or unsorted ranges are copied through unmerged.
///
/// Every range of the input is represented in the result, including the last one:
/// the range still being accumulated when the input ends is flushed after the loop.
///
/// Progress ("joining ..." / "adding ...") is printed to stdout.
#[cfg(test)]
// NOTE(review): presumably needed because not every target that compiles this file
// calls the helper — confirm before removing.
#[allow(dead_code)]
pub fn build_allowed_char_ranges() -> Vec<RangeInclusive<u16>> {
    let mut compacted: Vec<RangeInclusive<u16>> =
        Vec::with_capacity(ALLOWED_UNICODE_CHARACTER_RANGES.len());
    // The range currently being grown; `None` until the first input range is seen.
    // Using `Option` instead of a `0..=0` placeholder keeps a real range that starts
    // at 0x0001 from being glued onto a range that was never in the input.
    let mut pending: Option<RangeInclusive<u16>> = None;

    for allowed in ALLOWED_UNICODE_CHARACTER_RANGES.iter() {
        let (allowed_start, allowed_end) = (*allowed.start(), *allowed.end());
        let next = match pending.take() {
            None => allowed.clone(),
            Some(current) => {
                let (last_start, last_end) = (*current.start(), *current.end());
                // `checked_add` keeps a range ending at `u16::MAX` from overflowing.
                if last_end.checked_add(1) == Some(allowed_start) {
                    println!(
                        "joining {last_start:#X}..{last_end:#X} with {allowed_start:#X}..{allowed_end:#X}"
                    );
                    last_start..=allowed_end
                } else {
                    println!("adding {last_start:#X}..{last_end:#4X}");
                    compacted.push(current);
                    allowed.clone()
                }
            }
        };
        pending = Some(next);
    }

    // Flush the range that was still being accumulated when the input ran out;
    // without this the final range of the input would be lost.
    if let Some(current) = pending {
        let (last_start, last_end) = (*current.start(), *current.end());
        println!("adding {last_start:#X}..{last_end:#4X}");
        compacted.push(current);
    }
    compacted
}
31
/// Inclusive `u16` ranges of the characters that are allowed.
///
/// The table is sorted by range start and its ranges neither overlap nor touch.
/// It is the compacted form of the detailed per-block list kept in the comment
/// further down, and is generated using `test_build_allowed_char_ranges`.
/// The trailing comments name the blocks of the detailed list each range covers.
///
/// NOTE(review): the detailed list ends with `0xFB50..=0xFDFF` (Arabic Presentation
/// Forms-A), which no range in this table covers — confirm whether that omission is
/// intentional.
#[rustfmt::skip]
pub const ALLOWED_UNICODE_CHARACTER_RANGES: [RangeInclusive<u16>; 54] = [
    0x0020..=0x007A, // Basic Latin
    0x0080..=0x024F, // Latin-1 Supplement, Latin Extended-A, Latin Extended-B
    0x02B0..=0x04FF, // Spacing Modifier Letters, Combining Diacritical Marks, Greek and Coptic, Cyrillic
    0x0531..=0x058A, // Armenian
    0x0591..=0x05F4, // Hebrew
    0x0600..=0x07B1, // Arabic, Syriac, Arabic Supplement, Thaana
    0x07C0..=0x07FA, // N'Ko
    0x0900..=0x097F, // Devanagari
    0x0981..=0x09FB, // Bengali
    0x0A01..=0x0A75, // Gurmukhi
    0x0A81..=0x0AF1, // Gujarati
    0x0B01..=0x0B77, // Oriya
    0x0B82..=0x0BFA, // Tamil
    0x0C01..=0x0C7F, // Telugu
    0x0C82..=0x0CF2, // Kannada
    0x0D02..=0x0D7F, // Malayalam
    0x0D82..=0x0DF4, // Sinhala
    0x0E01..=0x0E5B, // Thai
    0x0E81..=0x0EDD, // Lao
    0x0F00..=0x0FDA, // Tibetan
    0x1000..=0x10FC, // Myanmar, Georgian
    0x1100..=0x137C, // Hangul Jamo, Ethiopic
    0x1380..=0x1399, // Ethiopic Supplement
    0x13A0..=0x13F4, // Cherokee
    0x1400..=0x167F, // Unified Canadian Aboriginal Syllabics
    0x1700..=0x1714, // Tagalog
    0x1720..=0x1736, // Hanunoo
    0x1740..=0x1753, // Buhid
    0x1760..=0x1773, // Tagbanwa
    0x1780..=0x17F9, // Khmer
    0x1800..=0x18AA, // Mongolian
    0x18B0..=0x18F5, // Unified Canadian Aboriginal Syllabics Extended
    0x1900..=0x1974, // Limbu, Tai Le
    0x1980..=0x1AAD, // New Tai Lue, Khmer Symbols, Buginese, Tai Tham
    0x1B00..=0x1B7C, // Balinese
    0x1B80..=0x1BB9, // Sundanese
    0x1BC0..=0x1C7F, // Batak, Lepcha, Ol Chiki
    0x1E00..=0x1FFF, // Latin Extended Additional, Greek Extended
    0x200C..=0x200D, // General Punctuation, limited to the zero-width joiners
    0x2C80..=0x2CFF, // Coptic
    0x2D30..=0x2D7F, // Tifinagh
    0x3040..=0x30FF, // Hiragana, Katakana
    0x3400..=0x4DBF, // CJK Unified Ideographs Extension A
    0x4E00..=0x9FFF, // CJK Unified Ideographs
    0xA500..=0xA62B, // Vai
    0xA880..=0xA8D9, // Saurashtra
    0xA8E0..=0xA8FB, // Devanagari Extended
    0xA900..=0xA95F, // Kayah Li, Rejang
    0xA980..=0xA9DF, // Javanese
    0xAA00..=0xAA7B, // Cham, Myanmar Extended-A
    0xAA80..=0xAADF, // Tai Viet
    0xABC0..=0xABF9, // Meetei Mayek
    0xAC00..=0xD7AF, // Hangul Syllables
    0xF900..=0xFAFF, // CJK Compatibility Ideographs
];
91
// Keep this detailed list: it documents which scripts (Unicode blocks) are supported,
// and it is the input for regenerating the compacted list above whenever an entry changes.
94// pub const ALLOWED_UNICODE_CHARACTER_RANGES: [RangeInclusive<u16>; 75] = [
95// 0x0020..=0x007A, // BasicLatin
96// 0x0080..=0x00FF, // Latin-1 Supplement
97// 0x0100..=0x017F, // Latin Extended-A
98// 0x0180..=0x024F, // Latin Extended-B
99// 0x02B0..=0x02FF, // Spacing Modifier Letters
100// 0x0300..=0x036F, // Combining diacritical marks
101// 0x0370..=0x03FF, // Greek and Coptic
102// 0x0400..=0x04FF, // Cyrillic
103// 0x0531..=0x058A, // Armenian
104// 0x0591..=0x05F4, // Hebrew
105// 0x0600..=0x06FF, // Arabic
106// 0x0700..=0x074F, // Syriac
107// 0x0750..=0x077F, // ArabicSupplement
108// 0x0780..=0x07B1, // Thaana
109// 0x07C0..=0x07FA, // N'Ko
110// 0x0900..=0x097F, // Devanagari
111// 0x0981..=0x09FB, // Bengali
112// 0x0A01..=0x0A75, // Gurmukhi
113// 0x0A81..=0x0AF1, // Gujarati
114// 0x0B01..=0x0B77, // Oriya
115// 0x0B82..=0x0BFA, // Tamil
116// 0x0C01..=0x0C7F, // Telugu
117// 0x0C82..=0x0CF2, // Kannada
118// 0x0D02..=0x0D7F, // Malayalam
119// 0x0D82..=0x0DF4, // Sinhala
120// 0x0E01..=0x0E5B, // Thai
121// 0x0E81..=0x0EDD, // Lao
122// 0x0F00..=0x0FDA, // Tibetan
123// 0x1000..=0x109F, // Myanmar
124// 0x10A0..=0x10FC, // Georgian
125// 0x1100..=0x11FF, // HangulJamo
126// 0x1200..=0x137C, // Ethiopic
127// 0x1380..=0x1399, // EthiopicSupplement
128// 0x13A0..=0x13F4, // Cherokee
129// 0x1400..=0x167F, // UnifiedCanadianAboriginalSyllabics
130// 0x1700..=0x1714, // Tagalog
131// 0x1720..=0x1736, // Hanunoo
132// 0x1740..=0x1753, // Buhid
133// 0x1760..=0x1773, // Tagbanwa
134// 0x1780..=0x17F9, // Khmer
135// 0x1800..=0x18AA, // Mongolian
136// 0x18B0..=0x18F5, // Unified Canadian Aboriginal Syllabics Extended
137// 0x1900..=0x194F, // Limbu
138// 0x1950..=0x1974, // Tai Le
// 0x1980..=0x19DF, // New Tai Lue
140// 0x19E0..=0x19FF, // Khmer Symbols
141// 0x1A00..=0x1A1F, // Buginese
142// 0x1A20..=0x1AAD, // Tai Tham
143// 0x1B00..=0x1B7C, // Balinese
144// 0x1B80..=0x1BB9, // Sundanese
145// 0x1BC0..=0x1BFF, // Batak
146// 0x1C00..=0x1C4F, // Lepcha
147// 0x1C50..=0x1C7F, // Ol Chiki
148// 0x1E00..=0x1EFF, // Latin Extended Additional
149// 0x1F00..=0x1FFF, // Greek Extended
150// 0x200C..=0x200D, // General punctuation Limited to the Zero-width Joiners
151// 0x2C80..=0x2CFF, // Coptic
152// 0x2D30..=0x2D7F, // Tifinagh
153// 0x3040..=0x309F, // Hiragana
154// 0x30A0..=0x30FF, // Katakana
155// 0x3400..=0x4DBF, // CJK Unified Ideographs Extension A
156// 0x4E00..=0x9FFF, // CJK Unified Ideographs
157// 0xA500..=0xA62B, // Vai
158// 0xA880..=0xA8D9, // Saurashtra
159// 0xA8E0..=0xA8FB, // Devanagari Extended
160// 0xA900..=0xA92F, // Kayah Li
161// 0xA930..=0xA95F, // Rejang
162// 0xA980..=0xA9DF, // Javanese
163// 0xAA00..=0xAA5F, // Cham
164// 0xAA60..=0xAA7B, // Myanmar Extended-A
165// 0xAA80..=0xAADF, // Tai Viet
166// 0xABC0..=0xABF9, // Meetei Mayek
167// 0xAC00..=0xD7AF, // Hangul Syllables
168// 0xF900..=0xFAFF, // CJK Compatibility Ideographs
169// 0xFB50..=0xFDFF, // Arabic Presentation Forms-A
170// ];
171
// To regenerate the compacted list, comment out the active
// ALLOWED_UNICODE_CHARACTER_RANGES, uncomment the detailed per-script list above,
// and run the ignored test below; it prints the compacted ranges.
/// Runs the compaction and prints each resulting range as a `start..=end,` line.
///
/// Ignored by default; it exists only as a regeneration aid for the compacted
/// `ALLOWED_UNICODE_CHARACTER_RANGES` table.
#[test]
#[ignore = "use only to regenerate compacted ALLOWED_UNICODE_CHARACTER_RANGES"]
fn test_build_allowed_char_ranges() {
    let compacted = build_allowed_char_ranges();
    // Sanity check on the number of ranges produced by the compaction.
    assert_eq!(compacted.len(), 54usize);
    compacted.iter().for_each(|range| {
        let (start, end) = (range.start(), range.end());
        println!("{start:#4X}..={end:#4X},");
    });
}
184}