handles_utils/
constants.rs

// ALLOWED_UNICODE_CHARACTER_RANGES is kept in this standalone file because it is needed by the build as well as by the crate source.
2
3use core::ops::RangeInclusive;
4
/// Builds a compacted copy of [`ALLOWED_UNICODE_CHARACTER_RANGES`] in which
/// directly adjacent ranges (one ends at `n`, the next starts at `n + 1`) are
/// joined into a single range.
///
/// The input list is assumed to be sorted by range start; ranges that overlap
/// or are separated by a gap are emitted unchanged. Progress is printed to
/// stdout so the merge decisions can be inspected when regenerating the table.
///
/// Returns the compacted ranges in input order, including the final range.
/// An empty input yields an empty vector.
#[cfg(test)]
#[allow(dead_code)]
pub fn build_allowed_char_ranges() -> Vec<RangeInclusive<u16>> {
	let mut new_allowed: Vec<RangeInclusive<u16>> = Vec::new();
	// Range currently being grown as `(start, end)`. `None` until the first
	// input range is seen, so no sentinel value can leak into the output.
	let mut pending: Option<(u16, u16)> = None;
	// assumes the list is sorted!
	for allowed in ALLOWED_UNICODE_CHARACTER_RANGES.iter() {
		let (allowed_start, allowed_end) = (*allowed.start(), *allowed.end());
		pending = match pending {
			// `checked_add` keeps a range ending at `u16::MAX` from overflowing.
			Some((last_start, last_end)) if last_end.checked_add(1) == Some(allowed_start) => {
				println!(
					"joining {last_start:#X}..{last_end:#X} with {allowed_start:#X}..{allowed_end:#X}"
				);
				Some((last_start, allowed_end))
			},
			Some((last_start, last_end)) => {
				println!("adding {last_start:#X}..{last_end:#X}");
				new_allowed.push(last_start..=last_end);
				Some((allowed_start, allowed_end))
			},
			None => Some((allowed_start, allowed_end)),
		};
	}
	// Flush the range that was still being grown when the input ran out;
	// without this the last compacted range would be silently dropped.
	if let Some((last_start, last_end)) = pending {
		println!("adding {last_start:#X}..{last_end:#X}");
		new_allowed.push(last_start..=last_end);
	}
	new_allowed
}
31
/// Inclusive ranges of the character values that are allowed.
///
/// The entries are sorted in ascending order and do not touch or overlap.
/// This is generated using test_build_allowed_char_ranges
///
/// NOTE(review): the per-script reference list kept in the comment below ends
/// with `0xFB50..=0xFDFF` (Arabic Presentation Forms-A), which has no
/// counterpart in this table — confirm whether that omission is intended.
#[rustfmt::skip]
pub const ALLOWED_UNICODE_CHARACTER_RANGES: [RangeInclusive<u16>; 54] = [
	0x0020..=0x007A, 0x0080..=0x024F, 0x02B0..=0x04FF,
	0x0531..=0x058A, 0x0591..=0x05F4, 0x0600..=0x07B1,
	0x07C0..=0x07FA, 0x0900..=0x097F, 0x0981..=0x09FB,
	0x0A01..=0x0A75, 0x0A81..=0x0AF1, 0x0B01..=0x0B77,
	0x0B82..=0x0BFA, 0x0C01..=0x0C7F, 0x0C82..=0x0CF2,
	0x0D02..=0x0D7F, 0x0D82..=0x0DF4, 0x0E01..=0x0E5B,
	0x0E81..=0x0EDD, 0x0F00..=0x0FDA, 0x1000..=0x10FC,
	0x1100..=0x137C, 0x1380..=0x1399, 0x13A0..=0x13F4,
	0x1400..=0x167F, 0x1700..=0x1714, 0x1720..=0x1736,
	0x1740..=0x1753, 0x1760..=0x1773, 0x1780..=0x17F9,
	0x1800..=0x18AA, 0x18B0..=0x18F5, 0x1900..=0x1974,
	0x1980..=0x1AAD, 0x1B00..=0x1B7C, 0x1B80..=0x1BB9,
	0x1BC0..=0x1C7F, 0x1E00..=0x1FFF, 0x200C..=0x200D,
	0x2C80..=0x2CFF, 0x2D30..=0x2D7F, 0x3040..=0x30FF,
	0x3400..=0x4DBF, 0x4E00..=0x9FFF, 0xA500..=0xA62B,
	0xA880..=0xA8D9, 0xA8E0..=0xA8FB, 0xA900..=0xA95F,
	0xA980..=0xA9DF, 0xAA00..=0xAA7B, 0xAA80..=0xAADF,
	0xABC0..=0xABF9, 0xAC00..=0xD7AF, 0xF900..=0xFAFF,
];
91
92// Keep this to show what languages are supported and to generate a new compact
93// list whenever the list is updated.
94// pub const ALLOWED_UNICODE_CHARACTER_RANGES: [RangeInclusive<u16>; 75] = [
95//     0x0020..=0x007A, // BasicLatin
96//     0x0080..=0x00FF, // Latin-1 Supplement
97//     0x0100..=0x017F, // Latin Extended-A
98//     0x0180..=0x024F,   // Latin Extended-B
99//     0x02B0..=0x02FF, // Spacing Modifier Letters
100//     0x0300..=0x036F, // Combining diacritical marks
101//     0x0370..=0x03FF, // Greek and Coptic
102//     0x0400..=0x04FF, // Cyrillic
103//     0x0531..=0x058A, // Armenian
104//     0x0591..=0x05F4, // Hebrew
105//     0x0600..=0x06FF, // Arabic
106//     0x0700..=0x074F, // Syriac
107//     0x0750..=0x077F, // ArabicSupplement
108//     0x0780..=0x07B1, // Thaana
109//     0x07C0..=0x07FA, // N'Ko
110//     0x0900..=0x097F, // Devanagari
111//     0x0981..=0x09FB, // Bengali
112//     0x0A01..=0x0A75, // Gurmukhi
113//     0x0A81..=0x0AF1, // Gujarati
114//     0x0B01..=0x0B77, // Oriya
115//     0x0B82..=0x0BFA, // Tamil
116//     0x0C01..=0x0C7F, // Telugu
117//     0x0C82..=0x0CF2, // Kannada
118//     0x0D02..=0x0D7F, // Malayalam
119//     0x0D82..=0x0DF4, // Sinhala
120//     0x0E01..=0x0E5B, // Thai
121//     0x0E81..=0x0EDD, // Lao
122//     0x0F00..=0x0FDA, // Tibetan
123//     0x1000..=0x109F, // Myanmar
124//     0x10A0..=0x10FC, // Georgian
125//     0x1100..=0x11FF, // HangulJamo
126//     0x1200..=0x137C, // Ethiopic
127//     0x1380..=0x1399, // EthiopicSupplement
128//     0x13A0..=0x13F4, // Cherokee
129//     0x1400..=0x167F, // UnifiedCanadianAboriginalSyllabics
130//     0x1700..=0x1714, // Tagalog
131//     0x1720..=0x1736, // Hanunoo
132//     0x1740..=0x1753, // Buhid
133//     0x1760..=0x1773, // Tagbanwa
134//     0x1780..=0x17F9, // Khmer
135//     0x1800..=0x18AA, // Mongolian
136//     0x18B0..=0x18F5, // Unified Canadian Aboriginal Syllabics Extended
137//     0x1900..=0x194F, // Limbu
138//     0x1950..=0x1974, // Tai Le
139//     0x1980..=0x19DF, // New Tai Le
140//     0x19E0..=0x19FF, // Khmer Symbols
141//     0x1A00..=0x1A1F, // Buginese
142//     0x1A20..=0x1AAD, // Tai Tham
143//     0x1B00..=0x1B7C, // Balinese
144//     0x1B80..=0x1BB9, // Sundanese
145//     0x1BC0..=0x1BFF, // Batak
146//     0x1C00..=0x1C4F, // Lepcha
147//     0x1C50..=0x1C7F, // Ol Chiki
148//     0x1E00..=0x1EFF, // Latin Extended Additional
149//     0x1F00..=0x1FFF, // Greek Extended
150//     0x200C..=0x200D, // General punctuation Limited to the Zero-width Joiners
151//     0x2C80..=0x2CFF, // Coptic
152//     0x2D30..=0x2D7F, // Tifinagh
153//     0x3040..=0x309F, // Hiragana
154//     0x30A0..=0x30FF, // Katakana
155//     0x3400..=0x4DBF, // CJK Unified Ideographs Extension A
156//     0x4E00..=0x9FFF, // CJK Unified Ideographs
157//     0xA500..=0xA62B, // Vai
158//     0xA880..=0xA8D9, // Saurashtra
159//     0xA8E0..=0xA8FB, // Devanagari Extended
160//     0xA900..=0xA92F, // Kayah Li
161//     0xA930..=0xA95F, // Rejang
162//     0xA980..=0xA9DF, // Javanese
163//     0xAA00..=0xAA5F, // Cham
164//     0xAA60..=0xAA7B, // Myanmar Extended-A
165//     0xAA80..=0xAADF, // Tai Viet
166//     0xABC0..=0xABF9, // Meetei Mayek
167//     0xAC00..=0xD7AF, // Hangul Syllables
168//     0xF900..=0xFAFF, // CJK Compatibility Ideographs
169//     0xFB50..=0xFDFF, // Arabic Presentation Forms-A
170// ];
171
// Regeneration helper. To rebuild the compacted table, comment out the
// compacted ALLOWED_UNICODE_CHARACTER_RANGES, uncomment the original,
// per-script list (the one naming every supported language), and run this
// ignored test; it prints one table entry per line.
#[test]
#[ignore = "use only to regenerate compacted ALLOWED_UNICODE_CHARACTER_RANGES"]
fn test_build_allowed_char_ranges() {
	let res = build_allowed_char_ranges();
	assert!(!res.is_empty());
	// Check the compaction invariant rather than a fixed entry count, which
	// would have to be edited by hand every time the source list changes:
	// consecutive ranges must be ascending with a gap of at least one value.
	// Widening to u32 keeps `end + 1` from overflowing at u16::MAX.
	assert!(res
		.windows(2)
		.all(|pair| u32::from(*pair[0].end()) + 1 < u32::from(*pair[1].start())));
	for range in res {
		let start = range.start();
		let end = range.end();
		// `#06X`: "0x" prefix plus four zero-padded hex digits, matching the
		// layout of the table entries (the width includes the prefix).
		println!("{start:#06X}..={end:#06X},");
	}
}
184}