@@ -154,7 +154,61 @@ public UCD(string version) {
// Unicode database version string exposed by this object. Publicly readable;
// the setter is private, so only code inside this class can assign it.
// NOTE(review): presumably populated from the constructor's `version` argument — confirm.
154154 public string unidata_version { get ; private set ; }
155155
// Returns the character whose name is `name`, as a string produced by
// char.ConvertFromUtf32 (so code points above U+FFFF come back as a surrogate pair).
// Raises a Python KeyError when TryLookup cannot resolve the name.
public string lookup(string name) {
    if (!TryLookup(name, out int code)) {
        throw PythonOps.KeyError("undefined character name");
    }

    return char.ConvertFromUtf32(code);
}
161+
// Returns true when `code` lies in one of the CJK unified ideograph ranges whose
// character names are derived algorithmically ("CJK UNIFIED IDEOGRAPH-" followed
// by the hex code point) instead of being listed individually.
//
// NOTE(review): the upper bounds for Extension A, the main CJK block and
// Extension B match Unicode 12.x (13.0 extends them to 0x4DBF, 0x9FFC and
// 0x2A6DD), while Extension G only exists from 13.0 on — confirm these ranges
// against the Unicode database version that is actually bundled.
private static bool IsUnifiedIdeograph(int code) {
    return (0x3400 <= code && code <= 0x4DB5) || // CJK Ideograph Extension A
        (0x4E00 <= code && code <= 0x9FEF) || // CJK Ideograph
        (0x20000 <= code && code <= 0x2A6D6) || // CJK Ideograph Extension B
        (0x2A700 <= code && code <= 0x2B734) || // CJK Ideograph Extension C - 5.2
        (0x2B740 <= code && code <= 0x2B81D) || // CJK Ideograph Extension D - 6.0
        (0x2B820 <= code && code <= 0x2CEA1) || // CJK Ideograph Extension E - 8.0
        // The last assigned Extension F ideograph is U+2EBE0; U+2EBE1..U+2EBEF are
        // unassigned padding at the end of the block and must not be accepted.
        (0x2CEB0 <= code && code <= 0x2EBE0) || // CJK Ideograph Extension F - 10.0
        (0x30000 <= code && code <= 0x3134A); // CJK Ideograph Extension G - 13.0
}
172+
// Resolves a character name to its code point.
//
// Names starting with "CJK UNIFIED IDEOGRAPH-" are decoded algorithmically: the
// suffix must be exactly 4 or 5 upper-case hexadecimal digits and the resulting
// value must satisfy IsUnifiedIdeograph. Such names never reach the name table.
// Every other name is resolved through nameLookup.
//
// Returns true and sets `code` on success. On every failure path inside this
// method `code` is left at 0, so a rejected ideograph name no longer leaks the
// partially validated value to the caller.
private bool TryLookup(string name, out int code) {
    const string cjkPrefix = "CJK UNIFIED IDEOGRAPH-";

    code = 0;

    if (!name.StartsWith(cjkPrefix, StringComparison.Ordinal)) {
        return nameLookup.TryGetValue(name, out code);
    }

    var digits = name.AsSpan(cjkPrefix.Length);
    if (digits.Length != 4 && digits.Length != 5) return false;

    // At most 5 hex digits are accumulated, so the value cannot overflow an int.
    int value = 0;
    foreach (var c in digits) {
        int digit;
        if ('0' <= c && c <= '9') {
            digit = c - '0';
        } else if ('A' <= c && c <= 'F') {
            // Only upper-case hex digits are accepted, as before.
            digit = c - 'A' + 10;
        } else {
            return false;
        }
        value = value * 16 + digit;
    }

    if (!IsUnifiedIdeograph(value)) return false;

    code = value;
    return true;
}
159213
160214#nullable enable
@@ -166,6 +220,10 @@ public string name([NotNone] string unichr)
166220 => TryGetName ( GetRune ( unichr ) , out var name ) ? name : @default ;
167221
168222 internal bool TryGetName ( int rune , [ NotNullWhen ( true ) ] out string ? name ) {
223+ if ( IsUnifiedIdeograph ( rune ) ) {
224+ name = $ "CJK UNIFIED IDEOGRAPH-{ rune : X} ";
225+ return true ;
226+ }
169227 if ( TryGetInfo ( rune , out CharInfo info , excludeRanges : true ) ) {
170228 name = info . Name ;
171229 return true ;
0 commit comments