@@ -74,26 +74,21 @@ internal enum ReFlags : int {
7474 public const int A = ( int ) ReFlags . ASCII ;
7575
7676 // long forms
77- public const int TEMPLATE = ( int ) ReFlags . TEMPLATE ;
77+ public const int TEMPLATE = ( int ) ReFlags . TEMPLATE ;
7878 public const int IGNORECASE = ( int ) ReFlags . IGNORECASE ;
79- public const int LOCALE = ( int ) ReFlags . LOCALE ;
80- public const int MULTILINE = ( int ) ReFlags . MULTILINE ;
81- public const int DOTALL = ( int ) ReFlags . DOTALL ;
82- public const int UNICODE = ( int ) ReFlags . UNICODE ;
83- public const int VERBOSE = ( int ) ReFlags . VERBOSE ;
84- public const int ASCII = ( int ) ReFlags . ASCII ;
79+ public const int LOCALE = ( int ) ReFlags . LOCALE ;
80+ public const int MULTILINE = ( int ) ReFlags . MULTILINE ;
81+ public const int DOTALL = ( int ) ReFlags . DOTALL ;
82+ public const int UNICODE = ( int ) ReFlags . UNICODE ;
83+ public const int VERBOSE = ( int ) ReFlags . VERBOSE ;
84+ public const int ASCII = ( int ) ReFlags . ASCII ;
8585
8686 #endregion
8787
8888 #region Public API Surface
8989
/// <summary>
/// Builds a <see cref="Pattern"/> for the given pattern object by delegating to GetPattern.
/// </summary>
/// <param name="context">The calling code context, forwarded unchanged.</param>
/// <param name="pattern">The pattern object to compile.</param>
/// <param name="flags">Integer flag bits forwarded unchanged; defaults to 0.</param>
/// <returns>Whatever GetPattern returns when its final argument is <c>true</c>.</returns>
public static Pattern compile(CodeContext/*!*/ context, object? pattern, int flags = 0) {
    // No exception translation happens at this level: anything raised by
    // GetPattern propagates directly to the caller.
    return GetPattern(context, pattern, flags, true);
}
9792
9893 public const string engine = "cli reg ex" ;
9994
@@ -170,8 +165,10 @@ public class Pattern : IWeakReferenceable {
170165 private WeakRefTracker ? _weakRefTracker ;
171166
172167 internal Pattern ( CodeContext /*!*/ context , object pattern , ReFlags flags = 0 , bool compiled = false ) {
173- _prePattern = PreParseRegex ( context , PatternAsString ( pattern , ref flags ) , ( flags & ReFlags . VERBOSE ) != 0 , out ReFlags options ) ;
168+ _prePattern = PreParseRegex ( context , PatternAsString ( pattern , ref flags ) , verbose : flags . HasFlag ( ReFlags . VERBOSE ) , isBytes : ! flags . HasFlag ( ReFlags . UNICODE ) , out ReFlags options ) ;
174169 flags |= options ;
170+ if ( flags . HasFlag ( ReFlags . UNICODE | ReFlags . LOCALE ) ) throw PythonOps . ValueError ( "cannot use LOCALE flag with a str pattern" ) ;
171+ if ( flags . HasFlag ( ReFlags . ASCII | ReFlags . LOCALE ) ) throw PythonOps . ValueError ( "ASCII and LOCALE flags are incompatible" ) ;
175172 _re = GenRegex ( context , _prePattern , flags , compiled , false ) ;
176173 this . pattern = pattern ;
177174 this . flags = ( int ) flags ;
@@ -425,7 +422,7 @@ public object sub(CodeContext/*!*/ context, object? repl, object? @string, int c
425422 } ;
426423 prevEnd = match . Index + match . Length ;
427424
428- if ( replacement != null ) return UnescapeGroups ( match , replacement ) ;
425+ if ( replacement != null ) return UnescapeGroups ( context , match , replacement ) ;
429426 return ValidateString ( PythonCalls . Call ( context , repl , Match . Make ( match , this , input ) ) ) ;
430427 } ,
431428 count ) ) ;
@@ -453,7 +450,7 @@ public PythonTuple subn(CodeContext/*!*/ context, object? repl, object? @string,
453450 prevEnd = match . Index + match . Length ;
454451
455452 totalCount ++ ;
456- if ( replacement != null ) return UnescapeGroups ( match , replacement ) ;
453+ if ( replacement != null ) return UnescapeGroups ( context , match , replacement ) ;
457454
458455 return ValidateString ( PythonCalls . Call ( context , repl , Match . Make ( match , this , input ) ) ) ;
459456 } ,
@@ -464,7 +461,7 @@ public PythonTuple subn(CodeContext/*!*/ context, object? repl, object? @string,
464461
465462 public int flags { get ; }
466463
467- public PythonDictionary groupindex {
464+ public MappingProxy groupindex {
468465 get {
469466 if ( _groups == null ) {
470467 PythonDictionary d = new PythonDictionary ( ) ;
@@ -480,7 +477,7 @@ public PythonDictionary groupindex {
480477 }
481478 _groups = d ;
482479 }
483- return _groups ;
480+ return new MappingProxy ( _groups ) ;
484481 }
485482 }
486483
@@ -489,7 +486,7 @@ public PythonDictionary groupindex {
489486 public object pattern { get ; }
490487
/// <summary>
/// Reports equality when <paramref name="obj"/> is a Pattern whose pattern object
/// satisfies PythonOps.IsOrEqualsRetBool against this one and whose flags match.
/// </summary>
public override bool Equals(object? obj) {
    if (!(obj is Pattern other)) {
        return false;
    }

    // The pattern comparison is evaluated before the flags comparison.
    if (!PythonOps.IsOrEqualsRetBool(other.pattern, pattern)) {
        return false;
    }

    return other.flags == flags;
}
493490
/// <summary>
/// Hash code formed by XOR-ing the pattern object's hash code with the flags value.
/// </summary>
public override int GetHashCode() {
    int patternHash = pattern.GetHashCode();
    return patternHash ^ flags;
}
495492
@@ -646,6 +643,8 @@ private Match(RegExpMatch m, Pattern pattern, string text, int pos, int endpos)
646643
647644 #region Public API Surface
648645
/// <summary>
/// Indexer that forwards <paramref name="index"/> to group() and returns its result.
/// </summary>
public object? this[object? index] {
    get {
        return group(index);
    }
}
647+
/// <summary>
/// Produces the textual representation of this match: its start() and end()
/// values as the span, followed by PythonOps.Repr of group(0).
/// </summary>
public string __repr__(CodeContext context) {
    // Arguments are evaluated left to right, matching the order in which the
    // interpolated form evaluated its holes.
    return string.Format(
        "<re.Match object; span=({0}, {1}), match={2}>",
        start(),
        end(),
        PythonOps.Repr(context, group(0)));
}
651650
@@ -851,7 +850,7 @@ private Group GetGroup(object? group) {
851850
852851 int GetGroupIndex ( object ? group ) {
853852 int grpIndex ;
854- if ( ! Converter . TryConvertToInt32 ( group , out grpIndex ) ) {
853+ if ( ! Converter . TryConvertToIndex ( group , out grpIndex , throwOverflowError : false , throwTypeError : false ) ) {
855854 if ( group is string s ) {
856855 grpIndex = re . _re . GroupNumberFromName ( s ) ;
857856 } else if ( group is ExtensibleString es ) {
@@ -924,7 +923,7 @@ private static RegexOptions FlagsToOption(ReFlags flags) {
924923 /// Preparses a regular expression text returning a ParsedRegex class
925924 /// that can be used for further regular expressions.
926925 /// </summary>
927- private static string PreParseRegex ( CodeContext /*!*/ context , string pattern , bool verbose , out ReFlags options ) {
926+ private static string PreParseRegex ( CodeContext /*!*/ context , string pattern , bool verbose , bool isBytes , out ReFlags options ) {
928927 var userPattern = pattern ;
929928 options = default ;
930929 if ( verbose ) options |= ReFlags . VERBOSE ;
@@ -1074,39 +1073,37 @@ static string ApplyVerbose(string pattern) {
10741073
10751074 break ;
10761075 case 'a' :
1077- options |= ReFlags . ASCII ;
1078- RemoveOption ( ref pattern , ref nameIndex ) ;
1079- break ;
10801076 case 'i' :
1081- options |= ReFlags . IGNORECASE ;
1082- RemoveOption ( ref pattern , ref nameIndex ) ;
1083- break ;
10841077 case 'L' :
1085- options |= ReFlags . LOCALE ;
1086- RemoveOption ( ref pattern , ref nameIndex ) ;
1087- break ;
10881078 case 'm' :
1089- options |= ReFlags . MULTILINE ;
1090- RemoveOption ( ref pattern , ref nameIndex ) ;
1091- break ;
10921079 case 's' :
1093- options |= ReFlags . DOTALL ;
1094- RemoveOption ( ref pattern , ref nameIndex ) ;
1095- break ;
10961080 case 'u' :
1097- options |= ReFlags . UNICODE ;
1098- RemoveOption ( ref pattern , ref nameIndex ) ;
1099- break ;
11001081 case 'x' :
1101- if ( ! verbose ) return PreParseRegex ( context , userPattern , true , out options ) ;
1102- options |= ReFlags . VERBOSE ;
1103- RemoveOption ( ref pattern , ref nameIndex ) ;
1104- break ;
1082+ if ( MaybeParseFlags ( pattern . AsSpan ( ) . Slice ( nameIndex ) , out int consumed , out ReFlags flags ) ) {
1083+ nameIndex -= 2 ;
1084+ if ( nameIndex != 0 ) {
1085+ // error in 3.11
1086+ if ( userPattern . Length > 20 ) {
1087+ PythonOps . Warn ( context , PythonExceptions . DeprecationWarning , $ "Flags not at the start of the expression { ( isBytes ? "b" : string . Empty ) } { PythonOps . Repr ( context , userPattern . Substring ( 0 , 20 ) ) } (truncated)") ;
1088+ } else {
1089+ PythonOps . Warn ( context , PythonExceptions . DeprecationWarning , $ "Flags not at the start of the expression { ( isBytes ? "b" : string . Empty ) } { PythonOps . Repr ( context , userPattern ) } ") ;
1090+ }
1091+ }
1092+ if ( flags . HasFlag ( ReFlags . VERBOSE ) && ! verbose ) return PreParseRegex ( context , userPattern , verbose : true , isBytes : isBytes , out options ) ;
1093+ options |= flags ;
1094+ pattern = pattern . Remove ( nameIndex , consumed + 3 ) ;
1095+ break ;
1096+ }
1097+ if ( pattern [ nameIndex + consumed ] != ':' ) {
1098+ throw PythonExceptions . CreateThrowable ( error ( context ) , "Unrecognized flag " + pattern [ nameIndex + consumed ] ) ;
1099+ }
1100+ break ; // grouping construct
11051101 case ':' : break ; // non-capturing
11061102 case '=' : break ; // look ahead assertion
11071103 case '<' : break ; // positive look behind assertion
11081104 case '!' : break ; // negative look ahead assertion
11091105 case '#' : break ; // inline comment
1106+ case '-' : break ; // grouping construct
11101107 case '(' :
11111108 // conditional match alternation (?(id/name)yes-pattern|no-pattern)
11121109 // move past ?( so we don't preparse the name.
@@ -1182,9 +1179,7 @@ static string ApplyVerbose(string pattern) {
11821179 case System . Globalization . UnicodeCategory . LetterNumber :
11831180 case System . Globalization . UnicodeCategory . OtherNumber :
11841181 case System . Globalization . UnicodeCategory . ConnectorPunctuation :
1185- pattern = pattern . Remove ( nameIndex - 1 , 1 ) ;
1186- cur -- ;
1187- break ;
1182+ throw PythonExceptions . CreateThrowable ( error ( context ) , "bad escape \\ " + curChar ) ;
11881183 case System . Globalization . UnicodeCategory . DecimalDigitNumber :
11891184 // actually don't want to unescape '\1', '\2' etc. which are references to groups
11901185 break ;
@@ -1197,29 +1192,60 @@ static string ApplyVerbose(string pattern) {
11971192 }
11981193
11991194 return pattern ;
1200- }
12011195
1202- private static void RemoveOption ( ref string pattern , ref int nameIndex ) {
1203- if ( pattern [ nameIndex - 1 ] == '?' && nameIndex < ( pattern . Length - 1 ) && pattern [ nameIndex + 1 ] == ')' ) {
1204- pattern = pattern . Remove ( nameIndex - 2 , 4 ) ;
1205- nameIndex -= 2 ;
1206- } else {
1207- pattern = pattern . Remove ( nameIndex , 1 ) ;
1208- nameIndex -= 2 ;
// Scans a run of inline flag letters from the start of `pattern`.
//
// Returns true when the run is terminated by ')'; `consumed` is then the number
// of flag letters read and `flags` holds the union of the matching ReFlags bits.
//
// Returns false when any other character (including ':') interrupts the run;
// `consumed` is then the offset of that character. If the span ends before a
// terminator is seen, `consumed` is reset to 0 and false is returned.
bool MaybeParseFlags(ReadOnlySpan<char> pattern, out int consumed, out ReFlags flags) {
    flags = default;

    for (consumed = 0; consumed < pattern.Length; consumed++) {
        char letter = pattern[consumed];
        if (letter == ')') {
            return true;
        }

        ReFlags bit;
        switch (letter) {
            case 'a': bit = ReFlags.ASCII; break;
            case 'i': bit = ReFlags.IGNORECASE; break;
            case 'L': bit = ReFlags.LOCALE; break;
            case 'm': bit = ReFlags.MULTILINE; break;
            case 's': bit = ReFlags.DOTALL; break;
            case 'u': bit = ReFlags.UNICODE; break;
            case 'x': bit = ReFlags.VERBOSE; break;
            case ':':
            default:
                // Not a flag letter: leave `consumed` pointing at this character.
                return false;
        }

        flags |= bit;
    }

    // Ran off the end of the span without reaching ')' or another terminator.
    consumed = 0;
    return false;
}
12101234 }
12111235
/// <summary>
/// Returns the string form of a number obtained from
/// <c>r.Next(int.MaxValue / 2, int.MaxValue)</c>.
/// </summary>
/// <remarks>
/// NOTE(review): <c>r</c> is declared outside this block; presumably a shared
/// System.Random instance - confirm.
/// </remarks>
private static string GetRandomString() {
    int value = r.Next(int.MaxValue / 2, int.MaxValue);
    return value.ToString();
}
12131237
1214- private static string UnescapeGroups ( RegExpMatch m , string text ) {
1238+ private static string UnescapeGroups ( CodeContext context , RegExpMatch m , string text ) {
12151239 for ( int i = 0 ; i < text . Length ; i ++ ) {
12161240 if ( text [ i ] == '\\ ' ) {
12171241 StringBuilder sb = new StringBuilder ( text , 0 , i , text . Length ) ;
12181242
12191243 do {
12201244 if ( text [ i ] == '\\ ' ) {
12211245 i ++ ;
1222- if ( i == text . Length ) { sb . Append ( '\\ ' ) ; break ; }
1246+ if ( i == text . Length ) {
1247+ throw PythonExceptions . CreateThrowable ( error ( context ) , $ "bad escape (end of pattern) at position { i - 1 } ") ;
1248+ }
12231249
12241250 switch ( text [ i ] ) {
12251251 case 'n' : sb . Append ( '\n ' ) ; break ;
@@ -1280,6 +1306,7 @@ private static string UnescapeGroups(RegExpMatch m, string text) {
12801306 sb . Append ( ( char ) val ) ;
12811307 }
12821308 } else {
1309+ PythonOps . Warn ( context , PythonExceptions . DeprecationWarning , $ "bad escape \\ { text [ i ] } ") ; // error in 3.7
12831310 sb . Append ( '\\ ' ) ;
12841311 sb . Append ( ( char ) text [ i ] ) ;
12851312 }
0 commit comments