Skip to content

Commit 75c7c82

Browse files
authored
Re-enable test_re_stdlib (#1466)
* Re-enable test_re_stdlib
* Re-enable test_string
* Fix test failure
1 parent 26767ee commit 75c7c82

7 files changed

Lines changed: 125 additions & 98 deletions

File tree

Src/IronPython.Modules/_sre.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ namespace IronPython.Modules {
1212
public static class PythonSRegEx {
1313
public const string __doc__ = "non-functional _sre module. Included only for completeness.";
1414

15-
public const int MAGIC = 20031017;
15+
public const int MAGIC = 20140917;
1616
public const int CODESIZE = 2;
1717
public const int MAXREPEAT = 65535;
1818
public const int MAXGROUPS = int.MaxValue;

Src/IronPython.Modules/re.cs

Lines changed: 84 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -74,26 +74,21 @@ internal enum ReFlags : int {
7474
public const int A = (int)ReFlags.ASCII;
7575

7676
// long forms
77-
public const int TEMPLATE = (int)ReFlags.TEMPLATE;
77+
public const int TEMPLATE = (int)ReFlags.TEMPLATE;
7878
public const int IGNORECASE = (int)ReFlags.IGNORECASE;
79-
public const int LOCALE = (int)ReFlags.LOCALE;
80-
public const int MULTILINE = (int)ReFlags.MULTILINE;
81-
public const int DOTALL = (int)ReFlags.DOTALL;
82-
public const int UNICODE = (int)ReFlags.UNICODE;
83-
public const int VERBOSE = (int)ReFlags.VERBOSE;
84-
public const int ASCII = (int)ReFlags.ASCII;
79+
public const int LOCALE = (int)ReFlags.LOCALE;
80+
public const int MULTILINE = (int)ReFlags.MULTILINE;
81+
public const int DOTALL = (int)ReFlags.DOTALL;
82+
public const int UNICODE = (int)ReFlags.UNICODE;
83+
public const int VERBOSE = (int)ReFlags.VERBOSE;
84+
public const int ASCII = (int)ReFlags.ASCII;
8585

8686
#endregion
8787

8888
#region Public API Surface
8989

90-
public static Pattern compile(CodeContext/*!*/ context, object? pattern, int flags = 0) {
91-
try {
92-
return GetPattern(context, pattern, flags, true);
93-
} catch (ArgumentException e) {
94-
throw PythonExceptions.CreateThrowable(error(context), e.Message);
95-
}
96-
}
90+
public static Pattern compile(CodeContext/*!*/ context, object? pattern, int flags = 0)
91+
=> GetPattern(context, pattern, flags, true);
9792

9893
public const string engine = "cli reg ex";
9994

@@ -170,8 +165,10 @@ public class Pattern : IWeakReferenceable {
170165
private WeakRefTracker? _weakRefTracker;
171166

172167
internal Pattern(CodeContext/*!*/ context, object pattern, ReFlags flags = 0, bool compiled = false) {
173-
_prePattern = PreParseRegex(context, PatternAsString(pattern, ref flags), (flags & ReFlags.VERBOSE) != 0, out ReFlags options);
168+
_prePattern = PreParseRegex(context, PatternAsString(pattern, ref flags), verbose: flags.HasFlag(ReFlags.VERBOSE), isBytes: !flags.HasFlag(ReFlags.UNICODE), out ReFlags options);
174169
flags |= options;
170+
if (flags.HasFlag(ReFlags.UNICODE | ReFlags.LOCALE)) throw PythonOps.ValueError("cannot use LOCALE flag with a str pattern");
171+
if (flags.HasFlag(ReFlags.ASCII | ReFlags.LOCALE)) throw PythonOps.ValueError("ASCII and LOCALE flags are incompatible");
175172
_re = GenRegex(context, _prePattern, flags, compiled, false);
176173
this.pattern = pattern;
177174
this.flags = (int)flags;
@@ -425,7 +422,7 @@ public object sub(CodeContext/*!*/ context, object? repl, object? @string, int c
425422
};
426423
prevEnd = match.Index + match.Length;
427424

428-
if (replacement != null) return UnescapeGroups(match, replacement);
425+
if (replacement != null) return UnescapeGroups(context, match, replacement);
429426
return ValidateString(PythonCalls.Call(context, repl, Match.Make(match, this, input)));
430427
},
431428
count));
@@ -453,7 +450,7 @@ public PythonTuple subn(CodeContext/*!*/ context, object? repl, object? @string,
453450
prevEnd = match.Index + match.Length;
454451

455452
totalCount++;
456-
if (replacement != null) return UnescapeGroups(match, replacement);
453+
if (replacement != null) return UnescapeGroups(context, match, replacement);
457454

458455
return ValidateString(PythonCalls.Call(context, repl, Match.Make(match, this, input)));
459456
},
@@ -464,7 +461,7 @@ public PythonTuple subn(CodeContext/*!*/ context, object? repl, object? @string,
464461

465462
public int flags { get; }
466463

467-
public PythonDictionary groupindex {
464+
public MappingProxy groupindex {
468465
get {
469466
if (_groups == null) {
470467
PythonDictionary d = new PythonDictionary();
@@ -480,7 +477,7 @@ public PythonDictionary groupindex {
480477
}
481478
_groups = d;
482479
}
483-
return _groups;
480+
return new MappingProxy(_groups);
484481
}
485482
}
486483

@@ -489,7 +486,7 @@ public PythonDictionary groupindex {
489486
public object pattern { get; }
490487

491488
public override bool Equals(object? obj)
492-
=> obj is Pattern other && other.pattern == pattern && other.flags == flags;
489+
=> obj is Pattern other && PythonOps.IsOrEqualsRetBool(other.pattern, pattern) && other.flags == flags;
493490

494491
public override int GetHashCode() => pattern.GetHashCode() ^ flags;
495492

@@ -646,6 +643,8 @@ private Match(RegExpMatch m, Pattern pattern, string text, int pos, int endpos)
646643

647644
#region Public API Surface
648645

646+
public object? this[object? index] => group(index);
647+
649648
public string __repr__(CodeContext context)
650649
=> $"<re.Match object; span=({start()}, {end()}), match={PythonOps.Repr(context, group(0))}>";
651650

@@ -851,7 +850,7 @@ private Group GetGroup(object? group) {
851850

852851
int GetGroupIndex(object? group) {
853852
int grpIndex;
854-
if (!Converter.TryConvertToInt32(group, out grpIndex)) {
853+
if (!Converter.TryConvertToIndex(group, out grpIndex, throwOverflowError: false, throwTypeError: false)) {
855854
if (group is string s) {
856855
grpIndex = re._re.GroupNumberFromName(s);
857856
} else if (group is ExtensibleString es) {
@@ -924,7 +923,7 @@ private static RegexOptions FlagsToOption(ReFlags flags) {
924923
/// Preparses a regular expression text returning a ParsedRegex class
925924
/// that can be used for further regular expressions.
926925
/// </summary>
927-
private static string PreParseRegex(CodeContext/*!*/ context, string pattern, bool verbose, out ReFlags options) {
926+
private static string PreParseRegex(CodeContext/*!*/ context, string pattern, bool verbose, bool isBytes, out ReFlags options) {
928927
var userPattern = pattern;
929928
options = default;
930929
if (verbose) options |= ReFlags.VERBOSE;
@@ -1074,39 +1073,37 @@ static string ApplyVerbose(string pattern) {
10741073

10751074
break;
10761075
case 'a':
1077-
options |= ReFlags.ASCII;
1078-
RemoveOption(ref pattern, ref nameIndex);
1079-
break;
10801076
case 'i':
1081-
options |= ReFlags.IGNORECASE;
1082-
RemoveOption(ref pattern, ref nameIndex);
1083-
break;
10841077
case 'L':
1085-
options |= ReFlags.LOCALE;
1086-
RemoveOption(ref pattern, ref nameIndex);
1087-
break;
10881078
case 'm':
1089-
options |= ReFlags.MULTILINE;
1090-
RemoveOption(ref pattern, ref nameIndex);
1091-
break;
10921079
case 's':
1093-
options |= ReFlags.DOTALL;
1094-
RemoveOption(ref pattern, ref nameIndex);
1095-
break;
10961080
case 'u':
1097-
options |= ReFlags.UNICODE;
1098-
RemoveOption(ref pattern, ref nameIndex);
1099-
break;
11001081
case 'x':
1101-
if (!verbose) return PreParseRegex(context, userPattern, true, out options);
1102-
options |= ReFlags.VERBOSE;
1103-
RemoveOption(ref pattern, ref nameIndex);
1104-
break;
1082+
if (MaybeParseFlags(pattern.AsSpan().Slice(nameIndex), out int consumed, out ReFlags flags)) {
1083+
nameIndex -= 2;
1084+
if (nameIndex != 0) {
1085+
// error in 3.11
1086+
if (userPattern.Length > 20) {
1087+
PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"Flags not at the start of the expression {(isBytes ? "b" : string.Empty)}{PythonOps.Repr(context, userPattern.Substring(0, 20))} (truncated)");
1088+
} else {
1089+
PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"Flags not at the start of the expression {(isBytes ? "b" : string.Empty)}{PythonOps.Repr(context, userPattern)}");
1090+
}
1091+
}
1092+
if (flags.HasFlag(ReFlags.VERBOSE) && !verbose) return PreParseRegex(context, userPattern, verbose: true, isBytes: isBytes, out options);
1093+
options |= flags;
1094+
pattern = pattern.Remove(nameIndex, consumed + 3);
1095+
break;
1096+
}
1097+
if (pattern[nameIndex + consumed] != ':') {
1098+
throw PythonExceptions.CreateThrowable(error(context), "Unrecognized flag " + pattern[nameIndex + consumed]);
1099+
}
1100+
break; // grouping construct
11051101
case ':': break; // non-capturing
11061102
case '=': break; // look ahead assertion
11071103
case '<': break; // positive look behind assertion
11081104
case '!': break; // negative look ahead assertion
11091105
case '#': break; // inline comment
1106+
case '-': break; // grouping construct
11101107
case '(':
11111108
// conditional match alternation (?(id/name)yes-pattern|no-pattern)
11121109
// move past ?( so we don't preparse the name.
@@ -1182,9 +1179,7 @@ static string ApplyVerbose(string pattern) {
11821179
case System.Globalization.UnicodeCategory.LetterNumber:
11831180
case System.Globalization.UnicodeCategory.OtherNumber:
11841181
case System.Globalization.UnicodeCategory.ConnectorPunctuation:
1185-
pattern = pattern.Remove(nameIndex - 1, 1);
1186-
cur--;
1187-
break;
1182+
throw PythonExceptions.CreateThrowable(error(context), "bad escape \\" + curChar);
11881183
case System.Globalization.UnicodeCategory.DecimalDigitNumber:
11891184
// actually don't want to unescape '\1', '\2' etc. which are references to groups
11901185
break;
@@ -1197,29 +1192,60 @@ static string ApplyVerbose(string pattern) {
11971192
}
11981193

11991194
return pattern;
1200-
}
12011195

1202-
private static void RemoveOption(ref string pattern, ref int nameIndex) {
1203-
if (pattern[nameIndex - 1] == '?' && nameIndex < (pattern.Length - 1) && pattern[nameIndex + 1] == ')') {
1204-
pattern = pattern.Remove(nameIndex - 2, 4);
1205-
nameIndex -= 2;
1206-
} else {
1207-
pattern = pattern.Remove(nameIndex, 1);
1208-
nameIndex -= 2;
1196+
bool MaybeParseFlags(ReadOnlySpan<char> pattern, out int consumed, out ReFlags flags) {
1197+
consumed = default;
1198+
flags = default;
1199+
foreach (char c in pattern) {
1200+
switch (c) {
1201+
case 'a':
1202+
flags |= ReFlags.ASCII;
1203+
break;
1204+
case 'i':
1205+
flags |= ReFlags.IGNORECASE;
1206+
break;
1207+
case 'L':
1208+
flags |= ReFlags.LOCALE;
1209+
break;
1210+
case 'm':
1211+
flags |= ReFlags.MULTILINE;
1212+
break;
1213+
case 's':
1214+
flags |= ReFlags.DOTALL;
1215+
break;
1216+
case 'u':
1217+
flags |= ReFlags.UNICODE;
1218+
break;
1219+
case 'x':
1220+
flags |= ReFlags.VERBOSE;
1221+
break;
1222+
case ')':
1223+
return true;
1224+
case ':':
1225+
return false;
1226+
default:
1227+
return false;
1228+
}
1229+
consumed++;
1230+
}
1231+
consumed = 0;
1232+
return false;
12091233
}
12101234
}
12111235

12121236
private static string GetRandomString() => r.Next(int.MaxValue / 2, int.MaxValue).ToString();
12131237

1214-
private static string UnescapeGroups(RegExpMatch m, string text) {
1238+
private static string UnescapeGroups(CodeContext context, RegExpMatch m, string text) {
12151239
for (int i = 0; i < text.Length; i++) {
12161240
if (text[i] == '\\') {
12171241
StringBuilder sb = new StringBuilder(text, 0, i, text.Length);
12181242

12191243
do {
12201244
if (text[i] == '\\') {
12211245
i++;
1222-
if (i == text.Length) { sb.Append('\\'); break; }
1246+
if (i == text.Length) {
1247+
throw PythonExceptions.CreateThrowable(error(context), $"bad escape (end of pattern) at position {i - 1}");
1248+
}
12231249

12241250
switch (text[i]) {
12251251
case 'n': sb.Append('\n'); break;
@@ -1280,6 +1306,7 @@ private static string UnescapeGroups(RegExpMatch m, string text) {
12801306
sb.Append((char)val);
12811307
}
12821308
} else {
1309+
PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"bad escape \\{text[i]}"); // error in 3.7
12831310
sb.Append('\\');
12841311
sb.Append((char)text[i]);
12851312
}

Src/IronPythonTest/Cases/CPythonCasesManifest.ini

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,9 +1246,6 @@ Ignore=true # unittest.case.SkipTest: Cannot import name SSLSession
12461246
[CPython.test_spwd]
12471247
Ignore=true # fails on macOS
12481248

1249-
[CPython.test_string]
1250-
Ignore=true # test_invalid_placeholders - https://github.com/IronLanguages/ironpython3/issues/1419
1251-
12521249
[CPython.test_string_literals]
12531250
Ignore=true # AssertionError: DeprecationWarning not triggered
12541251

Src/IronPythonTest/Cases/IronPythonCasesManifest.ini

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,6 @@ Ignore=true # multiple failures
189189
[IronPython.test_imp]
190190
Ignore=true # 1 failure
191191

192-
[IronPython.test_re_stdlib]
193-
Ignore=true # AssertionError: SRE module mismatch
194-
195192
[IronPython.test_sax_stdlib]
196193
Ignore=true # multiple failures
197194

Src/StdLib/Lib/string.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,7 @@ class Template(metaclass=_TemplateMetaclass):
8282
# but without ASCII flag. We can't add re.ASCII to flags because of
8383
# backward compatibility. So we use local -i flag and [a-zA-Z] pattern.
8484
# See https://bugs.python.org/issue31672
85-
#idpattern = r'(?-i:[_a-zA-Z][_a-zA-Z0-9]*)'
86-
idpattern = r'[_a-z][_a-z0-9]*' # https://github.com/IronLanguages/ironpython3/issues/1419
85+
idpattern = r'(?-i:[_a-zA-Z][_a-zA-Z0-9]*)'
8786
flags = _re.IGNORECASE
8887

8988
def __init__(self, template):

0 commit comments

Comments
 (0)