Skip to content

Commit 470666f

Browse files
committed
More general handling of Unicode property escapes (issue #583)
1 parent 776d1a4 commit 470666f

3 files changed

Lines changed: 37 additions & 9 deletions

File tree

src/main/java/org/htmlunit/javascript/regexp/RegExpJsToJavaConverter.java

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,11 @@ public class RegExpJsToJavaConverter {
4949
static {
5050
UNICODE_ESCAPES = new HashMap<>();
5151
UNICODE_ESCAPES.put("L", "L");
52-
UNICODE_ESCAPES.put("Letter", "L");
52+
UNICODE_ESCAPES.put("LETTER", "L");
53+
UNICODE_ESCAPES.put("LU", "Lu");
54+
UNICODE_ESCAPES.put("UPPERCASELETTER", "Lu");
55+
UNICODE_ESCAPES.put("LL", "Ll");
56+
UNICODE_ESCAPES.put("LOWERCASELETTER", "Ll");
5357
}
5458

5559
/**
@@ -392,12 +396,7 @@ private void processEscapeSequence() {
392396
}
393397
while (next > -1 && next != '}');
394398
if (next == '}') {
395-
final String escape = tape_.tape_.substring(uPos, tape_.currentPos_ - 1);
396-
final String replace = UNICODE_ESCAPES.get(escape);
397-
if (replace != null) {
398-
tape_.tape_.replace(uPos, uPos + escape.length(), replace);
399-
return;
400-
}
399+
return;
401400
}
402401

403402
// back to the old behavior

src/test/java/org/htmlunit/javascript/regexp/RegExpJsToJavaConverter2Test.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,37 @@ public void validationPatternUnicodeCodePointEscapesFails() throws Exception {
9191
*/
9292
@Test
9393
@Alerts("true")
94-
public void validationPatternUnicodePropertyEscapeL() throws Exception {
94+
public void validationPatternUnicodePropertyEscapeLetter() throws Exception {
9595
validation("\\p{L}*", "Html");
9696
validation("\\p{L}*", "&#x043C&#x0439&#x0440");
9797
}
9898

99+
/**
100+
* @throws Exception if an error occurs
101+
*/
102+
@Test
103+
@Alerts("true")
104+
public void validationPatternUnicodePropertyEscapeUppercaseLetter() throws Exception {
105+
validation("\\p{Lu}*", "HTML");
106+
validation("\\p{Lu}*", "&#x041C&#x0419&#x0420");
107+
validation("\\p{uppercase letter}*", "&#x041C&#x0419&#x0420");
108+
validation("\\p{Uppercase Letter}*", "&#x041C&#x0419&#x0420");
109+
validation("\\p{Uppercase_Letter}*", "&#x041C&#x0419&#x0420");
110+
validation("\\p{Uppercase-Letter}*", "&#x041C&#x0419&#x0420");
111+
validation("\\p{uppercaseletter}*", "&#x041C&#x0419&#x0420");
112+
}
113+
114+
115+
/**
116+
* @throws Exception if an error occurs
117+
*/
118+
@Test
119+
@Alerts("true")
120+
public void validationPatternUnicodePropertyEscapeLowercaseLetter() throws Exception {
121+
validation("\\p{Ll}*", "html");
122+
validation("\\p{Ll}*", "&#x043C&#x0439&#x0440");
123+
}
124+
99125
private void validation(final String pattern, final String value) throws Exception {
100126
final String html =
101127
"<html><head>\n"

src/test/java/org/htmlunit/javascript/regexp/RegExpJsToJavaConverterTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,10 @@ public void unicodePropertyEscapes() {
392392
final RegExpJsToJavaConverter regExpJsToJavaConverter = new RegExpJsToJavaConverter();
393393

394394
assertEquals("\\p{L}0-9", regExpJsToJavaConverter.convert("\\p{L}0-9"));
395-
assertEquals("\\p{L}0-9", regExpJsToJavaConverter.convert("\\p{Letter}0-9"));
395+
assertEquals("\\p{Letter}0-9", regExpJsToJavaConverter.convert("\\p{Letter}0-9"));
396+
397+
assertEquals("\\p{Lu}0-9", regExpJsToJavaConverter.convert("\\p{Lu}0-9"));
398+
assertEquals("\\p{Ll}0-9", regExpJsToJavaConverter.convert("\\p{Ll}0-9"));
396399

397400
assertEquals("p\\{html\\}0-9", regExpJsToJavaConverter.convert("\\p{html}0-9"));
398401
}

0 commit comments

Comments
 (0)