Skip to content

Commit 9f9904f

Browse files
committed
Fix handling of CDATA sections in XHTML documents
1 parent 5e8f36b commit 9f9904f

5 files changed

Lines changed: 41 additions & 27 deletions

File tree

src/main/java/org/htmlunit/html/parser/neko/HtmlUnitNekoDOMBuilder.java

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040
import org.htmlunit.cyberneko.xerces.xni.XNIException;
4141
import org.htmlunit.cyberneko.xerces.xni.parser.XMLInputSource;
4242
import org.htmlunit.cyberneko.xerces.xni.parser.XMLParserConfiguration;
43+
import org.htmlunit.html.DomCDataSection;
4344
import org.htmlunit.html.DomComment;
4445
import org.htmlunit.html.DomDocumentType;
4546
import org.htmlunit.html.DomElement;
@@ -630,7 +631,11 @@ public void comment(final char[] ch, final int start, final int length) {
630631
/** {@inheritDoc} */
631632
@Override
632633
public void endCDATA() {
633-
// nothing to do
634+
final String data = characters_.toString();
635+
characters_.clear();
636+
637+
final DomCDataSection cdataSection = new DomCDataSection(page_, data);
638+
appendChild(currentNode_, cdataSection);
634639
}
635640

636641
/** {@inheritDoc} */
@@ -648,7 +653,7 @@ public void endEntity(final String name) {
648653
/** {@inheritDoc} */
649654
@Override
650655
public void startCDATA() {
651-
// nothing to do
656+
handleCharacters();
652657
}
653658

654659
/** {@inheritDoc} */

src/main/java/org/htmlunit/html/parser/neko/HtmlUnitNekoHtmlParser.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,7 @@ public void parse(final WebClient webClient, final WebResponse webResponse, fina
155155
domBuilder.setFeature(HTMLScanner.ALLOW_SELFCLOSING_TAGS, true);
156156
domBuilder.setFeature(HTMLScanner.SCRIPT_STRIP_CDATA_DELIMS, true);
157157
domBuilder.setFeature(HTMLScanner.STYLE_STRIP_CDATA_DELIMS, true);
158+
domBuilder.setFeature(HTMLScanner.CDATA_SECTIONS, true);
158159
domBuilder.setFeature(HTMLScanner.CDATA_EARLY_CLOSING, false);
159160
}
160161

src/main/java/org/htmlunit/html/serializer/HtmlSerializerVisibleText.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
import org.htmlunit.WebWindow;
2525
import org.htmlunit.css.ComputedCssStyleDeclaration;
2626
import org.htmlunit.css.StyleAttributes.Definition;
27+
import org.htmlunit.html.DomCDataSection;
2728
import org.htmlunit.html.DomComment;
2829
import org.htmlunit.html.DomElement;
2930
import org.htmlunit.html.DomNode;
@@ -105,7 +106,10 @@ protected void appendChildren(final HtmlSerializerTextBuilder builder, final Dom
105106
* @param mode the {@link Mode} to use for processing
106107
*/
107108
protected void appendNode(final HtmlSerializerTextBuilder builder, final DomNode node, final Mode mode) {
108-
if (node instanceof DomText) {
109+
if (node instanceof DomCDataSection) {
110+
// ignore
111+
}
112+
else if (node instanceof DomText) {
109113
appendText(builder, (DomText) node, mode);
110114
}
111115
else if (node instanceof DomComment) {

src/main/java/org/htmlunit/javascript/host/Element.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
import org.htmlunit.css.ElementCssStyleDeclaration;
4343
import org.htmlunit.cssparser.parser.CSSException;
4444
import org.htmlunit.html.DomAttr;
45+
import org.htmlunit.html.DomCDataSection;
4546
import org.htmlunit.html.DomCharacterData;
4647
import org.htmlunit.html.DomComment;
4748
import org.htmlunit.html.DomElement;
@@ -1008,6 +1009,9 @@ protected void printNode(final StringBuilder builder, final DomNode node, final
10081009
builder.append("<!--").append(s).append("-->");
10091010
}
10101011
}
1012+
else if (node instanceof DomCDataSection) {
1013+
builder.append("<![CDATA[").append(node.getNodeValue()).append("]]>");
1014+
}
10111015
else if (node instanceof DomCharacterData) {
10121016
// Remove whitespace sequences, possibly escape XML characters.
10131017
String s = node.getNodeValue();

src/test/java/org/htmlunit/html/XHtmlPage2Test.java

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -91,18 +91,18 @@ public void selfClosingTextarea() throws Exception {
9191
@Alerts({"before<![CDATA[inside]]>after",
9292
"<div xmlns=\"http://www.w3.org/1999/xhtml\" id=\"tester\">before<![CDATA[inside]]>after</div>",
9393
"beforeinsideafter"})
94-
@HtmlUnitNYI(CHROME = {"before<!--[CDATA[inside]]-->after",
95-
"<div id=\"tester\">before<!--[CDATA[inside]]-->after</div>",
96-
"beforeafter"},
97-
EDGE = {"before<!--[CDATA[inside]]-->after",
98-
"<div id=\"tester\">before<!--[CDATA[inside]]-->after</div>",
99-
"beforeafter"},
100-
FF = {"before<!--[CDATA[inside]]-->after",
101-
"<div id=\"tester\">before<!--[CDATA[inside]]-->after</div>",
102-
"beforeafter"},
103-
FF_ESR = {"before<!--[CDATA[inside]]-->after",
104-
"<div id=\"tester\">before<!--[CDATA[inside]]-->after</div>",
105-
"beforeafter"})
94+
@HtmlUnitNYI(CHROME = {"before<![CDATA[inside]]>after",
95+
"<div id=\"tester\">before<![CDATA[inside]]>after</div>",
96+
"beforeinsideafter"},
97+
EDGE = {"before<![CDATA[inside]]>after",
98+
"<div id=\"tester\">before<![CDATA[inside]]>after</div>",
99+
"beforeinsideafter"},
100+
FF = {"before<![CDATA[inside]]>after",
101+
"<div id=\"tester\">before<![CDATA[inside]]>after</div>",
102+
"beforeinsideafter"},
103+
FF_ESR = {"before<![CDATA[inside]]>after",
104+
"<div id=\"tester\">before<![CDATA[inside]]>after</div>",
105+
"beforeinsideafter"})
106106
public void cdata() throws Exception {
107107
final String html
108108
= "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
@@ -141,18 +141,18 @@ public void cdata() throws Exception {
141141
@Alerts({"before<![CDATA[<div></div>]]>after",
142142
"<div xmlns=\"http://www.w3.org/1999/xhtml\" id=\"tester\">before<![CDATA[<div></div>]]>after</div>",
143143
"before<div></div>after"})
144-
@HtmlUnitNYI(CHROME = {"before<!--[CDATA[<div></div>]]-->after",
145-
"<div id=\"tester\">before<!--[CDATA[<div></div>]]-->after</div>",
146-
"beforeafter"},
147-
EDGE = {"before<!--[CDATA[<div></div>]]-->after",
148-
"<div id=\"tester\">before<!--[CDATA[<div></div>]]-->after</div>",
149-
"beforeafter"},
150-
FF = {"before<!--[CDATA[<div></div>]]-->after",
151-
"<div id=\"tester\">before<!--[CDATA[<div></div>]]-->after</div>",
152-
"beforeafter"},
153-
FF_ESR = {"before<!--[CDATA[<div></div>]]-->after",
154-
"<div id=\"tester\">before<!--[CDATA[<div></div>]]-->after</div>",
155-
"beforeafter"})
144+
@HtmlUnitNYI(CHROME = {"before<![CDATA[<div></div>]]>after",
145+
"<div id=\"tester\">before<![CDATA[<div></div>]]>after</div>",
146+
"before<div></div>after"},
147+
EDGE = {"before<![CDATA[<div></div>]]>after",
148+
"<div id=\"tester\">before<![CDATA[<div></div>]]>after</div>",
149+
"before<div></div>after"},
150+
FF = {"before<![CDATA[<div></div>]]>after",
151+
"<div id=\"tester\">before<![CDATA[<div></div>]]>after</div>",
152+
"before<div></div>after"},
153+
FF_ESR = {"before<![CDATA[<div></div>]]>after",
154+
"<div id=\"tester\">before<![CDATA[<div></div>]]>after</div>",
155+
"before<div></div>after"})
156156
public void earlyClosingCdata() throws Exception {
157157
final String html
158158
= "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"

0 commit comments

Comments
 (0)