Skip to content

Commit 2df4954

Browse files
committed
introduce option NekoReaderBufferSize
1 parent da99f0e commit 2df4954

12 files changed

Lines changed: 135 additions & 44 deletions

File tree

src/changes/changes.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88

99
<body>
1010
<release version="4.12.0" date="April xx, 2025" description="Chrome/Edge 135, Firefox 137, Rhino RegExp, Bugfixes">
11+
<action type="add" dev="rbri">
12+
WebClient option NekoReaderBufferSize added. You can use this to increase the buffer size used by
13+
the Neko Html parser to optimize parsing performance.
14+
</action>
15+
<action type="update" dev="rbri">
16+
HTMLParser parseFragment(DomNode, String), parse(WebResponse, HtmlPage, boolean, boolean), and
17+
parseFragment(DomNode, DomNode, String, boolean) are deprecated.
18+
</action>
1119
<action type="update" dev="rbri">
1220
neko: HTMLScanner always requires a document handler; the null check is moved to the setter
1321
and all the others are removed.

src/main/java/org/htmlunit/DefaultPageCreator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ protected HtmlPage createHtmlPage(final WebResponse webResponse, final WebWindow
297297
final HtmlPage page = new HtmlPage(webResponse, webWindow);
298298
webWindow.setEnclosedPage(page);
299299

300-
HTML_PARSER.parse(webResponse, page, false, false);
300+
HTML_PARSER.parse(webWindow.getWebClient(), webResponse, page, false, false);
301301
return page;
302302
}
303303

@@ -313,7 +313,7 @@ protected XHtmlPage createXHtmlPage(final WebResponse webResponse, final WebWind
313313
final XHtmlPage page = new XHtmlPage(webResponse, webWindow);
314314
webWindow.setEnclosedPage(page);
315315

316-
HTML_PARSER.parse(webResponse, page, true, false);
316+
HTML_PARSER.parse(webWindow.getWebClient(), webResponse, page, true, false);
317317
return page;
318318
}
319319

src/main/java/org/htmlunit/WebClient.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2875,7 +2875,7 @@ public HtmlPage loadHtmlCodeIntoCurrentWindow(final String htmlCode) throws IOEx
28752875
final HtmlPage page = new HtmlPage(webResponse, webWindow);
28762876
webWindow.setEnclosedPage(page);
28772877

2878-
htmlParser.parse(webResponse, page, false, false);
2878+
htmlParser.parse(this, webResponse, page, false, false);
28792879
return page;
28802880
}
28812881

@@ -2896,7 +2896,7 @@ public XHtmlPage loadXHtmlCodeIntoCurrentWindow(final String xhtmlCode) throws I
28962896
final XHtmlPage page = new XHtmlPage(webResponse, webWindow);
28972897
webWindow.setEnclosedPage(page);
28982898

2899-
htmlParser.parse(webResponse, page, true, false);
2899+
htmlParser.parse(this, webResponse, page, true, false);
29002900
return page;
29012901
}
29022902

src/main/java/org/htmlunit/WebClientOptions.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ public class WebClientOptions implements Serializable {
8383
private boolean geolocationEnabled_;
8484
private Geolocation geolocation_;
8585

86+
private int nekoReaderBufferSize_ = -1;
87+
8688
private boolean webSocketEnabled_ = true;
8789
private int webSocketMaxTextMessageSize_ = -1;
8890
private int webSocketMaxTextMessageBufferSize_ = -1;
@@ -726,6 +728,23 @@ public int getScreenHeight() {
726728
return screenHeight_;
727729
}
728730

731+
/**
732+
* @return the Neko Html parser reader buffer size
733+
*/
734+
public int getNekoReaderBufferSize() {
735+
return nekoReaderBufferSize_;
736+
}
737+
738+
/**
739+
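* Sets the Neko Html parser reader buffer size. Values less than or equal to zero (the default is -1) leave the parser's own default buffer size in place.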
740+
*
741+
* @param nekoReaderBufferSize the new value
742+
*/
743+
public void setNekoReaderBufferSize(final int nekoReaderBufferSize) {
744+
nekoReaderBufferSize_ = nekoReaderBufferSize;
745+
}
746+
747+
729748
/**
730749
* Enables/disables WebSocket support. By default, this property is enabled.
731750
*

src/main/java/org/htmlunit/html/DomNode.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1120,7 +1120,8 @@ public void removeAllChildren() {
11201120
* @throws SAXException in case of error
11211121
*/
11221122
public void parseHtmlSnippet(final String source) throws SAXException, IOException {
1123-
getPage().getWebClient().getPageCreator().getHtmlParser().parseFragment(this, source);
1123+
final WebClient webClient = getPage().getWebClient();
1124+
webClient.getPageCreator().getHtmlParser().parseFragment(webClient, this, this, source, false);
11241125
}
11251126

11261127
/**

src/main/java/org/htmlunit/html/parser/HTMLParser.java

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import java.io.IOException;
1818

1919
import org.htmlunit.SgmlPage;
20+
import org.htmlunit.WebClient;
2021
import org.htmlunit.WebResponse;
2122
import org.htmlunit.html.DomNode;
2223
import org.htmlunit.html.ElementFactory;
@@ -75,22 +76,61 @@ ElementFactory getElementFactory(SgmlPage page, String namespaceURI,
7576
* @param source the (X)HTML to be parsed
7677
* @throws SAXException if a SAX error occurs
7778
* @throws IOException if an IO error occurs
79+
*
80+
* @deprecated as of version 4.12.0; use
81+
* {@link #parseFragment(WebClient, DomNode, DomNode, String, boolean)} instead.
7882
*/
79-
void parseFragment(DomNode parent, String source) throws SAXException, IOException;
83+
@Deprecated
84+
default void parseFragment(DomNode parent, String source) throws SAXException, IOException {
85+
parseFragment(null, parent, parent, source, false);
86+
}
8087

8188
/**
8289
* Parses the HTML content from the given string into an object tree representation.
8390
*
91+
* @param webClient the {@link WebClient}
8492
* @param parent where the new parsed nodes will be added to
8593
* @param context the context to build the fragment context stack
8694
* @param source the (X)HTML to be parsed
8795
* @param createdByJavascript if true the (script) tag was created by javascript
8896
* @throws SAXException if a SAX error occurs
8997
* @throws IOException if an IO error occurs
9098
*/
91-
void parseFragment(DomNode parent, DomNode context, String source,
99+
void parseFragment(WebClient webClient, DomNode parent, DomNode context, String source,
92100
boolean createdByJavascript) throws SAXException, IOException;
93101

102+
/**
103+
* Parses the HTML content from the given string into an object tree representation.
104+
*
105+
* @param parent where the new parsed nodes will be added to
106+
* @param context the context to build the fragment context stack
107+
* @param source the (X)HTML to be parsed
108+
* @param createdByJavascript if true the (script) tag was created by javascript
109+
* @throws SAXException if a SAX error occurs
110+
* @throws IOException if an IO error occurs
111+
*
112+
* @deprecated as of version 4.12.0; use
113+
* {@link #parseFragment(WebClient, DomNode, DomNode, String, boolean)} instead.
114+
*/
115+
@Deprecated
116+
default void parseFragment(DomNode parent, DomNode context, String source,
117+
boolean createdByJavascript) throws SAXException, IOException {
118+
parseFragment(null, parent, context, source, createdByJavascript);
119+
}
120+
121+
/**
122+
* Parses the WebResponse into an object tree representation.
123+
*
124+
* @param webClient the {@link WebClient}
125+
* @param webResponse the response data
126+
* @param page the HtmlPage to add the nodes
127+
* @param xhtml if true use the XHtml parser
128+
* @param createdByJavascript if true the (script) tag was created by javascript
129+
* @throws IOException if there is an IO error
130+
*/
131+
void parse(WebClient webClient, WebResponse webResponse, HtmlPage page,
132+
boolean xhtml, boolean createdByJavascript) throws IOException;
133+
94134
/**
95135
* Parses the WebResponse into an object tree representation.
96136
*
@@ -99,6 +139,12 @@ void parseFragment(DomNode parent, DomNode context, String source,
99139
* @param xhtml if true use the XHtml parser
100140
* @param createdByJavascript if true the (script) tag was created by javascript
101141
* @throws IOException if there is an IO error
142+
*
143+
* @deprecated as of version 4.12.0; use
144+
* {@link #parse(WebClient, WebResponse, HtmlPage, boolean, boolean)} instead.
102145
*/
103-
void parse(WebResponse webResponse, HtmlPage page, boolean xhtml, boolean createdByJavascript) throws IOException;
146+
@Deprecated
147+
default void parse(WebResponse webResponse, HtmlPage page, boolean xhtml, boolean createdByJavascript) throws IOException {
148+
parse(null, webResponse, page, xhtml, createdByJavascript);
149+
}
104150
}

src/main/java/org/htmlunit/html/parser/neko/HtmlUnitNekoHtmlParser.java

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.htmlunit.Page;
3030
import org.htmlunit.SgmlPage;
3131
import org.htmlunit.WebAssert;
32+
import org.htmlunit.WebClient;
3233
import org.htmlunit.WebResponse;
3334
import org.htmlunit.cyberneko.HTMLScanner;
3435
import org.htmlunit.cyberneko.HTMLTagBalancer;
@@ -83,30 +84,10 @@ public final class HtmlUnitNekoHtmlParser implements HTMLParser {
8384
}
8485

8586
/**
86-
* Parses the HTML content from the given string into an object tree representation.
87-
*
88-
* @param parent the parent for the new nodes
89-
* @param source the (X)HTML to be parsed
90-
* @throws SAXException if a SAX error occurs
91-
* @throws IOException if an IO error occurs
87+
* {@inheritDoc}
9288
*/
9389
@Override
94-
public void parseFragment(final DomNode parent, final String source) throws SAXException, IOException {
95-
parseFragment(parent, parent, source, false);
96-
}
97-
98-
/**
99-
* Parses the HTML content from the given string into an object tree representation.
100-
*
101-
* @param parent where the new parsed nodes will be added to
102-
* @param context the context to build the fragment context stack
103-
* @param source the (X)HTML to be parsed
104-
* @param createdByJavascript if true the (script) tag was created by javascript
105-
* @throws SAXException if a SAX error occurs
106-
* @throws IOException if an IO error occurs
107-
*/
108-
@Override
109-
public void parseFragment(final DomNode parent, final DomNode context, final String source,
90+
public void parseFragment(final WebClient webClient, final DomNode parent, final DomNode context, final String source,
11091
final boolean createdByJavascript)
11192
throws SAXException, IOException {
11293
final Page page = parent.getPage();
@@ -153,16 +134,10 @@ else if (ancestors.size() == 1
153134
}
154135

155136
/**
156-
* Parses the WebResponse into an object tree representation.
157-
*
158-
* @param webResponse the response data
159-
* @param page the HtmlPage to add the nodes
160-
* @param xhtml if true use the XHtml parser
161-
* @param createdByJavascript if true the (script) tag was created by javascript
162-
* @throws IOException if there is an IO error
137+
* {@inheritDoc}
163138
*/
164139
@Override
165-
public void parse(final WebResponse webResponse, final HtmlPage page,
140+
public void parse(final WebClient webClient, final WebResponse webResponse, final HtmlPage page,
166141
final boolean xhtml, final boolean createdByJavascript) throws IOException {
167142
final URL url = webResponse.getWebRequest().getUrl();
168143
final HtmlUnitNekoDOMBuilder domBuilder =
@@ -182,6 +157,13 @@ public void parse(final WebResponse webResponse, final HtmlPage page,
182157
domBuilder.setFeature(HTMLScanner.STYLE_STRIP_CDATA_DELIMS, true);
183158
domBuilder.setFeature(HTMLScanner.CDATA_EARLY_CLOSING, false);
184159
}
160+
161+
if (webClient != null) {
162+
final int bufferSize = webClient.getOptions().getNekoReaderBufferSize();
163+
if (bufferSize > 0) {
164+
domBuilder.setProperty(HTMLScanner.READER_BUFFER_SIZE, bufferSize);
165+
}
166+
}
185167
}
186168
catch (final Exception e) {
187169
throw new ObjectInstantiationException("Error setting HTML parser feature", e);

src/main/java/org/htmlunit/javascript/host/dom/DOMImplementation.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import java.io.IOException;
1818

1919
import org.htmlunit.StringWebResponse;
20+
import org.htmlunit.WebClient;
2021
import org.htmlunit.WebResponse;
2122
import org.htmlunit.WebWindow;
2223
import org.htmlunit.html.Html;
@@ -263,8 +264,9 @@ public HTMLDocument createHTMLDocument(final Object titleObj) {
263264
// document.setWindow(getWindow());
264265
document.setDomNode(page);
265266

266-
final HTMLParser htmlParser = webWindow.getWebClient().getPageCreator().getHtmlParser();
267-
htmlParser.parse(webResponse, page, false, false);
267+
final WebClient webClient = webWindow.getWebClient();
268+
final HTMLParser htmlParser = webClient.getPageCreator().getHtmlParser();
269+
htmlParser.parse(webClient, webResponse, page, false, false);
268270
return page.getScriptableObject();
269271
}
270272
catch (final IOException e) {

src/main/java/org/htmlunit/javascript/host/dom/DOMParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ public static Document parseHtmlDocument(final HtmlUnitScriptable scriptable, fi
145145

146146
final WebClient webClient = webWindow.getWebClient();
147147
final HTMLParser htmlParser = webClient.getPageCreator().getHtmlParser();
148-
htmlParser.parse(webResponse, page, false, true);
148+
htmlParser.parse(webClient, webResponse, page, false, true);
149149
return page.getScriptableObject();
150150
}
151151
}

src/main/java/org/htmlunit/javascript/host/dom/Range.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import org.apache.commons.logging.LogFactory;
2020
import org.htmlunit.SgmlPage;
21+
import org.htmlunit.WebClient;
2122
import org.htmlunit.html.DomDocumentFragment;
2223
import org.htmlunit.html.DomNode;
2324
import org.htmlunit.html.impl.SimpleRange;
@@ -258,8 +259,9 @@ public HtmlUnitScriptable createContextualFragment(final String valueAsString) {
258259
final SgmlPage page = internGetStartContainer().getDomNodeOrDie().getPage();
259260
final DomDocumentFragment fragment = new DomDocumentFragment(page);
260261
try {
261-
page.getWebClient().getPageCreator().getHtmlParser()
262-
.parseFragment(fragment, internGetStartContainer().getDomNodeOrDie(), valueAsString, false);
262+
final WebClient webClient = page.getWebClient();
263+
webClient.getPageCreator().getHtmlParser()
264+
.parseFragment(webClient, fragment, internGetStartContainer().getDomNodeOrDie(), valueAsString, false);
263265
}
264266
catch (final Exception e) {
265267
LogFactory.getLog(Range.class).error("Unexpected exception occurred in createContextualFragment", e);

0 commit comments

Comments
 (0)