Skip to content

Commit f7a954d

Browse files
committed
Inline tag-open handling in Tokenizer to further improve performance
1 parent b3ef91f commit f7a954d

1 file changed

Lines changed: 28 additions & 33 deletions

File tree

src/HTML5/Parser/Tokenizer.php

Lines changed: 28 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -121,16 +121,30 @@ public function setTextMode($textmode, $untilTag = null)
121121
*/
122122
protected function consumeData()
123123
{
124-
// Character Ref
125-
/*
126-
* $this->characterReference() || $this->tagOpen() || $this->eof() || $this->characterData();
127-
*/
124+
// Character reference
128125
$this->characterReference();
129-
$this->tagOpen();
130-
$this->eof();
131126

132-
// Inline the parsing of characters as it's the critical performance path
127+
// Parse tag
128+
if ($this->scanner->current() === '<') {
129+
// Any buffered text data can go out now.
130+
$this->flushBuffer();
131+
132+
$tok = $this->scanner->next();
133+
134+
$this->markupDeclaration($tok)
135+
|| $this->endTag()
136+
|| $this->processingInstruction()
137+
|| $this->tagName()
138+
// This always returns false.
139+
|| $this->parseError("Illegal tag opening")
140+
|| $this->characterData();
141+
}
142+
143+
// Handle end of document
133144
$tok = $this->scanner->current();
145+
$this->eof($tok);
146+
147+
// Parse character
134148
if ($tok !== false) {
135149
switch ($this->textMode) {
136150
case Elements::TEXT_RAW:
@@ -272,15 +286,17 @@ protected function rcdata($tok)
272286
/**
273287
* If the document is read, emit an EOF event.
274288
*/
275-
protected function eof()
289+
protected function eof($tok)
276290
{
277-
if ($this->scanner->current() === false) {
291+
if ($tok === false) {
278292
// fprintf(STDOUT, "EOF");
279293
$this->flushBuffer();
280294
$this->events->eof();
281295
$this->carryOn = false;
296+
282297
return true;
283298
}
299+
284300
return false;
285301
}
286302

@@ -302,33 +318,12 @@ protected function characterReference()
302318
return false;
303319
}
304320

305-
/**
306-
* Emit a tagStart event on encountering a tag.
307-
*
308-
* 8.2.4.8
309-
*/
310-
protected function tagOpen()
311-
{
312-
if ($this->scanner->current() != '<') {
313-
return false;
314-
}
315-
316-
// Any buffered text data can go out now.
317-
$this->flushBuffer();
318-
319-
$this->scanner->next();
320-
321-
return $this->markupDeclaration() || $this->endTag() || $this->processingInstruction() || $this->tagName() ||
322-
// This always returns false.
323-
$this->parseError("Illegal tag opening") || $this->characterData();
324-
}
325-
326321
/**
327322
* Look for markup.
328323
*/
329-
protected function markupDeclaration()
324+
protected function markupDeclaration($tok)
330325
{
331-
if ($this->scanner->current() != '!') {
326+
if ($tok != '!') {
332327
return false;
333328
}
334329

@@ -756,7 +751,7 @@ protected function doctype()
756751
// EOF: die.
757752
if ($tok === false) {
758753
$this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true);
759-
return $this->eof();
754+
return $this->eof($tok);
760755
}
761756

762757
// NULL char: convert.

0 commit comments

Comments
 (0)