@@ -221,6 +221,14 @@ public class Tokenizer implements Locator, Locator2 {
221221
222222 public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74 ;
223223
224+ public static final int COMMENT_LESSTHAN = 76 ; // tokenizer state entered after a '<' is appended inside a comment
225+
226+ public static final int COMMENT_LESSTHAN_BANG = 77 ; // entered from COMMENT_LESSTHAN on '!'
227+
228+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ; // entered from COMMENT_LESSTHAN_BANG on '-'
229+
230+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ; // entered from COMMENT_LESSTHAN_BANG_DASH on '-'
231+
224232 /**
225233 * Magic value for UTF-16 operations.
226234 */
@@ -1029,9 +1037,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10291037
10301038 // ]NOCPP]
10311039
1032- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1040+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
10331041 throws SAXException {
1034- errConsecutiveHyphens ();
10351042 // [NOCPP[
10361043 switch (commentPolicy ) {
10371044 case ALTER_INFOSET :
@@ -1042,7 +1049,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10421049 appendStrBuf ('-' );
10431050 // CPPONLY: MOZ_FALLTHROUGH;
10441051 case ALLOW :
1045- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1052+ if (!reportedConsecutiveHyphens ) {
1053+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1054+ }
10461055 // ]NOCPP]
10471056 appendStrBuf (c );
10481057 // [NOCPP[
@@ -1464,6 +1473,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
14641473 @ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
14651474 int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
14661475 int endPos ) throws SAXException {
1476+ boolean reportedConsecutiveHyphens = false ;
14671477 /*
14681478 * Idioms used in this code:
14691479 *
@@ -2540,6 +2550,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25402550 }
25412551 // CPPONLY: MOZ_FALLTHROUGH;
25422552 case COMMENT_START :
2553+ reportedConsecutiveHyphens = false ;
25432554 commentstartloop : for (;;) {
25442555 if (++pos == endPos ) {
25452556 break stateloop ;
@@ -2572,6 +2583,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25722583 */
25732584 state = transition (state , Tokenizer .DATA , reconsume , pos );
25742585 continue stateloop ;
2586+ case '<' :
2587+ appendStrBuf (c );
2588+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2589+ continue stateloop ;
25752590 case '\r' :
25762591 appendStrBufCarriageReturn ();
25772592 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2617,6 +2632,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26172632 state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
26182633 break commentloop ;
26192634 // continue stateloop;
2635+ case '<' :
2636+ appendStrBuf (c );
2637+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2638+ continue stateloop ;
26202639 case '\r' :
26212640 appendStrBufCarriageReturn ();
26222641 break stateloop ;
@@ -2659,6 +2678,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26592678 state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
26602679 break commentenddashloop ;
26612680 // continue stateloop;
2681+ case '<' :
2682+ appendStrBuf (c );
2683+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2684+ continue stateloop ;
26622685 case '\r' :
26632686 appendStrBufCarriageReturn ();
26642687 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2713,11 +2736,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27132736 * Append a U+002D HYPHEN-MINUS (-) character to
27142737 * the comment token's data.
27152738 */
2716- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2739+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2740+ reportedConsecutiveHyphens = true ;
27172741 /*
27182742 * Stay in the comment end state.
27192743 */
27202744 continue ;
2745+ case '<' :
2746+ appendStrBuf (c );
2747+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2748+ continue stateloop ;
27212749 case '\r' :
27222750 adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
27232751 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2727,7 +2755,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27272755 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
27282756 continue stateloop ;
27292757 case '!' :
2730- errHyphenHyphenBang ();
27312758 appendStrBuf (c );
27322759 state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
27332760 continue stateloop ;
@@ -2740,7 +2767,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27402767 * and the input character to the comment
27412768 * token's data.
27422769 */
2743- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2770+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2771+ reportedConsecutiveHyphens = true ;
27442772 /*
27452773 * Switch to the comment state.
27462774 */
@@ -2810,6 +2838,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28102838 continue stateloop ;
28112839 }
28122840 }
2841+ case COMMENT_LESSTHAN : // a '<' inside a comment has just been appended to strBuf
2842+ if (++pos == endPos ) {
2843+ break stateloop ;
2844+ }
2845+ c = checkChar (buf , pos );
2846+ switch (c ) {
2847+ case '!' : // "<!" seen: possible start of a nested comment opener
2848+ appendStrBuf (c );
2849+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG , reconsume , pos );
2850+ continue stateloop ;
2851+ case '<' : // another '<': remain in this state
2852+ appendStrBuf (c );
2853+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2854+ continue stateloop ;
2855+ case '-' :
2856+ appendStrBuf (c );
2857+ state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2858+ continue stateloop ;
2859+ case '\r' :
2860+ appendStrBufCarriageReturn ();
2861+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // a line break ends the '<' run; without this a later '!' would be read as "<!"
2862+ break stateloop ;
2863+ case '\n' :
2864+ appendStrBufLineFeed ();
2865+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // same as any other ordinary character: back to COMMENT
2866+ continue stateloop ;
2867+ case '\u0000' :
2868+ c = '\uFFFD' ;
2869+ // fall thru
2870+ default :
2871+ appendStrBuf (c );
2872+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2873+ continue stateloop ;
2874+ }
2875+ case COMMENT_LESSTHAN_BANG : // "<!" inside a comment has been appended to strBuf
2876+ if (++pos == endPos ) {
2877+ break stateloop ;
2878+ }
2879+ c = checkChar (buf , pos );
2880+ switch (c ) {
2881+ case '-' : // "<!-" seen
2882+ appendStrBuf (c );
2883+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH , reconsume , pos );
2884+ continue stateloop ;
2885+ case '<' : // restart the '<' tracking
2886+ appendStrBuf (c );
2887+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2888+ continue stateloop ;
2889+ case '\r' :
2890+ appendStrBufCarriageReturn ();
2891+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // a line break breaks the "<!" sequence; do not stay in this state
2892+ break stateloop ;
2893+ case '\n' :
2894+ appendStrBufLineFeed ();
2895+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // same as any other ordinary character: back to COMMENT
2896+ continue stateloop ;
2897+ case '\u0000' :
2898+ c = '\uFFFD' ;
2899+ // fall thru
2900+ default :
2901+ appendStrBuf (c );
2902+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2903+ continue stateloop ;
2904+ }
2905+ case COMMENT_LESSTHAN_BANG_DASH : // "<!-" inside a comment has been appended to strBuf
2906+ if (++pos == endPos ) {
2907+ break stateloop ;
2908+ }
2909+ c = checkChar (buf , pos );
2910+ switch (c ) {
2911+ case '-' : // "<!--" seen
2912+ appendStrBuf (c );
2913+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH_DASH , reconsume , pos );
2914+ continue stateloop ;
2915+ case '<' : // restart the '<' tracking
2916+ appendStrBuf (c );
2917+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2918+ continue stateloop ;
2919+ case '\r' :
2920+ appendStrBufCarriageReturn ();
2921+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // a line break breaks the "<!-" sequence; do not stay in this state
2922+ break stateloop ;
2923+ case '\n' :
2924+ appendStrBufLineFeed ();
2925+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // same as any other ordinary character: back to COMMENT
2926+ continue stateloop ;
2927+ case '\u0000' :
2928+ c = '\uFFFD' ;
2929+ // fall thru
2930+ default :
2931+ appendStrBuf (c );
2932+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2933+ continue stateloop ;
2934+ }
2935+ case COMMENT_LESSTHAN_BANG_DASH_DASH : // "<!--" inside a comment has been appended to strBuf
2936+ if (++pos == endPos ) {
2937+ break stateloop ;
2938+ }
2939+ c = checkChar (buf , pos );
2940+ switch (c ) {
2941+ case '>' : // the "--" just seen plus this '>' closes the comment; no nested-comment error
2942+ appendStrBuf (c );
2943+ emitComment (3 , pos );
2944+ state = transition (state , Tokenizer .DATA , reconsume , pos );
2945+ continue stateloop ;
2946+ case '-' : // every branch from here on reports the nested "<!--"
2947+ errNestedComment ();
2948+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2949+ reportedConsecutiveHyphens = true ;
2950+ state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2951+ continue stateloop ;
2952+ case '\r' :
2953+ errNestedComment ();
2954+ silentCarriageReturn (); // same bookkeeping as adjustDoubleHyphenAndAppendToStrBufCarriageReturn
2955+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , reportedConsecutiveHyphens ); // CR is appended as '\n', never raw
2956+ reportedConsecutiveHyphens = true ;
2957+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2958+ break stateloop ;
2959+ case '\n' :
2960+ errNestedComment ();
2961+ silentLineFeed (); // same bookkeeping as adjustDoubleHyphenAndAppendToStrBufLineFeed
2962+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2963+ reportedConsecutiveHyphens = true ;
2964+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2965+ continue stateloop ; // must leave via stateloop so the new state takes effect
2966+ case '!' :
2967+ errNestedComment ();
2968+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2969+ reportedConsecutiveHyphens = true ;
2970+ state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2971+ continue stateloop ;
2972+ case '\u0000' :
2973+ c = '\uFFFD' ;
2974+ // fall thru: NUL is handled like any other ordinary character, not like '!'
2975+ default :
2976+ errNestedComment ();
2977+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2978+ reportedConsecutiveHyphens = true ;
2979+ state = transition (state , Tokenizer .COMMENT , reconsume , pos ); // matches COMMENT_END's default branch, which switches to the comment state
2980+ continue stateloop ;
2981+ }
2982+ // XXX reorder point
28132983 case COMMENT_START_DASH :
28142984 if (++pos == endPos ) {
28152985 break stateloop ;
@@ -2838,6 +3008,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28383008 */
28393009 state = transition (state , Tokenizer .DATA , reconsume , pos );
28403010 continue stateloop ;
3011+ case '<' :
3012+ appendStrBuf (c );
3013+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3014+ continue stateloop ;
28413015 case '\r' :
28423016 appendStrBufCarriageReturn ();
28433017 state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -5957,13 +6131,13 @@ private void initDoctypeFields() {
// Calls silentCarriageReturn() and then appends the line break as '\n' through
// adjustDoubleHyphenAndAppendToStrBufAndErr. The added ('+') call always passes
// false for reportedConsecutiveHyphens, so the callee's
// "if (!reportedConsecutiveHyphens)" warning guard always passes on this path.
59576131 @ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn ()
59586132 throws SAXException {
59596133 silentCarriageReturn ();
5960- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6134+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
59616135 }
59626136
// Calls silentLineFeed() and then appends '\n' through
// adjustDoubleHyphenAndAppendToStrBufAndErr. The added ('+') call always passes
// false for reportedConsecutiveHyphens, so the callee's
// "if (!reportedConsecutiveHyphens)" warning guard always passes on this path.
59636137 @ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed ()
59646138 throws SAXException {
59656139 silentLineFeed ();
5966- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6140+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
59676141 }
59686142
59696143 @ Inline private void appendStrBufLineFeed () {
@@ -6268,6 +6442,8 @@ public void eof() throws SAXException {
62686442 break eofloop ;
62696443 case COMMENT_START :
62706444 case COMMENT :
6445+ case COMMENT_LESSTHAN :
6446+ case COMMENT_LESSTHAN_BANG :
62716447 /*
62726448 * EOF Parse error.
62736449 */
@@ -6279,6 +6455,7 @@ public void eof() throws SAXException {
62796455 */
62806456 break eofloop ;
62816457 case COMMENT_END :
6458+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
62826459 errEofInComment ();
62836460 /* Emit the comment token. */
62846461 emitComment (2 , 0 );
@@ -6288,6 +6465,7 @@ public void eof() throws SAXException {
62886465 break eofloop ;
62896466 case COMMENT_END_DASH :
62906467 case COMMENT_START_DASH :
6468+ case COMMENT_LESSTHAN_BANG_DASH :
62916469 errEofInComment ();
62926470 /* Emit the comment token. */
62936471 emitComment (1 , 0 );
@@ -6917,7 +7095,7 @@ protected void errGtInPublicId() throws SAXException {
// Error hook with an empty body in this class; presumably overridden by a
// subclass that reports the error - TODO confirm against subclasses.
69177095 protected void errNamelessDoctype () throws SAXException {
69187096 }
69197097
// This change renames the hook: errConsecutiveHyphens ('-') becomes
// errNestedComment ('+'). The new hook is called from the
// COMMENT_LESSTHAN_BANG_DASH_DASH state for every character other than '>'.
// The body is empty in this class.
6920- protected void errConsecutiveHyphens () throws SAXException {
7098+ protected void errNestedComment () throws SAXException {
69217099 }
69227100
69237101 protected void errPrematureEndOfComment () throws SAXException {
@@ -7060,9 +7238,6 @@ protected void errExpectedSystemId() throws SAXException {
// Error hook with an empty body in this class; presumably overridden by a
// subclass that reports the error - TODO confirm against subclasses.
70607238 protected void errMissingSpaceBeforeDoctypeName () throws SAXException {
70617239 }
70627240
7063- protected void errHyphenHyphenBang () throws SAXException {
7064- }
7065-
// Error hook with an empty body in this class; presumably overridden by a
// subclass that reports the error - TODO confirm against subclasses.
70667241 protected void errNcrControlChar () throws SAXException {
70677242 }
70687243
0 commit comments