@@ -107,6 +107,9 @@ var PR_ATTRIB_NAME = 'atn';
107107/** token style for an sgml attribute value. */
108108var PR_ATTRIB_VALUE = 'atv' ;
109109
110+ /** the number of character columns between tab stops; PR_lexOne passes this to PR_expandTabs as its tabWidth. */
111+ var PR_TAB_WIDTH = 8 ;
112+
110113/** the position of the end of a token during. A division of a string into
111114 * n tokens can be represented as a series of n - 1 token ends, as long as
112115 * runs of whitespace warrant their own token.
@@ -178,7 +181,7 @@ PR_DecodeHelper.prototype.decode = function (s, i) {
178181 charCode = parseInt ( entityName . substring ( 1 ) , 10 ) ;
179182 }
180183 if ( ! isNaN ( charCode ) ) {
181- decodedEntity = String . fromCharCode ( charCode ) ;
184+ decoded = String . fromCharCode ( charCode ) ;
182185 }
183186 }
184187 if ( ! decoded ) {
@@ -328,6 +331,64 @@ function PR_normalizedHtml(node, out) {
328331 }
329332}
330333
/**
 * Expand tab characters in styled chunks into runs of spaces, so that the
 * text after each tab starts at the next tab stop.
 *
 * The column counter is shared across chunks: it is reset only by a newline
 * or carriage return, so a tab is aligned relative to any text that preceded
 * it on the same line in earlier styled chunks.  Chunks whose style is null
 * or undefined are passed through untouched and do not advance the column
 * counter.
 *
 * @param {Array} chunks PR_Tokens possibly containing tabs
 * @param {Number} tabWidth number of columns between tab stops
 * @return {Array} a new array of chunks with tabs replaced by spaces.  If
 *     tabWidth is not a finite number >= 1, no tab stop can be computed and
 *     a shallow copy of chunks is returned with the tabs left in place.
 */
function PR_expandTabs(chunks, tabWidth) {
  // Work with a whole number of columns so the padding length is always an
  // integer.
  var width = Math.floor(tabWidth);
  // A zero, negative, NaN or infinite width would either poison the column
  // counter with NaN (silently deleting tabs) or ask for unbounded padding,
  // so leave the input untouched instead.
  if (!(width >= 1) || !isFinite(width)) {
    return chunks.slice(0);
  }

  // number of columns already emitted on the current line; deliberately
  // declared outside the chunk loop so that it carries over between chunks.
  var charInLine = 0;
  var decodeHelper = new PR_DecodeHelper();

  var chunksOut = [];
  for (var chunkIndex = 0; chunkIndex < chunks.length; ++chunkIndex) {
    var chunk = chunks[chunkIndex];
    // loose comparison on purpose: matches both null and undefined styles.
    if (chunk.style == null) {
      chunksOut.push(chunk);
      continue;
    }

    var s = chunk.token;
    var pos = 0;  // index of the first character of s not yet copied to out
    var out = [];

    // walk over each character looking for tabs and newlines.
    // On tabs, expand them.  On newlines, reset charInLine.
    // Otherwise increment charInLine.
    // decode() reports the character found at charIndex in decodeHelper.ch
    // and the index to resume from in decodeHelper.next.  Stepping by .next
    // rather than by one lets an encoded character that spans several
    // characters of s count as one column (presumably HTML entities --
    // confirm against PR_DecodeHelper).
    for (var charIndex = 0, n = s.length; charIndex < n;
         charIndex = decodeHelper.next) {
      decodeHelper.decode(s, charIndex);
      var ch = decodeHelper.ch;

      switch (ch) {
        case '\t':
          // flush the text that precedes the tab.
          out.push(s.substring(pos, charIndex));
          // nSpaces is the amount of padding -- the number of spaces needed
          // to move us to the next column, where columns occur at multiples
          // of width.  It is always in the range [1, width].
          var nSpaces = width - (charInLine % width);
          charInLine += nSpaces;
          // joining nSpaces + 1 empty slots yields exactly nSpaces spaces.
          out.push(new Array(nSpaces + 1).join(' '));
          // skip over the tab itself.
          pos = decodeHelper.next;
          break;
        case '\n': case '\r':
          charInLine = 0;
          break;
        default:
          ++charInLine;
      }
    }
    // copy whatever follows the last tab (or the whole token if none).
    out.push(s.substring(pos));
    chunksOut.push(new PR_Token(out.join(''), chunk.style));
  }
  return chunksOut;
}
391+
331392/** split markup into chunks of html tags (style null) and
332393 * plain text (style {@link #PR_PLAIN}).
333394 *
@@ -1262,9 +1323,13 @@ function PR_lexMarkup(chunks) {
12621323 return tokensOut ;
12631324}
12641325
1265- /** classify the string as either source or markup and lex appropriately. */
1266- function PR_lexOne ( s ) {
1267- var chunks = PR_chunkify ( s ) ;
1326+ /**
1327+ * classify the string as either source or markup and lex appropriately.
1328+ * @param {String } html
1329+ */
1330+ function PR_lexOne ( html ) {
1331+ var chunks = PR_expandTabs ( PR_chunkify ( html ) , PR_TAB_WIDTH ) ;
1332+
12681333 // treat it as markup if the first non whitespace character is a < and the
12691334 // last non-whitespace character is a >
12701335 var isMarkup = false ;
0 commit comments