Skip to content
This repository was archived by the owner on Apr 22, 2020. It is now read-only.

Commit edcf1ed

Browse files
author
mikesamuel@gmail.com
committed
issue 165: hashes after curly brackets should not be treated as a line comment in bash/perl
1 parent 1ac1eb0 commit edcf1ed

3 files changed

Lines changed: 93 additions & 5 deletions

File tree

js-modules/prettify.js

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ var prettyPrint;
469469
} else {
470470
// Stop C preprocessor declarations at an unclosed open comment
471471
shortcutStylePatterns.push(
472-
[PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
472+
[PR_COMMENT, /^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\r\n]*)/,
473473
null, '#']);
474474
}
475475
// #include <stdio.h>
@@ -523,6 +523,45 @@ var prettyPrint;
523523
}
524524

525525
shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);
526+
527+
var punctuation =
528+
// The Bash man page says
529+
530+
// A word is a sequence of characters considered as a single
531+
// unit by GRUB. Words are separated by metacharacters,
532+
// which are the following plus space, tab, and newline: { }
533+
// | & $ ; < >
534+
// ...
535+
536+
// A word beginning with # causes that word and all remaining
537+
// characters on that line to be ignored.
538+
539+
// which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a
540+
// comment but empirically
541+
// $ echo {#}
542+
// {#}
543+
// $ echo \$#
544+
// $#
545+
// $ echo }#
546+
// }#
547+
548+
// so /(?:^|[|&;<>\s])/ is more appropriate.
549+
550+
// http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3
551+
// suggests that this definition is compatible with a
552+
// default mode that tries to use a single token definition
553+
// to recognize both bash/python style comments and C
554+
// preprocessor directives.
555+
556+
// This definition of punctuation does not include # in the list of
557+
// follow-on exclusions, so # will not be broken before if preceded
558+
// by a punctuation character. We could try to exclude # after
559+
// [|&;<>] but that doesn't seem to cause many major problems.
560+
// If that does turn out to be a problem, we should change the below
561+
// when hc is truthy to include # in the run of punctuation characters
562+
// only when not following [|&;<>].
563+
/^.[^\s\w\.$@\'\"\`\/\\]*/;
564+
526565
fallthroughStylePatterns.push(
527566
// TODO(mikesamuel): recognize non-latin letters and numerals in idents
528567
[PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null],
@@ -543,7 +582,7 @@ var prettyPrint;
543582
null, '0123456789'],
544583
// Don't treat escaped quotes in bash as starting strings. See issue 144.
545584
[PR_PLAIN, /^\\[\s\S]?/, null],
546-
[PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);
585+
[PR_PUNCTUATION, punctuation, null]);
547586

548587
return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
549588
}

src/prettify.js

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[
829829
} else {
830830
// Stop C preprocessor declarations at an unclosed open comment
831831
shortcutStylePatterns.push(
832-
[PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
832+
[PR_COMMENT, /^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\r\n]*)/,
833833
null, '#']);
834834
}
835835
// #include <stdio.h>
@@ -883,6 +883,45 @@ var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[
883883
}
884884

885885
shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);
886+
887+
var punctuation =
888+
// The Bash man page says
889+
890+
// A word is a sequence of characters considered as a single
891+
// unit by GRUB. Words are separated by metacharacters,
892+
// which are the following plus space, tab, and newline: { }
893+
// | & $ ; < >
894+
// ...
895+
896+
// A word beginning with # causes that word and all remaining
897+
// characters on that line to be ignored.
898+
899+
// which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a
900+
// comment but empirically
901+
// $ echo {#}
902+
// {#}
903+
// $ echo \$#
904+
// $#
905+
// $ echo }#
906+
// }#
907+
908+
// so /(?:^|[|&;<>\s])/ is more appropriate.
909+
910+
// http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3
911+
// suggests that this definition is compatible with a
912+
// default mode that tries to use a single token definition
913+
// to recognize both bash/python style comments and C
914+
// preprocessor directives.
915+
916+
// This definition of punctuation does not include # in the list of
917+
// follow-on exclusions, so # will not be broken before if preceded
918+
// by a punctuation character. We could try to exclude # after
919+
// [|&;<>] but that doesn't seem to cause many major problems.
920+
// If that does turn out to be a problem, we should change the below
921+
// when hc is truthy to include # in the run of punctuation characters
922+
// only when not following [|&;<>].
923+
/^.[^\s\w\.$@\'\"\`\/\\]*/;
924+
886925
fallthroughStylePatterns.push(
887926
// TODO(mikesamuel): recognize non-latin letters and numerals in idents
888927
[PR_LITERAL, /^@[a-z_$][a-z_$@0-9]*/i, null],
@@ -903,7 +942,7 @@ var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[
903942
null, '0123456789'],
904943
// Don't treat escaped quotes in bash as starting strings. See issue 144.
905944
[PR_PLAIN, /^\\[\s\S]?/, null],
906-
[PR_PUNCTUATION, /^.[^\s\w\.$@\'\"\`\/\#\\]*/, null]);
945+
[PR_PUNCTUATION, punctuation, null]);
907946

908947
return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
909948
}

tests/prettify_test.html

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ <h1>Bash w/ language specified</h1>
8787
fib | /usr/bin/*head -10 | tail -1
8888
</pre>
8989

90+
<h1>Issue 165</h1>
91+
<pre class="prettyprint lang-sh" id="issue_165"># Comment
92+
local $x = ${#x[@]} # Previous is not a comment
93+
# A comment</pre>
94+
9095
<h1>C</h1>
9196

9297
<pre class="prettyprint" id="C">
@@ -1368,7 +1373,12 @@ <h1>Go mode</h1>
13681373
'`#6`PLNfib `END`PUN|`END`PLN `END`PUN/`END`PLNusr`END`PUN/`END`PLNbin`END' +
13691374
'`PUN/*`END`PLNhead `END`PUN-`END`LIT10`END`PLN `END`PUN|`END' +
13701375
'`PLN tail `END`PUN-`END`LIT1`END</li></ol>'),
1371-
C: (
1376+
issue_165: (
1377+
'`COM# Comment`END`PLN\n' +
1378+
'`END`KWDlocal`END`PLN $x `END`PUN=`END`PLN $`END`PUN{#`END`PLNx`END`PUN[@]}`END`PLN `END'
1379+
+ '`COM# Previous is not a comment`END`PLN\n' +
1380+
'`END`COM# A comment`END'),
1381+
C: (
13721382
'`COM#include`END`PLN `END`STR&lt;stdio.h&gt;`END`PLN\n' +
13731383
'\n' +
13741384
'`END`COM/* the n-th fibonacci number.\n' +

0 commit comments

Comments
 (0)