|
30 | 30 | * @author mikesamuel@gmail.com |
31 | 31 | */ |
32 | 32 |
|
| 33 | +// This file is a call to a function defined in prettify.js which defines a |
| 34 | +// lexical scanner for CSS and maps tokens to styles. |
| 35 | + |
| 36 | +// The call to PR['registerLangHandler'] is quoted so that Closure Compiler |
| 37 | +// will not rename the call so that these language extensions can be |
| 38 | +// compiled/minified separately from one another. Other symbols defined in |
| 39 | +// prettify.js are similarly quoted. |
| 40 | + |
| 41 | +// The call is structured thus: |
| 42 | +// PR['registerLangHandler']( |
| 43 | +// PR['createSimpleLexer']( |
| 44 | +// shortcutPatterns, |
| 45 | +// fallThroughPatterns), |
| 46 | +// [languageId0, ..., languageIdN]) |
| 47 | + |
| 48 | +// Language IDs |
| 49 | +// ============= |
| 50 | +// The language IDs are typically the file extensions of source files for |
| 51 | +// that language so that users can syntax highlight arbitrary files based |
| 52 | +// on just the extension. This is heuristic, but works pretty well in |
| 53 | +// practice. |
| 54 | + |
| 55 | +// Patterns |
| 56 | +// ======== |
| 57 | +// Lexers are typically implemented as a set of regular expressions. |
| 58 | +// The SimpleLexer function takes regular expressions, styles, and some |
| 59 | +// pragma-info and produces a lexer. A token description looks like |
| 60 | +// [STYLE_NAME, /regular-expression/, pragmas] |
| 61 | + |
| 62 | +// Initially, simple lexer's inner loop looked like: |
| 63 | + |
| 64 | +// while sourceCode is not empty: |
| 65 | +// try each regular expression in order until one matches |
| 66 | +// remove the matched portion from sourceCode |
| 67 | + |
| 68 | +// This was really slow for large files because some JS interpreters |
| 69 | +// do a buffer copy on the matched portion which is O(n*n) |
| 70 | + |
| 71 | +// The current loop now looks like |
| 72 | + |
| 73 | +// 1. use js-modules/combinePrefixPatterns.js to |
| 74 | +// combine all regular expressions into one |
| 75 | +// 2. use a single global regular expression match to extract all tokens |
| 76 | +// 3. for each token try regular expressions in order until one matches it |
| 77 | +// and classify it using the associated style |
| 78 | + |
| 79 | +// This is a lot more efficient but it does mean that lookahead and lookbehind |
| 80 | +// can't be used across boundaries to classify tokens. |
| 81 | + |
| 82 | +// Sometimes we need lookahead and lookbehind and sometimes we want to handle |
| 83 | +// embedded languages -- JavaScript or CSS embedded in HTML, or inline assembly |
| 84 | +// in C. |
| 85 | + |
| 86 | +// If a particular pattern has a numbered group, and its style pattern starts |
| 87 | +// with "lang-" as in |
| 88 | +// ['lang-js', /<script>(.*?)<\/script>/] |
| 89 | +// then the token classification step breaks the token into pieces. |
| 90 | +// Group 1 is re-parsed using the language handler for "lang-js", and the |
| 91 | +// surrounding portions are reclassified using the current language handler. |
| 92 | +// This mechanism gives us lookahead, lookbehind, and language embedding. |
| 93 | + |
| 94 | +// Shortcut Patterns |
| 95 | +// ================= |
| 96 | +// A shortcut pattern is one that is tried before other patterns if the first |
| 97 | +// character in the token is in the pattern's string of shortcut characters. |
| 98 | +// This very effectively lets us make quick correct decisions for common token |
| 99 | +// types. |
| 100 | + |
| 101 | +// All other patterns are fall-through patterns. |
| 102 | + |
| 103 | + |
| 104 | + |
| 105 | +// The comments inline below refer to productions in the CSS specification's |
| 106 | +// lexical grammar. See link above. |
33 | 107 | PR['registerLangHandler']( |
34 | 108 | PR['createSimpleLexer']( |
| 109 | + // Shortcut patterns. |
35 | 110 | [ |
36 | 111 | // The space production <s> |
37 | 112 | [PR['PR_PLAIN'], /^[ \t\r\n\f]+/, null, ' \t\r\n\f'] |
38 | 113 | ], |
| 114 | + // Fall-through patterns. |
39 | 115 | [ |
40 | 116 | // Quoted strings. <string1> and <string2> |
41 | 117 | [PR['PR_STRING'], |
@@ -63,13 +139,18 @@ PR['registerLangHandler']( |
63 | 139 | [PR['PR_PUNCTUATION'], /^[^\s\w\'\"]+/] |
64 | 140 | ]), |
65 | 141 | ['css']); |
| 142 | +// Above we use embedded languages to highlight property names (identifiers |
| 143 | +// followed by a colon) differently from identifiers in values. |
|  | +// Keyword sub-language: a single fall-through pattern that matches one |
|  | +// CSS identifier (the <ident> production, including backslash hex |
|  | +// escapes such as \0041) and styles the whole token as a keyword. |
|  | +// Registered under the ID 'css-kw'; presumably invoked via a |
|  | +// 'lang-css-kw' group in the main CSS lexer -- that part of the diff |
|  | +// is elided above, so confirm against the full file. |
66 | 144 | PR['registerLangHandler']( |
67 | 145 |     PR['createSimpleLexer']([], |
68 | 146 |         [ |
69 | 147 |          [PR['PR_KEYWORD'], |
70 | 148 |           /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\(?:\\[\da-f]+ ?))*/i] |
71 | 149 |         ]), |
72 | 150 |     ['css-kw']); |
| 151 | +// The content of an unquoted URL literal like url(http://foo/img.png) should |
| 152 | +// be colored as string content. This language handler is used above in the |
| 153 | +// URL production to do so. |
73 | 154 | PR['registerLangHandler']( |
74 | 155 | PR['createSimpleLexer']([], |
75 | 156 | [ |
|
0 commit comments