code posted
created at 08 Jan 18:59, updated at 05 Nov 19:54
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 |
// Copyright (C) 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview
 * some functions for browser-side pretty printing of code contained in html.
 *
 * The lexer should work on a number of languages including C and friends,
 * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles.
 * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but,
 * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or
 * CAML-like languages.
 *
 * If there's a language not mentioned here, then I don't know it, and don't
 * know whether it works.  If it has a C-like, Bash-like, or XML-like syntax
 * then it should work passably.
 *
 * Usage:
 * 1) include this source file in an html page via
 *    <script type="text/javascript" src="/path/to/prettify.js"></script>
 * 2) define style rules.  See the example page for examples.
 * 3) mark the <pre> and <code> tags in your source with class=prettyprint.
 *    You can also use the (html deprecated) <xmp> tag, but the pretty printer
 *    needs to do more substantial DOM manipulations to support that, so some
 *    css styles may not be preserved.
 * That's it.  I wanted to keep the API as simple as possible, so there's no
 * need to specify which language the code is in.
 *
 * Change log:
 * cbeust, 2006/08/22
 *   Java annotations (start with "@") are now captured as literals ("lit")
 */

/**
 * Set of recognized keywords: each keyword is a property name mapped to
 * <code>true</code>.  Populated once by the initializer below.
 */
var PR_keywords = {};

/**
 * initialize the keyword list for our target languages.
 *
 * Each per-language list is a space separated string; every non-empty word
 * from every list is registered in PR_keywords.
 */
(function () {
  // NOTE: the words "declaration," and "directive" that used to follow
  // "using" were not keywords (they look like residue of a
  // "using declaration, using directive" table entry) and have been removed
  // so that they are no longer registered as keywords.
  var CPP_KEYWORDS = "abstract bool break case catch char class const " +
      "const_cast continue default delete deprecated dllexport dllimport do " +
      "double dynamic_cast else enum explicit extern false float for friend " +
      "goto if inline int long mutable naked namespace new noinline noreturn " +
      "nothrow novtable operator private property protected public register " +
      "reinterpret_cast return selectany short signed sizeof static " +
      "static_cast struct switch template this thread throw true try typedef " +
      "typeid typename union unsigned using uuid " +
      "virtual void volatile while typeof";
  var CSHARP_KEYWORDS = "as base by byte checked decimal delegate descending " +
      "event finally fixed foreach from group implicit in interface internal " +
      "into is lock null object out override orderby params readonly ref sbyte " +
      "sealed stackalloc string select uint ulong unchecked unsafe ushort var";
  var JAVA_KEYWORDS = "package synchronized boolean implements import throws " +
      "instanceof transient extends final strictfp native super";
  var JSCRIPT_KEYWORDS = "debugger export function with NaN Infinity";
  var PERL_KEYWORDS = "require sub unless until use elsif BEGIN END";
  var PYTHON_KEYWORDS = "and assert def del elif except exec global lambda " +
      "not or pass print raise yield False True None";
  var RUBY_KEYWORDS = "then end begin rescue ensure module when undef next " +
      "redo retry alias defined has_many alias_method has_one belongs_to before_filter " +
      "after_filter";
  var SH_KEYWORDS = "done fi test tail head grep sed seq sendmail cat through " +
      "cd sudo time";

  var KEYWORDS = [CPP_KEYWORDS, CSHARP_KEYWORDS, JAVA_KEYWORDS,
                  JSCRIPT_KEYWORDS, PERL_KEYWORDS, PYTHON_KEYWORDS,
                  RUBY_KEYWORDS, SH_KEYWORDS];
  for (var k = 0; k < KEYWORDS.length; k++) {
    var kw = KEYWORDS[k].split(' ');
    for (var i = 0; i < kw.length; i++) {
      // skip empty strings produced by consecutive or trailing spaces
      if (kw[i]) { PR_keywords[kw[i]] = true; }
    }
  }
}).call(this);

// token style names.
// correspond to css classes

/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/** the number of characters between tab columns */
var PR_TAB_WIDTH = 8;

/**
 * True iff ch is an ASCII letter (a-z or A-Z).  Digits, underscores and
 * non-ASCII letters are not considered word characters here.
 * @param {String} ch a single character
 * @return {Boolean}
 */
function PR_isWordChar(ch) {
  return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

/**
 * True iff the user agent string identifies Internet Explorer 6.
 * The answer is computed on the first call; the function then replaces itself
 * with a version that returns the cached answer.
 * @return {Boolean}
 */
function PR_isIE6() {
  // Use typeof so that an environment with no navigator global at all yields
  // false instead of throwing a ReferenceError.
  var isIE6 = typeof navigator !== 'undefined' && !!navigator &&
      !!navigator.userAgent && /\bMSIE 6\./.test(navigator.userAgent);
  PR_isIE6 = function () { return isIE6; };
  return isIE6;
}

/** Splice one array into another.
  * Like the python <code>
  * container[containerPosition:containerPosition + countReplaced] = inserted
  * </code>
  * @param {Array} inserted left unmodified
  * @param {Array} container modified in place
  * @param {Number} containerPosition
  * @param {Number} countReplaced defaults to 0, i.e. a pure insertion
  */
function PR_spliceArrayInto(
    inserted, container, containerPosition, countReplaced) {
  // Build the splice argument list in a fresh array rather than temporarily
  // unshifting onto inserted, so inserted is never observed in a modified
  // state (which matters if the same array is passed as both arguments).
  var spliceArgs = [containerPosition, countReplaced || 0].concat(inserted);
  container.splice.apply(container, spliceArgs);
}

/** a set of tokens that can precede a regular expression literal in javascript.
  * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
  * list, but I've removed ones that might be problematic when seen in languages
  * that don't support regular expression literals.
  *
  * <p>Specifically, I've removed any keywords that can't precede a regexp
  * literal in a syntactically legal javascript program, and I've removed the
  * "in" keyword since it's not a keyword in many languages, and might be used
  * as a count of inches.
  *
  * <p>The resulting pattern is anchored at the end of its input: it matches
  * when the text ends with one of the preceders followed by optional
  * whitespace, and it also matches the empty string.
  * @private
  */
var REGEXP_PRECEDER_PATTERN = (function () {
    var preceders = [
        "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
        "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
        "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
        "<", "<<", "<<=", "<=", "=", "==", "===", ">",
        ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
        "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
        "||=", "~", "break", "case", "continue", "delete",
        "do", "else", "finally", "instanceof",
        "return", "throw", "try", "typeof"
        ];
    var pattern = '(?:' +
        '(?:(?:^|[^0-9.])\\.{1,3})|' +  // a dot that's not part of a number
        '(?:(?:^|[^\\+])\\+)|' +  // allow + but not ++
        '(?:(?:^|[^\\-])-)'  // allow - but not --
        ;
    for (var i = 0; i < preceders.length; ++i) {
      var preceder = preceders[i];
      if (PR_isWordChar(preceder.charAt(0))) {
        // keywords must start at a word boundary
        pattern += '|\\b' + preceder;
      } else {
        // backslash-escape every punctuation character except = < > : &
        pattern += '|' + preceder.replace(/([^=<>:&])/g, '\\$1');
      }
    }
    pattern += '|^)\\s*$';  // matches at end, and matches empty string

    // CAVEAT: this does not properly handle the case where a regular expression
    // immediately follows another since a regular expression may have flags
    // for case-sensitivity and the like.  Having regexp tokens adjacent is not
    // valid in any language I'm aware of, so I'm punting.
    // TODO: maybe style special characters inside a regexp as punctuation.
    return new RegExp(pattern);
  })();

// Define regexps here so that the interpreter doesn't have to create an object
// each time the function containing them is called.
// The language spec requires a new object created even if you don't access the
// $1 members.
var pr_amp = /&/g;
var pr_lt = /</g;
var pr_gt = />/g;
var pr_quot = /\"/g;

/**
 * like textToHtml but escapes double quotes to be attribute safe.
 * @param {String} str plain text
 * @return {String} html safe to embed in a double quoted attribute value
 */
function PR_attribToHtml(str) {
  // & must be escaped first so the ampersands introduced by the later
  // replacements are not escaped again.
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;')
      .replace(pr_quot, '&quot;');
}

/**
 * escapes html special characters (&, <, >) to html.
 * @param {String} str plain text
 * @return {String} html
 */
function PR_textToHtml(str) {
  return str.replace(pr_amp, '&amp;')
      .replace(pr_lt, '&lt;')
      .replace(pr_gt, '&gt;');
}

// Patterns for the named entities understood by PR_htmlToText.
// NOTE(review): PR_htmlToText below no longer needs these; they are kept in
// case code outside this section still references them -- confirm before
// removing.
var pr_ltEnt = /&lt;/g;
var pr_gtEnt = /&gt;/g;
var pr_aposEnt = /&apos;/g;
var pr_quotEnt = /&quot;/g;
var pr_ampEnt = /&amp;/g;

/** matches the body of a numeric entity: "#" + decimal, or "#x" + hex. */
var pr_numericEnt = /^#(?:([0-9]+)|[xX]([0-9a-fA-F]+))$/;

/**
 * Decodes the body of a single entity, i.e. the text between "&" and ";".
 * Understands lt, gt, apos, quot, amp and decimal/hexadecimal numeric
 * entities.
 * @param {String} name e.g. "lt", "#65" or "#x41"
 * @return {String|null} the decoded text, or null if name is not recognized
 * @private
 */
function PR_decodeEntity(name) {
  switch (name) {
    case 'lt': return '<';
    case 'gt': return '>';
    case 'apos': return "'";
    case 'quot': return '"';
    case 'amp': return '&';
  }
  var match = name.match(pr_numericEnt);
  if (!match) { return null; }
  var codePoint = match[1] ? parseInt(match[1], 10) : parseInt(match[2], 16);
  if (codePoint > 0x10FFFF) { return null; }  // outside the unicode range
  if (codePoint > 0xFFFF) {
    // String.fromCharCode only keeps the low 16 bits of each argument, so
    // supplementary characters are emitted as a UTF-16 surrogate pair.
    codePoint -= 0x10000;
    return String.fromCharCode(
        0xD800 + (codePoint >> 10), 0xDC00 + (codePoint & 0x3FF));
  }
  return String.fromCharCode(codePoint);
}

/**
 * unescapes html to plain text.
 *
 * The input is scanned once from left to right and text produced by decoding
 * an entity is never rescanned, so "&amp;lt;" and "&#38;lt;" both yield the
 * four characters "&lt;" rather than "<".  Unrecognized or unterminated
 * entities are copied through unchanged.
 * @param {String} html
 * @return {String} plain text
 */
function PR_htmlToText(html) {
  var pos = html.indexOf('&');
  if (pos < 0) { return html; }

  // We can't use functional substitution since that doesn't work in older
  // versions of Safari, so walk over the ampersands by hand.
  var maxEntityLength = 32;  // bounds the search for ';' after each '&'
  var out = [];
  var copied = 0;  // index of the first character of html not yet in out
  for (; pos >= 0; pos = html.indexOf('&', pos + 1)) {
    var candidate = html.substring(pos + 1, pos + 1 + maxEntityLength);
    var semi = candidate.indexOf(';');
    if (semi < 0) { continue; }
    var decoded = PR_decodeEntity(candidate.substring(0, semi));
    if (decoded === null) { continue; }
    out.push(html.substring(copied, pos), decoded);
    copied = pos + semi + 2;  // skip '&', the entity body, and ';'
    pos = copied - 1;  // the next search resumes right after the entity
  }
  out.push(html.substring(copied));
  return out.join('');
}

/** is the given node's innerHTML normally unescaped? */
function PR_isRawContent(node) {
  return 'XMP' === node.tagName;
}

/**
 * Caches whether the browser's innerHTML can be trusted: null until
 * PR_getInnerHtml has run its feature test, then true or false.
 */
var PR_innerHtmlWorks = null;

/**
 * Returns the properly escaped html content of node.
 * @param {Node} node
 * @return {String} html
 */
function PR_getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
  // tag, so we detect that case and emulate innerHTML.
  if (null === PR_innerHtmlWorks) {
    var testNode = document.createElement('PRE');
    testNode.appendChild(
        document.createTextNode('<!DOCTYPE foo PUBLIC "foo bar">\n<foo />'));
    // a working innerHTML escapes every '<' of the text node
    PR_innerHtmlWorks = !/</.test(testNode.innerHTML);
  }

  if (PR_innerHtmlWorks) {
    var content = node.innerHTML;
    // XMP tags contain unescaped entities so require special handling.
    if (PR_isRawContent(node)) {
      content = PR_textToHtml(content);
    }
    return content;
  }

  var out = [];
  for (var child = node.firstChild; child; child = child.nextSibling) {
    PR_normalizedHtml(child, out);
  }
  return out.join('');
}

/**
 * walks the DOM returning a properly escaped version of innerHTML.
 * @param {Node} node an element, attribute, text or cdata node; other node
 *     types produce no output
 * @param {Array.<String>} out chunks of html are appended to this array
 */
function PR_normalizedHtml(node, out) {
  switch (node.nodeType) {
    case 1:  // an element
      var name = node.tagName.toLowerCase();
      // '\x3c' is '<'.  The octal form '\074' is a syntax error in strict
      // mode and module code.
      out.push('\x3c', name);
      for (var i = 0; i < node.attributes.length; ++i) {
        var attr = node.attributes[i];
        if (!attr.specified) { continue; }
        out.push(' ');
        PR_normalizedHtml(attr, out);
      }
      out.push('>');
      for (var child = node.firstChild; child; child = child.nextSibling) {
        PR_normalizedHtml(child, out);
      }
      // childless br, link and img elements get no closing tag
      if (node.firstChild || !/^(?:br|link|img)$/.test(name)) {
        out.push('<\/', name, '>');
      }
      break;
    case 2:  // an attribute
      out.push(node.name.toLowerCase(), '="', PR_attribToHtml(node.value), '"');
      break;
    case 3: case 4:  // text
      out.push(PR_textToHtml(node.nodeValue));
      break;
  }
}

/** returns a function that expand tabs to spaces.  This function can be fed
  * successive chunks of text, and will maintain its own internal state to
  * keep track of how tabs are expanded.
  * @param {Number} tabWidth the number of characters between tab columns
  * @return {function (plainText : String) : String } a function that takes
  *   plain text and return the text with tabs expanded.
  * @private
  */
function PR_tabExpander(tabWidth) {
  // A reusable run of spaces.  Padding wider than this is emitted in several
  // pieces, so any non-empty run works.
  var SPACES = '                ';
  var charInLine = 0;  // column of the next character, kept across calls

  return function (plainText) {
    // walk over each character looking for tabs and newlines.
    // On tabs, expand them.  On newlines, reset charInLine.
    // Otherwise increment charInLine
    var out = null;  // allocated lazily: text without tabs is returned as is
    var pos = 0;  // start of the text not yet copied to out
    for (var i = 0, n = plainText.length; i < n; ++i) {
      var ch = plainText.charAt(i);

      switch (ch) {
        case '\t':
          if (!out) { out = []; }
          out.push(plainText.substring(pos, i));
          // calculate how much space we need in front of this part
          // nSpaces is the amount of padding -- the number of spaces needed to
          // move us to the next column, where columns occur at multiples of
          // tabWidth.
          var nSpaces = tabWidth - (charInLine % tabWidth);
          charInLine += nSpaces;
          for (; nSpaces > 0; nSpaces -= SPACES.length) {
            out.push(SPACES.substring(0, nSpaces));
          }
          pos = i + 1;
          break;
        case '\n':
          charInLine = 0;
          break;
        default:
          ++charInLine;
      }
    }
    if (!out) { return plainText; }
    out.push(plainText.substring(pos));
    return out.join('');
  };
}

// The below pattern matches one of the following
// (1) /[^<]+/ : A run of characters other than '<'
// (2) /<!--.*?-->/: an HTML comment
// (3) /<!\[CDATA\[.*?\]\]>/: a cdata section
// (4) /<\/?[a-zA-Z][^>]*>/ : A probably tag that should not be highlighted
// (5) /</ : A '<' that does not begin a larger chunk.  Treated as 1
13.8 KB in 13 ms with coderay