// Copyright (C) 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview
 * some functions for browser-side pretty printing of code contained in html.
 *
 * The lexer should work on a number of languages including C and friends,
 * Java, Python, Bash, SQL, HTML, XML, CSS, Javascript, and Makefiles.
 * It works passably on Ruby, PHP and Awk and a decent subset of Perl, but,
 * because of commenting conventions, doesn't work on Smalltalk, Lisp-like, or
 * CAML-like languages.
 *
 * If there's a language not mentioned here, then I don't know it, and don't
 * know whether it works.  If it has a C-like, Bash-like, or XML-like syntax
 * then it should work passably.
 *
 * Usage:
 * 1) include this source file in an html page via
 *    <script type="text/javascript" src="prettify.js"></script>
 * 2) define style rules.  See the example page for examples.
 * 3) mark the <pre> and <code> tags in your source with class=prettyprint.
 *    You can also use the (html deprecated) <xmp> tag, but the pretty printer
 *    needs to do more substantial DOM manipulations to support that, so some
 *    css styles may not be preserved.
 * That's it.  I wanted to keep the API as simple as possible, so there's no
 * need to specify which language the code is in.
 *
 * Change log:
 * cbeust, 2006/08/22
 *   Java annotations (start with "@") are now captured as literals ("lit")
 */
/**
 * Lookup table of reserved words: each keyword of every supported language is
 * stored as a property whose value is `true`.  The table is the union of all
 * the per-language lists below; it is not partitioned by language.
 */
var PR_keywords = {};
/** initialize the keyword list for our target languages. */
(function () {
  // Each list is a single space-separated string; entries must be bare
  // keywords, since every space-delimited word becomes a key of PR_keywords.
  var CPP_KEYWORDS = (
    "bool break case catch char class const const_cast continue default " +
    "delete deprecated dllexport dllimport do double dynamic_cast else enum " +
    "explicit extern false float for friend goto if inline int long mutable " +
    "naked namespace new noinline noreturn nothrow novtable operator private " +
    "property protected public register reinterpret_cast return selectany " +
    "short signed sizeof static static_cast struct switch template this " +
    "thread throw true try typedef typeid typename union unsigned using " +
    "uuid virtual void volatile while typeof");
  var JAVA_KEYWORDS = (
    "abstract default goto package synchronized boolean do if private this " +
    "break double implements protected throw byte else import public throws " +
    "case enum instanceof return transient catch extends int short try char " +
    "final interface static void class finally long strictfp volatile const " +
    "float native super while continue for new switch");
  var PYTHON_KEYWORDS = (
    "and assert break class continue def del elif else except exec finally " +
    "for from global if import in is lambda not or pass print raise return " +
    "try while yield False True None");
  var JSCRIPT_KEYWORDS = (
    "abstract boolean break byte case catch char class const continue " +
    "debugger default delete do double else enum export extends false final " +
    "finally float for function goto if implements import in instanceof int " +
    "interface long native new null package private protected public return " +
    "short static super switch synchronized this throw throws transient " +
    "true try typeof var void volatile while with NaN Infinity");
  var PERL_KEYWORDS = (
    "foreach require sub unless until use elsif BEGIN END");
  var SH_KEYWORDS = (
    "if then do done else fi end");
  var RUBY_KEYWORDS = (
      "if then elsif else end begin do rescue ensure while for class module " +
      "def yield raise until unless and or not when case super undef break " +
      "next redo retry in return alias defined");
  var KEYWORDS = [CPP_KEYWORDS, JAVA_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
                  PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
  for (var k = 0; k < KEYWORDS.length; k++) {
    var kw = KEYWORDS[k].split(' ');
    for (var i = 0; i < kw.length; i++) {
      // skip empty strings produced by doubled spaces
      if (kw[i]) { PR_keywords[kw[i]] = true; }
    }
  }
}).call(this);
// token style names.  correspond to css classes
// Each constant is the short class-name string attached to a token's style.
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/**
 * token style for plain text -- presumably tokens that fit none of the other
 * styles; confirm against the lexer.
 */
var PR_PLAIN = 'pln';
/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';
/** records the position at which a token ends, together with its style.
  * A division of a string into n tokens can be represented as a series of
  * n - 1 token ends, as long as runs of whitespace warrant their own token.
  * @constructor
  * @param end the numeric end position; anything whose typeof is not
  *   'number' is rejected.
  * @param style the token style; may be null (or any other value) but must
  *   not be undefined.
  * @throws Error with message 'BAD' when either argument is rejected.
  * @private
  */
function PR_TokenEnd(end, style) {
  var styleIsMissing = (style === undefined);
  if (styleIsMissing) { throw new Error('BAD'); }
  if (typeof end !== 'number') { throw new Error('BAD'); }
  this.end = end;
  this.style = style;
}
/** debugging form: '[PR_TokenEnd <end>:<style>]', with the ':<style>' part
  * left out when the style is falsy.
  */
PR_TokenEnd.prototype.toString = function () {
  var styleSuffix = '';
  if (this.style) { styleSuffix = ':' + this.style; }
  return '[PR_TokenEnd ' + this.end + styleSuffix + ']';
};
/** a piece of text paired with a style.  Used both for the output of the
  * lexing functions and for intermediate results.
  * @constructor
  * @param token the token text
  * @param style one of the token styles defined in designdoc-template, or null
  *   for a styleless token, such as an embedded html tag.  Must not be
  *   undefined.
  * @throws Error with message 'BAD' when style is undefined.
  * @private
  */
function PR_Token(token, style) {
  var styleIsMissing = (style === undefined);
  if (styleIsMissing) { throw new Error('BAD'); }
  this.token = token;
  this.style = style;
}
/** debugging form: '[PR_Token <text>:<style>]', with the ':<style>' part
  * left out when the style is falsy.
  */
PR_Token.prototype.toString = function () {
  var styleSuffix = '';
  if (this.style) { styleSuffix = ':' + this.style; }
  return '[PR_Token ' + this.token + styleSuffix + ']';
};
/** a helper class that decodes common html entities used to escape source and
  * markup punctuation characters in html.
  * After each call to decode, `ch` holds the decoded character and `next`
  * holds the index at which the following call should start.
  * @constructor
  * @private
  */
function PR_DecodeHelper() {
  this.next = 0;
  this.ch = '\0';
}
/** decodes the character or entity that starts at index i of s.
  * Recognizes &lt; &gt; &quot; &apos; and &amp; (names are matched
  * case-insensitively).  Any other '&' sequence is not decoded: the '&' is
  * returned on its own and only that one character is consumed.
  * @param s the html text
  * @param i index into s of the character to decode
  * @return the decoded character, also stored in this.ch; this.next is set to
  *   the index just past the consumed input.
  */
PR_DecodeHelper.prototype.decode = function (s, i) {
  var next = i + 1;
  var ch = s.charAt(i);
  if ('&' == ch) {
    var semi = s.indexOf(';', next);
    // The longest recognized names ("quot", "apos") are 4 characters, so the
    // ';' may sit up to 4 positions past the first name character; the bound
    // must therefore be inclusive.
    if (semi >= 0 && semi <= next + 4) {
      var decoded = null;
      switch (s.substring(next, semi).toLowerCase()) {
        case 'lt': decoded = '<'; break;
        case 'gt': decoded = '>'; break;
        case 'quot': decoded = '"'; break;
        case 'apos': decoded = '\''; break;
        case 'amp': decoded = '&'; break;
      }
      if (null !== decoded) {
        ch = decoded;
        next = semi + 1;
      }
    }
  }
  this.next = next;
  this.ch = ch;
  return this.ch;
};
// some string utilities
/** true iff ch falls in the range 'a'..'z' or 'A'..'Z' (ASCII letters). */
function PR_isWordChar(ch) {
  if (ch >= 'a' && ch <= 'z') { return true; }
  return ch >= 'A' && ch <= 'Z';
}
/** true iff ch may begin an identifier: a word character (per PR_isWordChar)
  * or one of '_', '$', '@'.
  */
function PR_isIdentifierStart(ch) {
  if (PR_isWordChar(ch)) { return true; }
  return ch == '_' || ch == '$' || ch == '@';
}
/** true iff ch may appear inside an identifier: anything accepted by
  * PR_isIdentifierStart, or a digit (per PR_isDigitChar).
  */
function PR_isIdentifierPart(ch) {
  return PR_isIdentifierStart(ch) ? true : PR_isDigitChar(ch);
}
/** true iff ch occurs in the string of tab, space, carriage return and
  * newline.  Because this is a substring search, the empty string also
  * yields true.
  */
function PR_isSpaceChar(ch) {
  var whitespace = "\t \r\n";
  return whitespace.indexOf(ch) !== -1;
}
/** true iff ch falls in the range '0'..'9'. */
function PR_isDigitChar(ch) {
  if (!(ch >= '0')) { return false; }
  return ch <= '9';
}
/** returns s without its leading and trailing run of characters accepted by
  * PR_isSpaceChar.  A string made up only of such characters yields ''.
  */
function PR_trim(s) {
  var start = 0;
  var last = s.length - 1;
  // advance past leading whitespace
  for (; start <= last; ++start) {
    if (!PR_isSpaceChar(s.charAt(start))) { break; }
  }
  // back up over trailing whitespace, never crossing start
  for (; last > start; --last) {
    if (!PR_isSpaceChar(s.charAt(last))) { break; }
  }
  return s.substring(start, last + 1);
}
/** true iff s is at least as long as prefix and its leading characters equal
  * prefix.  An empty prefix always matches.
  */
function PR_startsWith(s, prefix) {
  var prefixLen = prefix.length;
  if (!(s.length >= prefixLen)) { return false; }
  return prefix == s.substring(0, prefixLen);
}
/** true iff s is at least as long as suffix and its trailing characters equal
  * suffix.  An empty suffix always matches.
  */
function PR_endsWith(s, suffix) {
  var totalLen = s.length;
  var suffixLen = suffix.length;
  if (!(totalLen >= suffixLen)) { return false; }
  return suffix == s.substring(totalLen - suffixLen, totalLen);
}
/** true iff the characters of prefix equal, position by position, the leading
  * elements of chars, where only chars[0:len] is considered populated.
  * @param chars indexable sequence of characters
  * @param len number of valid entries in chars
  * @param prefix the string to look for at the start of chars
  * @private
  */
function PR_prefixMatch(chars, len, prefix) {
  var needed = prefix.length;
  if (len < needed) { return false; }
  var pos = 0;
  while (pos < needed) {
    if (prefix.charAt(pos) != chars[pos]) { return false; }
    ++pos;
  }
  return true;
}
/** like textToHtml but escapes double quotes to be attribute safe.
  * Replaces every '&', '<', '>', '"' and non-breaking space (\xa0) with the
  * corresponding html entity.
  * @param str the plain text to escape
  * @return the escaped text
  */
function PR_attribToHtml(str) {
  // '&' goes first so the ampersands of the entities added below are not
  // escaped a second time.
  return str.replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/\"/g, '&quot;')
    .replace(/\xa0/g, '&nbsp;');
}
/** escapes html special characters to html.
  * Replaces every '&', '<', '>' and non-breaking space (\xa0) with the
  * corresponding html entity.  Double quotes are left untouched.
  * @param str the plain text to escape
  * @return the escaped text
  */
function PR_textToHtml(str) {
  // '&' goes first so the ampersands of the entities added below are not
  // escaped a second time.
  return str.replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/\xa0/g, '&nbsp;');
}
/** is the given node's innerHTML normally unescaped?
  * True exactly when the node's tagName is 'XMP'.
  * @param node an object exposing a tagName property
  */
function PR_isRawContent(node) {
  var tag = node.tagName;
  return tag == 'XMP';
}
var PR_innerHtmlWorks = null;
function PR_getInnerHtml(node) {
  // inner html is hopelessly broken in Safari 2.0.4 when the content is
  // an html description of well formed XML and the containing tag is a PRE
   // tag, so we detect that case and emulate innerHTML.
  if (null == PR_innerHtmlWorks) {
    var testNode = document.createElement('PRE');
    testNode.appendChild(
        document.createTextNode('\n