class JSTokenizer

  1. cis7 sites/all/modules/ulmus/advagg/advagg_js_compress/jsminplus.inc JSTokenizer
  2. cle7 sites/all/modules/ulmus/advagg/advagg_js_compress/jsminplus.inc JSTokenizer
  3. elmsmedia7 sites/all/modules/ulmus/advagg/advagg_js_compress/jsminplus.inc JSTokenizer
  4. icor7 sites/all/modules/ulmus/advagg/advagg_js_compress/jsminplus.inc JSTokenizer
  5. meedjum_blog7 sites/all/modules/ulmus/advagg/advagg_js_compress/jsminplus.inc JSTokenizer
  6. mooc7 sites/all/modules/ulmus/advagg/advagg_js_compress/jsminplus.inc JSTokenizer

Hierarchy

Expanded class hierarchy of JSTokenizer

Members

Contains filters are case sensitive
Name (sort descending), Modifiers, Type, Description
JSTokenizer::$assignOps private property
JSTokenizer::$cursor private property
JSTokenizer::$filename public property
JSTokenizer::$keywords private property
JSTokenizer::$lineno public property
JSTokenizer::$lookahead public property
JSTokenizer::$opRegExp private property
JSTokenizer::$opTypeNames private property
JSTokenizer::$scanNewlines public property
JSTokenizer::$scanOperand public property
JSTokenizer::$source private property
JSTokenizer::$tokenIndex public property
JSTokenizer::$tokens public property
JSTokenizer::currentToken public function
JSTokenizer::get public function
JSTokenizer::getInput public function
JSTokenizer::init public function
JSTokenizer::isDone public function
JSTokenizer::match public function
JSTokenizer::mustMatch public function
JSTokenizer::newSyntaxError public function
JSTokenizer::peek public function
JSTokenizer::peekOnSameLine public function
JSTokenizer::unget public function
JSTokenizer::unicodeEscapeCallback public static function
JSTokenizer::__construct public function

File

sites/all/modules/ulmus/advagg/advagg_js_compress/jsminplus.inc, line 1840
JSMinPlus version 1.4

View source
/**
 * Lexical scanner that splits JavaScript source text into tokens.
 *
 * Scanned tokens are stored in a four-slot ring buffer (every index is masked
 * with "& 3"), so at most three tokens can be pushed back with unget() before
 * the tokenizer refuses with a "too much lookahead" error.
 *
 * The token type constants (TOKEN_*, OP_*) and the JSToken class are not
 * defined in this class; they are expected to be defined elsewhere.
 */
class JSTokenizer {
  /**
   * Byte offset into $source of the next character to scan.
   */
  private $cursor = 0;

  /**
   * The complete source text being tokenized.
   */
  private $source;

  /**
   * Ring buffer of JSToken objects, indexed 0..3.
   */
  public $tokens = array();

  /**
   * Ring-buffer index of the current token.
   */
  public $tokenIndex = 0;

  /**
   * Number of tokens pushed back by unget() that get() will replay.
   */
  public $lookahead = 0;

  /**
   * When TRUE, a newline is reported as TOKEN_NEWLINE instead of being
   * skipped together with the other whitespace.
   */
  public $scanNewlines = false;

  /**
   * When TRUE, a "/" may start a regular expression literal and "+" / "-"
   * are reported as their unary variants.
   */
  public $scanOperand = true;

  /**
   * Name used in error messages; '[inline]' when none was given to init().
   */
  public $filename;

  /**
   * Line number of the scan position, used for tokens and error messages.
   */
  public $lineno;

  /**
   * Reserved words; an identifier equal to one of these gets the word itself
   * as its token type.
   */
  private $keywords = array(
    'break',
    'case',
    'catch',
    'const',
    'continue',
    'debugger',
    'default',
    'delete',
    'do',
    'else',
    'enum',
    'false',
    'finally',
    'for',
    'function',
    'if',
    'in',
    'instanceof',
    'new',
    'null',
    'return',
    'switch',
    'this',
    'throw',
    'true',
    'try',
    'typeof',
    'var',
    'void',
    'while',
    'with',
  );

  /**
   * Operator and punctuator spellings.
   *
   * The order matters: the list is joined into a regex alternation, so a
   * longer operator must precede every operator that is a prefix of it
   * ('===' before '==' before '=').
   */
  private $opTypeNames = array(
    ';',
    ',',
    '?',
    ':',
    '||',
    '&&',
    '|',
    '^',
    '&',
    '===',
    '==',
    '=',
    '!==',
    '!=',
    '<<',
    '<=',
    '<',
    '>>>',
    '>>',
    '>=',
    '>',
    '++',
    '--',
    '+',
    '-',
    '*',
    '/',
    '%',
    '!',
    '~',
    '.',
    '[',
    ']',
    '{',
    '}',
    '(',
    ')',
    '@*/',
  );

  /**
   * Operators that form a compound assignment when directly followed by "=".
   */
  private $assignOps = array(
    '|',
    '^',
    '&',
    '<<',
    '>>',
    '>>>',
    '+',
    '-',
    '*',
    '/',
    '%',
  );

  /**
   * Anchored regular expression matching any entry of $opTypeNames.
   */
  private $opRegExp;

  /**
   * Builds the operator regular expression from $opTypeNames.
   */
  public function __construct() {
    $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#';
  }

  /**
   * Loads a new source text and resets all scanner state.
   *
   * @param string $source
   *   The JavaScript source to tokenize.
   * @param string $filename
   *   Name reported in error messages; an empty value becomes '[inline]'.
   * @param int $lineno
   *   Line number assigned to the first line of $source.
   */
  public function init($source, $filename = '', $lineno = 1) {
    $this->source = $source;
    $this->filename = $filename ? $filename : '[inline]';
    $this->lineno = $lineno;

    $this->cursor = 0;
    $this->tokens = array();
    $this->tokenIndex = 0;
    $this->lookahead = 0;
    $this->scanNewlines = false;
    $this->scanOperand = true;
  }

  /**
   * Returns the unscanned source, starting at the cursor.
   *
   * @param int|null $chunksize
   *   Maximum number of bytes to return; a falsy value returns everything
   *   that is left.
   *
   * @return string|false
   *   The requested slice, exactly as substr() returns it. Older PHP versions
   *   return FALSE rather than '' at the end of the source, which is why
   *   callers in this class compare the result loosely against ''.
   */
  public function getInput($chunksize) {
    if ($chunksize) {
      return substr($this->source, $this->cursor, $chunksize);
    }

    return substr($this->source, $this->cursor);
  }

  /**
   * Tells whether the next token is the end of the input.
   *
   * @return bool
   *   TRUE when peek() reports TOKEN_END.
   */
  public function isDone() {
    return $this->peek() == TOKEN_END;
  }

  /**
   * Consumes the next token only if it has the given type.
   *
   * @param mixed $tt
   *   The expected token type.
   *
   * @return bool
   *   TRUE when the token matched and was consumed. Otherwise the token is
   *   pushed back and FALSE is returned (unget() yields NULL, which the "||"
   *   turns into FALSE).
   */
  public function match($tt) {
    return $this->get() == $tt || $this->unget();
  }

  /**
   * Consumes the next token and fails if it does not have the given type.
   *
   * @param mixed $tt
   *   The required token type.
   *
   * @return JSToken|null
   *   The token that was matched, as returned by currentToken().
   *
   * @throws Exception
   *   When the next token has a different type.
   */
  public function mustMatch($tt) {
    if (!$this->match($tt)) {
      throw $this->newSyntaxError('Unexpected token; token ' . $tt . ' expected');
    }

    return $this->currentToken();
  }

  /**
   * Returns the type of the next token without consuming it.
   *
   * @return mixed
   *   The next token type. While $scanNewlines is set, a buffered token whose
   *   line differs from the current line is reported as TOKEN_NEWLINE.
   */
  public function peek() {
    if ($this->lookahead) {
      // The next token is already buffered; inspect it in place.
      $next = $this->tokens[($this->tokenIndex + $this->lookahead) & 3];
      if ($this->scanNewlines && $next->lineno != $this->lineno) {
        $tt = TOKEN_NEWLINE;
      }
      else {
        $tt = $next->type;
      }
    }
    else {
      // Nothing buffered: scan one token and immediately push it back.
      $tt = $this->get();
      $this->unget();
    }

    return $tt;
  }

  /**
   * Like peek(), but with newline reporting switched on for the call.
   *
   * @return mixed
   *   The next token type, or TOKEN_NEWLINE when a newline comes first.
   */
  public function peekOnSameLine() {
    $this->scanNewlines = true;
    $tt = $this->peek();
    $this->scanNewlines = false;

    return $tt;
  }

  /**
   * Returns the token at the current ring-buffer position.
   *
   * @return JSToken|null
   *   The current token, or NULL when nothing has been scanned yet.
   */
  public function currentToken() {
    if (!empty($this->tokens)) {
      return $this->tokens[$this->tokenIndex];
    }
  }

  /**
   * Scans and consumes the next token.
   *
   * Tokens pushed back with unget() are replayed first. Otherwise whitespace
   * and comments are skipped and one token is read from the source into the
   * next ring-buffer slot.
   *
   * @param int|null $chunksize
   *   Number of bytes of source to look at. Matching first runs against a
   *   chunk of this size and is retried against the whole remaining source
   *   when the chunk proves too small. A falsy value uses the whole source
   *   right away.
   *
   * @return mixed
   *   The type of the token: a TOKEN_* / OP_* constant, a keyword, or the
   *   operator / punctuator text itself.
   *
   * @throws Exception
   *   On an unterminated string literal or an illegal character sequence.
   */
  public function get($chunksize = 1000) {
    // Replay pushed-back tokens; buffered newline tokens are skipped unless
    // the caller currently wants to see newlines.
    while ($this->lookahead) {
      $this->lookahead--;
      $this->tokenIndex = ($this->tokenIndex + 1) & 3;
      $token = $this->tokens[$this->tokenIndex];
      if ($token->type != TOKEN_NEWLINE || $this->scanNewlines) {
        return $token->type;
      }
    }

    $conditional_comment = false;
    // Only the operator branch assigns a real value; initialise it so the
    // assignment-operator bookkeeping below never reads an undefined variable.
    $op = null;

    // Strip whitespace and comments.
    while (true) {
      $input = $this->getInput($chunksize);

      // Whitespace handling; \r is gobbled up as well, so a lone \r (classic
      // Mac newline) is never counted as a line break.
      $re = $this->scanNewlines ? '/^[ \r\t]+/' : '/^\s+/';
      if (preg_match($re, $input, $match)) {
        $spaces = $match[0];
        $spacelen = strlen($spaces);
        $this->cursor += $spacelen;
        if (!$this->scanNewlines) {
          $this->lineno += substr_count($spaces, "\n");
        }

        if ($spacelen == $chunksize) {
          // The complete chunk was whitespace; fetch the next one.
          continue;
        }

        $input = $this->getInput($chunksize);
        // Loose comparison on purpose: substr() may have returned FALSE.
        if ($input == '' || $input[0] != '/') {
          break;
        }
      }

      // Comments.
      if (!preg_match('/^\/(?:\*(@(?:cc_on|if|elif|else|end))?.*?\*\/|\/[^\n]*)/s', $input, $match)) {
        if (!$chunksize) {
          break;
        }

        // Retry with a full fetch. This also prevents breakage of long
        // regular expression literals, which never match a comment.
        $chunksize = null;
        continue;
      }

      // Check whether this is a conditional (JScript) comment.
      if (!empty($match[1])) {
        // Only the opening marker becomes the token; the comment's contents
        // are tokenized by the following calls.
        $match[0] = '/*' . $match[1];
        $conditional_comment = true;
        break;
      }
      else {
        $this->cursor += strlen($match[0]);
        $this->lineno += substr_count($match[0], "\n");
      }
    }

    // Loose comparison on purpose: substr() may have returned FALSE.
    if ($input == '') {
      $tt = TOKEN_END;
      $match = array('');
    }
    elseif ($conditional_comment) {
      $tt = TOKEN_CONDCOMMENT_START;
    }
    else {
      switch ($input[0]) {
        case '0':
          // Hexadecimal literal. The isset() guard keeps a source that ends
          // in a lone "0" from reading past the end of the string.
          if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match)) {
            $tt = TOKEN_NUMBER;
            break;
          }
          // FALL THROUGH

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          // Decimal literal; this always matches because of the leading digit.
          preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
          $tt = TOKEN_NUMBER;
          break;

        case "'":
          if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match)) {
            $tt = TOKEN_STRING;
          }
          else {
            if ($chunksize) {
              // The closing quote may lie beyond the chunk; retry on the
              // whole remaining source.
              return $this->get(null);
            }

            throw $this->newSyntaxError('Unterminated string literal');
          }
          break;

        case '"':
          if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match)) {
            $tt = TOKEN_STRING;
          }
          else {
            if ($chunksize) {
              // The closing quote may lie beyond the chunk; retry on the
              // whole remaining source.
              return $this->get(null);
            }

            throw $this->newSyntaxError('Unterminated string literal');
          }
          break;

        case '/':
          // A "/" starts a regular expression only where an operand is
          // expected. The flag set covers every flag ECMAScript defines
          // (d, g, i, m, s, u, v, y), so the flags stay part of the literal.
          if ($this->scanOperand && preg_match('/^\/((?:\\\\.|\[(?:\\\\.|[^\]])*\]|[^\/])+)\/([dgimsuvy]*)/', $input, $match)) {
            $tt = TOKEN_REGEXP;
            break;
          }
          // FALL THROUGH

        case '|':
        case '^':
        case '&':
        case '<':
        case '>':
        case '+':
        case '-':
        case '*':
        case '%':
        case '=':
        case '!':
          // This always matches: each of these characters is an operator.
          preg_match($this->opRegExp, $input, $match);
          $op = $match[0];
          $after = strlen($op);
          // The isset() guard covers an operator that is the very last
          // character of the source.
          if (in_array($op, $this->assignOps, true) && isset($input[$after]) && $input[$after] == '=') {
            // Compound assignment: the token covers the operator plus "=",
            // and the bare operator is remembered in $op for the token.
            $tt = OP_ASSIGN;
            $match[0] .= '=';
          }
          else {
            $tt = $op;
            if ($this->scanOperand) {
              if ($op == OP_PLUS) {
                $tt = OP_UNARY_PLUS;
              }
              elseif ($op == OP_MINUS) {
                $tt = OP_UNARY_MINUS;
              }
            }
            // Not a compound assignment, so there is no operator to record.
            $op = null;
          }
          break;

        case '.':
          // A dot directly followed by digits is a number such as ".5".
          if (preg_match('/^\.\d+(?:[eE][-+]?\d+)?/', $input, $match)) {
            $tt = TOKEN_NUMBER;
            break;
          }
          // FALL THROUGH

        case ';':
        case ',':
        case '?':
        case ':':
        case '~':
        case '[':
        case ']':
        case '{':
        case '}':
        case '(':
        case ')':
          // Single-character tokens; the character itself is the type.
          $match = array($input[0]);
          $tt = $input[0];
          break;

        case '@':
          // Only valid as the end marker of a conditional comment.
          if (substr($input, 0, 3) == '@*/') {
            $match = array('@*/');
            $tt = TOKEN_CONDCOMMENT_END;
          }
          else {
            throw $this->newSyntaxError('Illegal token');
          }
          break;

        case "\n":
          // Reachable only when newlines were not skipped as whitespace.
          if ($this->scanNewlines) {
            $match = array("\n");
            $tt = TOKEN_NEWLINE;
          }
          else {
            throw $this->newSyntaxError('Illegal token');
          }
          break;

        default:
          // Fast path for identifiers: word characters followed by whitespace
          // or one of the other token start characters. Digits need not be
          // excluded from the first position, as they were handled above.
          if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match)) {
            // Character classes per ECMA-262 edition 5.1 section 7.6.
            // Per spec, Unicode 3.0 must be accepted and later versions may
            // be; whatever PCRE understands is used here.
            // UnicodeLetter, "$" and "_".
            $identifierStartChars = "\\p{L}\\p{Nl}" . "\$" . "_";
            // Plus UnicodeCombiningMark, UnicodeDigit and
            // UnicodeConnectorPunctuation.
            $identifierPartChars = $identifierStartChars . "\\p{Mn}\\p{Mc}" . "\\p{Nd}" . "\\p{Pc}";
            // Exactly four hex digits, the same set the decoder below accepts.
            $unicodeEscape = "\\\\u[0-9A-Fa-f]{4}";
            $identifierRegex = "/^" . "(?:[" . $identifierStartChars . "]|" . $unicodeEscape . ")" . "(?:[" . $identifierPartChars . "]|" . $unicodeEscape . ")*" . "/uS";
            if (preg_match($identifierRegex, $input, $match)) {
              if (strpos($match[0], '\\') !== false) {
                // Per ECMA-262 edition 5.1 section 7.6, escape sequences
                // behave as if they were the original characters, but only
                // within the boundaries of the identifier.
                $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/', array(__CLASS__, 'unicodeEscapeCallback'), $match[0]);

                // The first regex did not de-escape, so validity has to be
                // checked again on the decoded form. Token boundaries are no
                // concern: anything outside the identifier is illegal.
                if (!preg_match("/^[" . $identifierStartChars . "][" . $identifierPartChars . "]*\$/u", $decoded)) {
                  throw $this->newSyntaxError('Illegal token');
                }

                // Per spec escapes ought to work for keywords as well, but
                // IE rejects them as invalid, while Firefox and Chrome treat
                // them as identifiers that do not match the keyword.
                if (in_array($decoded, $this->keywords, true)) {
                  throw $this->newSyntaxError('Illegal token');
                }

                // TODO: save the decoded form for output?
              }
            }
            else {
              throw $this->newSyntaxError('Illegal token');
            }
          }
          $tt = in_array($match[0], $this->keywords, true) ? $match[0] : TOKEN_IDENTIFIER;
      }
    }

    // Store the token in the next ring-buffer slot, reusing the JSToken
    // object when the slot has been used before.
    $this->tokenIndex = ($this->tokenIndex + 1) & 3;

    if (!isset($this->tokens[$this->tokenIndex])) {
      $this->tokens[$this->tokenIndex] = new JSToken();
    }

    $token = $this->tokens[$this->tokenIndex];
    $token->type = $tt;

    if ($tt == OP_ASSIGN) {
      $token->assignOp = $op;
    }

    $token->start = $this->cursor;

    $token->value = $match[0];
    $this->cursor += strlen($match[0]);

    $token->end = $this->cursor;
    $token->lineno = $this->lineno;

    return $tt;
  }

  /**
   * Pushes the current token back so that the next get() returns it again.
   *
   * @throws Exception
   *   When this would exceed the three tokens of lookahead the ring buffer
   *   can hold.
   */
  public function unget() {
    if (++$this->lookahead == 4) {
      throw $this->newSyntaxError('PANIC: too much lookahead!');
    }

    $this->tokenIndex = ($this->tokenIndex - 1) & 3;
  }

  /**
   * Creates (but does not throw) a parse error for the current position.
   *
   * @param string $m
   *   Description of the problem.
   *
   * @return Exception
   *   An exception whose message includes the file name and line number.
   */
  public function newSyntaxError($m) {
    return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
  }

  /**
   * preg_replace_callback() callback decoding one \uXXXX escape to UTF-8.
   *
   * @param array $m
   *   Regex match; $m[1] holds the four hex digits.
   *
   * @return string
   *   The decoded character, UTF-8 encoded.
   */
  public static function unicodeEscapeCallback($m) {
    return html_entity_decode('&#x' . $m[1] . ';', ENT_QUOTES, 'UTF-8');
  }
}