Paste: pratt parser in js

Author: erg
Mode: javascript
Date: Fri, 7 Aug 2020 00:07:36
Plain Text |
// based on https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
// node parser.js

class Atom {
  constructor(text) { this.text = text; }
}

class Op {
  constructor(text) { this.text = text; }
}

var tokens = [
  [],
  [new Atom('b'), new Op('-'), new Atom('a')],
  [new Op(')'), new Atom('b'), new Op('-'), new Atom('a'), new Op('(')],
  [new Op(')'), new Atom('b'), new Op('-'), new Atom('a'), new Op('('), new Op('+')],
  [new Atom('c'), new Op(':'), new Atom('b'), new Op('?'), new Atom('a')],
  [new Op(']'), new Atom('c'), new Op('+'), new Atom('b'), new Op('['), new Atom('a')],
  [new Op(']'), new Atom('e'), new Op('['), new Op(']'), new Atom('d'), new Op('['), new Op(']'), new Atom('c'), new Op('+'), new Atom('b'), new Op('['), new Atom('a')],
];

function prefixBindingPower(op) {
  switch(op) {
    case '+': case '-':
      return [null, 9];
    default:
      return [null, null];
  }
}

function postfixBindingPower(op) {
  switch(op) {
    case '!':
      return [11, null];
    case '[':
      return [11, null];
    default:
      return [null, null];
  }
}

function infixBindingPower(op) {
  switch(op) {
    case '=':
      return [2, 1];
    case '?':
      return [4, 3];
    case '+': case '-':
      return [5, 6];
    case '*': case '/':
      return [7, 8];
    case '.':
      return [14, 13];
    default:
      return [null, null];
  }
}

function expr(tokens) {
  return expr_br(tokens, 0);
}

function expr_br(tokens, min_bp) {
  var token = tokens.pop();
  if(!token) {
    throw 'unexpected eof';
  }
  else if(token instanceof(Atom)) {
    var lhs = token;
  }
  else if(token.text === '(') {
    var lhs = expr_br(tokens, 0);
  }
  else if(token instanceof(Op)) {
    var [ignored, r_bp] = prefixBindingPower(token.text);
    var rhs = expr_br(tokens, r_bp);
    var lhs = [token, rhs];
  } else {
    throw 'oops';
  }

  do {
    if(tokens.length === 0) break;

    var op = tokens[tokens.length - 1];
    var [l_bp, ignored] = postfixBindingPower(op.text);
    if(l_bp !== null) {
      if(l_bp < min_bp) {
        break;
      }
      tokens.pop();
      if(op.text === '[') {
        var rhs = expr_br(tokens, 0);
        var dummy = tokens.pop();
        if(dummy.text !== ']') {
          throw 'expected ]';
        }
        var lhs = [op, lhs, rhs];
      } else {
        var lhs = [op, lhs];
      }
      continue;
    }
    var [l_bp, r_bp] = infixBindingPower(op.text);
    if(l_bp !== null || r_bp !== null) {
      if(l_bp < min_bp) {
        break;
      }
      var ignored = tokens.pop();
      if(op.text === '?') {
        var mhs = expr_br(tokens, 0);
        var dummy = tokens.pop();
        if(dummy.text !== ':') {
          throw 'expected :';
        }
        var rhs = expr_br(tokens, r_bp);
        var lhs = [op, lhs, mhs, rhs];

      } else {
        var rhs = expr_br(tokens, r_bp);
        var lhs = [op, lhs, rhs];
      }
      continue;
    }
    break;
  } while(true);
  return lhs;
}

var parsed = expr(tokens[tokens.length - 1]);
console.log('tokens', tokens[tokens.length - 1]);
console.log('parsed', parsed);

New Annotation

Summary:
Author:
Mode:
Body: