Crafting Interpreters:Jlox — Parser Expressions

Ukpa Uchechi
6 min readJan 16, 2024

--

Parser involves taking the tokens and using the grammar rules to evaluate them. Taking a different approach in the article, where I will talk about the code.

Note: I did not free any dynamically allocated memory, as I am still studying arena allocator and plan to refactor later.

These are the grammar rules for the lox which doesn’t have the order of precedence. Order of precedence is important so that a subtraction(-) is not executed before a multiplication(*). The order is association, determines which operator is evaluated first in a series of the same operators.

expression     → literal
| unary
| binary
| grouping ;

literal → NUMBER | STRING | "true" | "false" | "nil" ;
grouping → "(" expression ")" ;
unary → ( "-" | "!" ) expression ;
binary → expression operator expression ;
operator → "==" | "!=" | "<" | "<=" | ">" | ">="
| "+" | "-" | "*" | "/" ;

This is the updated version which does.

expression     → equality ;
equality → comparison ( ( "!=" | "==" ) comparison )* ;
comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
term → factor ( ( "-" | "+" ) factor )* ;
factor → unary ( ( "/" | "*" ) unary )* ;
unary → ( "!" | "-" ) unary
| primary ;
primary → NUMBER | STRING | "true" | "false" | "nil"
| "(" expression ")" ;

Using these rules we can take are tokens and transform them.
First, we interpret these rules in code.

Expr *primary(){

if(match((int[]){FALSE}, 1)) return Literal();
else if(match((int[]){TRUE}, 1)) return Literal();
else if(match((int[]) {NIL}, 1)) return Literal();


else if(match((int[]){NUMBER, STRING}, 2)){
return Literal();
}


else if (match((int []){LEFT_PAREN}, 1)){
Expr *expr = expression();
consume(RIGHT_PAREN, "Expect ')' after expression.");
return Grouping(expr);
}

else {
error(*peek(), "Invalid operand");
longjmp(buf, 1);
}
}

Expr *unary(){
if(match((int[]){BANG, MINUS}, 2)){
TokenArray *operator = previous();
Expr *right = unary();

return Unary(operator, right);
}

return primary();
}


Expr *factor(){
Expr *expr = unary();
while(match((int[]){SLASH, STAR}, 2)){
TokenArray *operator = previous();
Expr *right = unary();

expr = Binary(expr, operator, right);
}

return expr;

}
Expr *term(){
Expr *expr = factor();
while(match((int[]){MINUS, PLUS}, 2)){
TokenArray *operator = previous();
Expr *right = factor();
expr = Binary(expr, operator, right);
}

return expr;
}

Expr *comparison(){
Expr *expr = term();
while(match((int []){GREATER, GREATER_EQUAL, LESS, LESS_EQUAL}, 4)){
TokenArray *operator = previous();
Expr *right = term();
expr = Binary(expr, operator, right);
}

return expr;
}
Expr *equality(){
Expr *expr = comparison();
while (match((int[]){BANG_EQUAL,EQUAL_EQUAL}, 2)){
TokenArray *operator = previous();
Expr *right = comparison();

expr = Binary(expr, operator, right);
}

return expr;
}



Expr *tenary(){
Expr *expr = equality();
while (match((int[]){QUESTION_MARK}, 1)){
Expr *true_statment = equality();
consume(COLON, "expected : after expression");
Expr *false_statement = equality();

expr = Condition(expr, true_statment, false_statement);
}

return expr;
}

Expr *comma(){
Expr *expr = tenary();
while (match((int[]){COMMA}, 1)){
TokenArray *operator = previous();
Expr *right = equality();

expr = Binary(expr, operator, right);
}

return expr;
}

Expr *expression(){
return comma();
}

Two functions that may be strange are the comma and ternary.
It’s from the exercises, adding a comma operator and supporting ternary condition. Listed the questions below

  1. In C, a block is a statement form that allows you to pack a series of statements where a single one is expected. The comma operator is an analogous syntax for expressions. A comma-separated series of expressions can be given where a single expression is expected (except inside a function call’s argument list). At runtime, the comma operator evaluates the left operand and discards the result. Then it evaluates and returns the right operand.
    Add support for comma expressions. Give them the same precedence and associativity as in C. Write the grammar, and then implement the necessary parsing code
  2. Likewise, add support for the C-style conditional or “ternary” operator ?:. What precedence level is allowed between the ? and :? Is the whole operator left-associative or right-associative?
  3. Add error productions to handle each binary operator appearing without a left-hand operand. In other words, detect a binary operator appearing at the beginning of an expression. Report that as an error, but also parse and discard a right-hand operand with the appropriate precedence.

Of course, if you use C I have to define a struct for the listed Expr and use a union type and enum to represent each operation.

typedef struct Expr{
enum ExprType{
EXPR_BINARY,
EXPR_UNARY,
EXPR_LITERAL,
EXPR_GROUPING,
EXPR_CONDITION
}type;

union {
struct {
struct Expr *left;
TokenArray *operator;
struct Expr *right;
} Binary;


struct {
struct Expr *condition;
struct Expr *true_statment;
struct Expr *false_statment;
} Conditon;

struct {
struct Expr* right;
TokenArray *operator;
} Unary;

struct {
struct Expr *expr;
} Grouping;

TokenArray *Literal;
} ast;
} Expr;
Expr *Binary(Expr *left, TokenArray *operator, Expr *right){
Expr *expr = malloc(sizeof(Expr));
expr->type = EXPR_BINARY;
expr->ast.Binary.left = left;
expr->ast.Binary.operator = operator;
expr->ast.Binary.right = right;
return expr;
}


Expr *Condition(Expr *condition, Expr *true_statment, Expr *false_statment){
Expr *expr = malloc(sizeof(Expr));
expr->type = EXPR_CONDITION;
expr->ast.Conditon.condition = condition;
expr->ast.Conditon.false_statment = false_statment;
expr->ast.Conditon.true_statment = true_statment;
return expr;
}

Expr *Unary( TokenArray *operator, Expr *right){
Expr *expr = malloc(sizeof(Expr));
expr->type = EXPR_UNARY;
expr->ast.Unary.operator = operator;
expr->ast.Unary.right = right;
return expr;
}


Expr *Literal(){
Expr *expr = malloc(sizeof(Expr));
expr->type = EXPR_LITERAL;
expr->ast.Literal = &tokens.tokenarray[current - 1];
return expr;
}



Expr *Grouping(Expr *exp){
Expr *expr = malloc(sizeof(Expr));
expr->type = EXPR_GROUPING;
expr->ast.Grouping.expr = exp;
return expr;
}

These functions are used for ast, showing how the code will be executed, in the order of precedence and association.

All the helper functions, to check if we have reached the end, return tokens, etc

static int isAtEnd(){
return tokens.count == current;
}
static TokenArray *peek(){
return &tokens.tokenarray[current];
}

TokenArray *previous(){
return &tokens.tokenarray[current - 1];
}

int check(TokenType type){
if(isAtEnd()) return 0;
TokenArray *token = peek();
return token->type == type;
}
static TokenArray *advance(){
if(!isAtEnd()) current++;
return previous();
}


static int match(int types[], int typesSize){
for(int i = 0; i < typesSize; i++){
if(check(types[i])){
advance();
return 1;
}
}

return 0;
}

void parsererror(TokenArray token, char *message){
error(token,message);
}

TokenArray *consume(TokenType type, char * message){
if(check(type)) return advance();

error(*peek(), message);
longjmp(buf, 1);
}

Use this to visualize the tree and other of execution, even if it will not be needed going forward

static jmp_buf buf;



extern Scanner scanner;
extern Tokens tokens;


static int current = 0;
char buffer[256] = {'\0'};

void astprint(Expr *expression){

switch (expression->type)
{

case EXPR_CONDITION:
astprint(expression->ast.Conditon.condition);
astprint(expression->ast.Conditon.true_statment);
astprint(expression->ast.Conditon.false_statment);

break;
case EXPR_BINARY:
strcat(buffer, getTokenValue(expression->ast.Binary.operator->type));
astprint(expression->ast.Binary.left);
astprint(expression->ast.Binary.right);

break;

case EXPR_UNARY:
strcat(buffer, getTokenValue(expression->ast.Unary.operator->type));
astprint(expression->ast.Unary.right);
break;

case EXPR_GROUPING:
strcat(buffer,"group");
astprint(expression->ast.Grouping.expr);
break;

case EXPR_LITERAL:
int lengthTemp = 0;

for(int i = 0; i < 256; i++){
if(buffer[i] == '\0'){
if(lengthTemp == expression->ast.Literal->length) break;
buffer[i] = *(expression->ast.Literal->ptr + lengthTemp);
lengthTemp += 1;
}
}
break;


default:
break;
}
}

Finally, we call the code.

void parser(){

if(setjmp(buf) == 0){
Expr *expr = expression();
astprint(expr);
printf("%s", buffer);
}else{
// return NULL;
printf("error occured");
}



for(int i = 0; i < 256; i++){
if(buffer[i] == '\0') break;
buffer[i] = '\0';
}

current = 0;
}

Note: that scanner.c file calls the parser, after the tokenization stage.

Longjmp and setjmp are used to get out of nested operations, when an error occurs, when setjmp is first called it captures the state of the stack, and when longjmp is called it reverts the stack to the captured state, with that, unwinding the stack.

After the exercises, our grammar rules look like this, with the addition of the ternary and comma operators.

expression     → comma;
comma → ternary ( (",") ternary)*;
ternary → eqaulity ("?" expression : ":" expression)*;
equality → comparison ( ( "!=" | "==" ) comparison )* ;
comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
term → factor ( ( "-" | "+" ) factor )* ;
factor → unary ( ( "/" | "*" ) unary )* ;
unary → ( "!" | "-" ) unary
| primary ;
primary → NUMBER | STRING | "true" | "false" | "nil"
| "(" expression ")" ;

Now we move to evaluating expressions.

--

--

Ukpa Uchechi
Ukpa Uchechi

Written by Ukpa Uchechi

A Tech Enthusiastic and lover, who loves teaching and learning.

No responses yet