calc

Simple math expression parser.
git clone git://git.amin.space/calc.git
Log | Files | Refs | LICENSE

commit 61844caf78f788b0e47303d34f1b81fadc91ac7b
parent 89878fdbc63a7c562b91847e8020578822f9adbf
Author: amin <dev@aminmesbah.com>
Date:   Sun, 21 Feb 2021 04:43:50 +0000

Parse with recursive descent + precedence climbing

FossilOrigin-Name: 1ad0cce20b7d53a45cdf2667c9d6d687645515e3d990e6a1e39bac3348a44f1a
Diffstat:
Mcalc.c | 494+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 372 insertions(+), 122 deletions(-)

diff --git a/calc.c b/calc.c @@ -1,5 +1,6 @@ #include <assert.h> #include <ctype.h> +#include <stdbool.h> #include <stddef.h> #include <stdint.h> #include <stdio.h> @@ -13,8 +14,7 @@ struct BufHdr { uint8_t buf[]; }; -void *cmalloc(size_t num_bytes) -{ +void *cmalloc(size_t num_bytes) { void *ptr = malloc(num_bytes); if (!ptr) { fprintf(stderr, "crealloc failed\n"); @@ -23,8 +23,7 @@ void *cmalloc(size_t num_bytes) return ptr; } -void *crealloc(void *ptr, size_t num_bytes) -{ +void *crealloc(void *ptr, size_t num_bytes) { ptr = realloc(ptr, num_bytes); if (!ptr) { fprintf(stderr, "crealloc failed\n"); @@ -43,18 +42,14 @@ void *crealloc(void *ptr, size_t num_bytes) #define MAX(x, y) ((x) >= (y) ? (x) : (y)) -void *buf__grow(void *buf, size_t new_length, size_t element_size) -{ +void *buf__grow(void *buf, size_t new_length, size_t element_size) { size_t new_cap = MAX(1 + (2 * buf_cap(buf)), new_length); assert(new_cap >= new_length); size_t new_size = offsetof(struct BufHdr, buf) + (new_cap * element_size); struct BufHdr *new_hdr = NULL; - if (buf) - { + if (buf) { new_hdr = crealloc(buf__hdr(buf), new_size); - } - else - { + } else { new_hdr = cmalloc(new_size); new_hdr->len = 0; } @@ -62,45 +57,63 @@ void *buf__grow(void *buf, size_t new_length, size_t element_size) return new_hdr->buf; } -void buf_test(void) -{ +void buf_test(void) { int *buf = NULL; assert(buf_len(buf) == 0); - for (size_t i = 0; i < 1000; ++i) - { + for (size_t i = 0; i < 1000; ++i) { buf_push(buf, i); } assert(buf_len(buf) == 1000); - for (size_t i = 0; i < buf_len(buf); ++i) - { - printf("%d, ", buf[i]); + for (size_t i = 0; i < buf_len(buf); ++i) { + //printf("%d, ", buf[i]); } - printf("\n"); + //printf("\n"); } enum TokenKind { + TOKEN_NONE = 0, // first 128 values reserved to match up with ascii TOKEN_INT = 128, - TOKEN_NAME, + TOKEN_SHIFT_L, + TOKEN_SHIFT_R, }; -struct Token { - enum TokenKind kind; - union { - uint64_t val; - struct { - const char *start; - const char *end; - }; - }; -}; +typedef struct Token { + uint32_t kind; + uint64_t val; +} Token; + +global_variable char *g_stream; + +#define ERROR() assert(false && "error") + +void print_token(struct Token token) { + switch(token.kind) { + case TOKEN_INT: + printf("TOKEN_INT: %lu\n", token.val); + break; + case TOKEN_SHIFT_L: + printf("TOKEN: '<<'\n"); + break; + case TOKEN_SHIFT_R: + printf("TOKEN: '>>'\n"); + break; + case TOKEN_NONE: // fallthrough + default: + printf("TOKEN: '%c'\n", token.kind); + break; + } +} + +char *lex_next_token(Token *out_t) { + assert(out_t); -global_variable struct Token g_token; -global_variable const char *g_stream; + char *cursor = g_stream; + while (*cursor == ' ') { + cursor++; + } -void lex_next_token() { - switch(*g_stream) - { + switch(*cursor) { case '0': case '1': case '2': @@ -112,114 +125,351 @@ void lex_next_token() { case '8': case '9': { uint64_t val = 0; - while (isdigit(*g_stream)) - { + while (isdigit(*cursor)) { val *= 10; - val += *g_stream++ - '0'; + val += *cursor++ - '0'; } - g_token.kind = TOKEN_INT; - g_token.val = val; + out_t->kind = TOKEN_INT; + out_t->val = val; break; } - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - case 'g': - case 'h': - case 'i': - case 'j': - case 'k': - case 'l': - case 'm': - case 'n': - case 'o': - case 'p': - case 'q': - case 'r': - case 's': - case 't': - case 'u': - case 'v': - case 'w': - case 'x': - case 'y': - case 'z': - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - case 'O': - case 'P': - case 'Q': - case 'R': - case 'S': - case 'T': - case 'U': - case 'V': - case 'W': - case 'X': - case 'Y': - case 'Z': - case '_': { - const char *start = g_stream++; - while (isalnum(*g_stream) || *g_stream == '_') - { - g_stream++; + case '<': + cursor++; + if (*cursor == '<') { + out_t->kind = TOKEN_SHIFT_L; + cursor++; + } else { + ERROR(); + } + break; + case '>': + cursor++; + if (*cursor == '>') { + out_t->kind = TOKEN_SHIFT_R; + cursor++; + } else { + ERROR(); } - g_token.kind = TOKEN_NAME; - g_token.start = start; - g_token.end = g_stream; break; - } default: - g_token.kind = *g_stream++; + out_t->kind = *cursor++; break; } + + return cursor; } -void print_token(struct Token token) -{ - switch(token.kind) - { - case TOKEN_INT: - printf("TOKEN_INT: %lu\n", token.val); +bool token_is_kind(Token t, enum TokenKind kind) { + return t.kind == kind; +} + +Token token_peek(void) { + Token t = {0}; + lex_next_token(&t); + return t; +} + +Token token_consume(void) { + Token t = {0}; + g_stream = lex_next_token(&t); + return t; +} + +Token token_consume_kind(enum TokenKind kind) { + Token t = token_peek(); + if (token_is_kind(t, kind)) { + token_consume(); + } else { + t = (Token){0}; + } + return t; +} + +Token token_require_kind(enum TokenKind kind) { + Token t = token_consume(); + if (!token_is_kind(t, kind)) { + ERROR(); + } + return t; +} + +void lex_test(void) { + char *source = "+()_HELLO 1,23<<>>4+FOO!994"; + g_stream = source; + Token t = token_consume(); + while (t.kind) { + //print_token(t); + t = token_consume(); + } +} + +enum OpKind { + OP_NONE, + + OP_ADD, + OP_SUB, + OP_BIN_OR, + OP_BIN_XOR, + + OP_MUL, + OP_DIV, + OP_MOD, + OP_SHIFT_L, + OP_SHIFT_R, + OP_BIN_AND, + + OP_NEG, + OP_BIN_NOT, +}; + +enum ExprKind { + EXPR_NONE, + EXPR_OPERAND, + EXPR_UNARY, + EXPR_BINARY, +}; + +bool unary_op(enum OpKind *op) { + assert(op); + enum OpKind kind = OP_NONE; + + Token next = token_peek(); + switch (next.kind) { + case '-': + kind = OP_NEG; + break; + case '~': + kind = OP_BIN_NOT; + break; + } + + if (kind != OP_NONE) { + *op = kind; + token_consume(); + return true; + } + + return false; +} + +bool mul_op(enum OpKind *op) { + assert(op); + enum OpKind kind = OP_NONE; + + Token next = token_peek(); + switch (next.kind) { + case '*': + kind = OP_MUL; + break; + case '/': + kind = OP_DIV; + break; + case '%': + kind = OP_MOD; + break; + case TOKEN_SHIFT_L: + kind = OP_SHIFT_L; break; - case TOKEN_NAME: - printf("TOKEN_NAME: %.*s\n", (int)(token.end - token.start), token.start); + case TOKEN_SHIFT_R: + kind = OP_SHIFT_R; break; + } + + if (kind != OP_NONE) { + *op = kind; + token_consume(); + return true; + } + + return false; +} + +bool add_op(enum OpKind *op) { + assert(op); + enum OpKind kind = OP_NONE; + + Token next = token_peek(); + switch (next.kind) { + case '+': + kind = OP_ADD; + break; + case '-': + kind = OP_SUB; + break; + case '|': + kind = OP_BIN_OR; + break; + case '^': + kind = OP_BIN_XOR; + break; + } + + if (kind != OP_NONE) { + *op = kind; + token_consume(); + return true; + } + + return false; +} + +typedef struct Expr Expr; + +struct Expr { + enum ExprKind kind; + enum OpKind op; + Expr *left; + Expr *right; + int32_t operand; +}; + +Expr *expr_alloc(void) { + Expr *e = malloc(sizeof(Expr)); + *e = (Expr) {0}; + return e; +} + +Expr *expr_unary(enum OpKind o, Expr *u) { + Expr *e = expr_alloc(); + *e = (Expr) { + .kind = EXPR_UNARY, + .op = o, + .left = u, + }; + return e; +} + +Expr *expr_binary(enum OpKind o, Expr *l, Expr *r) { + Expr *e = expr_alloc(); + *e = (Expr) { + .kind = EXPR_BINARY, + .op = o, + .left = l, + .right = r, + }; + return e; +} + +void print_expr(Expr *e) { + assert(e); + + char *op = NULL; + switch (e->op) { + case OP_NONE: break; + case OP_ADD: op = "+"; break; + case OP_SUB: op = "-"; break; + case OP_BIN_OR: op = "|"; break; + case OP_BIN_XOR: op = "^"; break; + case OP_MUL: op = "*"; break; + case OP_DIV: op = "/"; break; + case OP_MOD: op = "%"; break; + case OP_SHIFT_L: op = "<<"; break; + case OP_SHIFT_R: op = ">>"; break; + case OP_BIN_AND: op = "&"; break; + case OP_NEG: op = "-"; break; + case OP_BIN_NOT: op = "~"; break; default: - printf("TOKEN: '%c'\n", token.kind); + ERROR(); break; } + + if (e->kind != EXPR_OPERAND) { + printf("("); + } + + switch (e->kind) { + case EXPR_OPERAND: + printf("%i", e->operand); + break; + case EXPR_UNARY: + if (op) { + printf("%s ", op); + } + print_expr(e->left); + break; + case EXPR_BINARY: + printf("%s ", op); + print_expr(e->left); + printf(" "); + print_expr(e->right); + break; + case EXPR_NONE: // fallthrough + default: + ERROR(); + break; + } + + if (e->kind != EXPR_OPERAND) { + printf(")"); + } } -void lex_test() -{ - char *source = "+()_HELLO1,234+FOO!994"; - g_stream = source; - lex_next_token(); - while (g_token.kind) - { - print_token(g_token); - lex_next_token(); +int32_t parse_number(void) { + Token t = token_require_kind(TOKEN_INT); + return t.val; +} + +Expr *parse_expr_operand(void) { + Expr *e = expr_alloc(); + *e = (Expr) { + .operand = parse_number(), + .kind = EXPR_OPERAND, + }; + return e; +} + +Expr *parse_expr_unary(void) { + enum OpKind op = 0; + if (unary_op(&op)) { + return expr_unary(op, parse_expr_unary()); + } else { + return parse_expr_operand(); + } +} + +Expr *parse_expr_mul(void) { + Expr *e = parse_expr_unary(); + enum OpKind op = 0; + while (mul_op(&op)) { + e = expr_binary(op, e, parse_expr_unary()); } + return e; +} + +Expr *parse_expr_add(void) { + Expr *e = parse_expr_mul(); + enum OpKind op = 0; + while (add_op(&op)) { + e = expr_binary(op, e, parse_expr_mul()); + } + + return e; +} + +Expr *parse_expr(void) { + return parse_expr_add(); +} + +// precedence climbing +// op_unary = "-" | "~". +// op_mul = "*" | "/" | "%" | "<<" | ">>" | "&". +// op_add = "+" | "-" | "|" | "^". +// exp_operand = number. +// expr_unary = ([op_unary] exp_unary) | exp_base. +// expr_mul = expr_unary {op_mul expr_unary}. +// expr_add = expr_mul {op_add expr_mul}. +// expr = expr_add. +// number = "0" .. "9" {"0" .. "9"}. +void parse_test(void) { + char *source = "12*34 + 45/56 + -25"; + printf("%s\n", source); + g_stream = source; + Expr *e = parse_expr(); + print_expr(e); } -int main(void) -{ +int main(void) { buf_test(); lex_test(); + parse_test(); }