commit 61844caf78f788b0e47303d34f1b81fadc91ac7b
parent 89878fdbc63a7c562b91847e8020578822f9adbf
Author: amin <dev@aminmesbah.com>
Date: Sun, 21 Feb 2021 04:43:50 +0000
Parse with recursive descent + precedence climbing
FossilOrigin-Name: 1ad0cce20b7d53a45cdf2667c9d6d687645515e3d990e6a1e39bac3348a44f1a
Diffstat:
M | calc.c | | | 494 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------- |
1 file changed, 372 insertions(+), 122 deletions(-)
diff --git a/calc.c b/calc.c
@@ -1,5 +1,6 @@
#include <assert.h>
#include <ctype.h>
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
@@ -13,8 +14,7 @@ struct BufHdr {
uint8_t buf[];
};
-void *cmalloc(size_t num_bytes)
-{
+void *cmalloc(size_t num_bytes) {
void *ptr = malloc(num_bytes);
if (!ptr) {
fprintf(stderr, "crealloc failed\n");
@@ -23,8 +23,7 @@ void *cmalloc(size_t num_bytes)
return ptr;
}
-void *crealloc(void *ptr, size_t num_bytes)
-{
+void *crealloc(void *ptr, size_t num_bytes) {
ptr = realloc(ptr, num_bytes);
if (!ptr) {
fprintf(stderr, "crealloc failed\n");
@@ -43,18 +42,14 @@ void *crealloc(void *ptr, size_t num_bytes)
#define MAX(x, y) ((x) >= (y) ? (x) : (y))
-void *buf__grow(void *buf, size_t new_length, size_t element_size)
-{
+void *buf__grow(void *buf, size_t new_length, size_t element_size) {
size_t new_cap = MAX(1 + (2 * buf_cap(buf)), new_length);
assert(new_cap >= new_length);
size_t new_size = offsetof(struct BufHdr, buf) + (new_cap * element_size);
struct BufHdr *new_hdr = NULL;
- if (buf)
- {
+ if (buf) {
new_hdr = crealloc(buf__hdr(buf), new_size);
- }
- else
- {
+ } else {
new_hdr = cmalloc(new_size);
new_hdr->len = 0;
}
@@ -62,45 +57,63 @@ void *buf__grow(void *buf, size_t new_length, size_t element_size)
return new_hdr->buf;
}
-void buf_test(void)
-{
+void buf_test(void) {
int *buf = NULL;
assert(buf_len(buf) == 0);
- for (size_t i = 0; i < 1000; ++i)
- {
+ for (size_t i = 0; i < 1000; ++i) {
buf_push(buf, i);
}
assert(buf_len(buf) == 1000);
- for (size_t i = 0; i < buf_len(buf); ++i)
- {
- printf("%d, ", buf[i]);
+ for (size_t i = 0; i < buf_len(buf); ++i) {
+ //printf("%d, ", buf[i]);
}
- printf("\n");
+ //printf("\n");
}
enum TokenKind {
+ TOKEN_NONE = 0,
// first 128 values reserved to match up with ascii
TOKEN_INT = 128,
- TOKEN_NAME,
+ TOKEN_SHIFT_L,
+ TOKEN_SHIFT_R,
};
-struct Token {
- enum TokenKind kind;
- union {
- uint64_t val;
- struct {
- const char *start;
- const char *end;
- };
- };
-};
+typedef struct Token {
+ uint32_t kind;
+ uint64_t val;
+} Token;
+
+global_variable char *g_stream;
+
+#define ERROR() assert(false && "error")
+
+void print_token(struct Token token) {
+ switch(token.kind) {
+ case TOKEN_INT:
+ printf("TOKEN_INT: %lu\n", token.val);
+ break;
+ case TOKEN_SHIFT_L:
+ printf("TOKEN: '<<'\n");
+ break;
+ case TOKEN_SHIFT_R:
+ printf("TOKEN: '>>'\n");
+ break;
+ case TOKEN_NONE: // fallthrough
+ default:
+ printf("TOKEN: '%c'\n", token.kind);
+ break;
+ }
+}
+
+char *lex_next_token(Token *out_t) {
+ assert(out_t);
-global_variable struct Token g_token;
-global_variable const char *g_stream;
+ char *cursor = g_stream;
+ while (*cursor == ' ') {
+ cursor++;
+ }
-void lex_next_token() {
- switch(*g_stream)
- {
+ switch(*cursor) {
case '0':
case '1':
case '2':
@@ -112,114 +125,351 @@ void lex_next_token() {
case '8':
case '9': {
uint64_t val = 0;
- while (isdigit(*g_stream))
- {
+ while (isdigit(*cursor)) {
val *= 10;
- val += *g_stream++ - '0';
+ val += *cursor++ - '0';
}
- g_token.kind = TOKEN_INT;
- g_token.val = val;
+ out_t->kind = TOKEN_INT;
+ out_t->val = val;
break;
}
- case 'a':
- case 'b':
- case 'c':
- case 'd':
- case 'e':
- case 'f':
- case 'g':
- case 'h':
- case 'i':
- case 'j':
- case 'k':
- case 'l':
- case 'm':
- case 'n':
- case 'o':
- case 'p':
- case 'q':
- case 'r':
- case 's':
- case 't':
- case 'u':
- case 'v':
- case 'w':
- case 'x':
- case 'y':
- case 'z':
- case 'A':
- case 'B':
- case 'C':
- case 'D':
- case 'E':
- case 'F':
- case 'G':
- case 'H':
- case 'I':
- case 'J':
- case 'K':
- case 'L':
- case 'M':
- case 'N':
- case 'O':
- case 'P':
- case 'Q':
- case 'R':
- case 'S':
- case 'T':
- case 'U':
- case 'V':
- case 'W':
- case 'X':
- case 'Y':
- case 'Z':
- case '_': {
- const char *start = g_stream++;
- while (isalnum(*g_stream) || *g_stream == '_')
- {
- g_stream++;
+ case '<':
+ cursor++;
+ if (*cursor == '<') {
+ out_t->kind = TOKEN_SHIFT_L;
+ cursor++;
+ } else {
+ ERROR();
+ }
+ break;
+ case '>':
+ cursor++;
+ if (*cursor == '>') {
+ out_t->kind = TOKEN_SHIFT_R;
+ cursor++;
+ } else {
+ ERROR();
}
- g_token.kind = TOKEN_NAME;
- g_token.start = start;
- g_token.end = g_stream;
break;
- }
default:
- g_token.kind = *g_stream++;
+ out_t->kind = *cursor++;
break;
}
+
+ return cursor;
}
-void print_token(struct Token token)
-{
- switch(token.kind)
- {
- case TOKEN_INT:
- printf("TOKEN_INT: %lu\n", token.val);
+bool token_is_kind(Token t, enum TokenKind kind) {
+ return t.kind == kind;
+}
+
+Token token_peek(void) {
+ Token t = {0};
+ lex_next_token(&t);
+ return t;
+}
+
+Token token_consume(void) {
+ Token t = {0};
+ g_stream = lex_next_token(&t);
+ return t;
+}
+
+Token token_consume_kind(enum TokenKind kind) {
+ Token t = token_peek();
+ if (token_is_kind(t, kind)) {
+ token_consume();
+ } else {
+ t = (Token){0};
+ }
+ return t;
+}
+
+Token token_require_kind(enum TokenKind kind) {
+ Token t = token_consume();
+ if (!token_is_kind(t, kind)) {
+ ERROR();
+ }
+ return t;
+}
+
+void lex_test(void) {
+ char *source = "+()_HELLO 1,23<<>>4+FOO!994";
+ g_stream = source;
+ Token t = token_consume();
+ while (t.kind) {
+ //print_token(t);
+ t = token_consume();
+ }
+}
+
+enum OpKind {
+ OP_NONE,
+
+ OP_ADD,
+ OP_SUB,
+ OP_BIN_OR,
+ OP_BIN_XOR,
+
+ OP_MUL,
+ OP_DIV,
+ OP_MOD,
+ OP_SHIFT_L,
+ OP_SHIFT_R,
+ OP_BIN_AND,
+
+ OP_NEG,
+ OP_BIN_NOT,
+};
+
+enum ExprKind {
+ EXPR_NONE,
+ EXPR_OPERAND,
+ EXPR_UNARY,
+ EXPR_BINARY,
+};
+
+bool unary_op(enum OpKind *op) {
+ assert(op);
+ enum OpKind kind = OP_NONE;
+
+ Token next = token_peek();
+ switch (next.kind) {
+ case '-':
+ kind = OP_NEG;
+ break;
+ case '~':
+ kind = OP_BIN_NOT;
+ break;
+ }
+
+ if (kind != OP_NONE) {
+ *op = kind;
+ token_consume();
+ return true;
+ }
+
+ return false;
+}
+
+bool mul_op(enum OpKind *op) {
+ assert(op);
+ enum OpKind kind = OP_NONE;
+
+ Token next = token_peek();
+ switch (next.kind) {
+ case '*':
+ kind = OP_MUL;
+ break;
+ case '/':
+ kind = OP_DIV;
+ break;
+ case '%':
+ kind = OP_MOD;
+ break;
+ case TOKEN_SHIFT_L:
+ kind = OP_SHIFT_L;
break;
- case TOKEN_NAME:
- printf("TOKEN_NAME: %.*s\n", (int)(token.end - token.start), token.start);
+ case TOKEN_SHIFT_R:
+ kind = OP_SHIFT_R;
break;
+ }
+
+ if (kind != OP_NONE) {
+ *op = kind;
+ token_consume();
+ return true;
+ }
+
+ return false;
+}
+
+bool add_op(enum OpKind *op) {
+ assert(op);
+ enum OpKind kind = OP_NONE;
+
+ Token next = token_peek();
+ switch (next.kind) {
+ case '+':
+ kind = OP_ADD;
+ break;
+ case '-':
+ kind = OP_SUB;
+ break;
+ case '|':
+ kind = OP_BIN_OR;
+ break;
+ case '^':
+ kind = OP_BIN_XOR;
+ break;
+ }
+
+ if (kind != OP_NONE) {
+ *op = kind;
+ token_consume();
+ return true;
+ }
+
+ return false;
+}
+
+typedef struct Expr Expr;
+
+struct Expr {
+ enum ExprKind kind;
+ enum OpKind op;
+ Expr *left;
+ Expr *right;
+ int32_t operand;
+};
+
+Expr *expr_alloc(void) {
+ Expr *e = malloc(sizeof(Expr));
+ *e = (Expr) {0};
+ return e;
+}
+
+Expr *expr_unary(enum OpKind o, Expr *u) {
+ Expr *e = expr_alloc();
+ *e = (Expr) {
+ .kind = EXPR_UNARY,
+ .op = o,
+ .left = u,
+ };
+ return e;
+}
+
+Expr *expr_binary(enum OpKind o, Expr *l, Expr *r) {
+ Expr *e = expr_alloc();
+ *e = (Expr) {
+ .kind = EXPR_BINARY,
+ .op = o,
+ .left = l,
+ .right = r,
+ };
+ return e;
+}
+
+void print_expr(Expr *e) {
+ assert(e);
+
+ char *op = NULL;
+ switch (e->op) {
+ case OP_NONE: break;
+ case OP_ADD: op = "+"; break;
+ case OP_SUB: op = "-"; break;
+ case OP_BIN_OR: op = "|"; break;
+ case OP_BIN_XOR: op = "^"; break;
+ case OP_MUL: op = "*"; break;
+ case OP_DIV: op = "/"; break;
+ case OP_MOD: op = "%"; break;
+ case OP_SHIFT_L: op = "<<"; break;
+ case OP_SHIFT_R: op = ">>"; break;
+ case OP_BIN_AND: op = "&"; break;
+ case OP_NEG: op = "-"; break;
+ case OP_BIN_NOT: op = "~"; break;
default:
- printf("TOKEN: '%c'\n", token.kind);
+ ERROR();
break;
}
+
+ if (e->kind != EXPR_OPERAND) {
+ printf("(");
+ }
+
+ switch (e->kind) {
+ case EXPR_OPERAND:
+ printf("%i", e->operand);
+ break;
+ case EXPR_UNARY:
+ if (op) {
+ printf("%s ", op);
+ }
+ print_expr(e->left);
+ break;
+ case EXPR_BINARY:
+ printf("%s ", op);
+ print_expr(e->left);
+ printf(" ");
+ print_expr(e->right);
+ break;
+ case EXPR_NONE: // fallthrough
+ default:
+ ERROR();
+ break;
+ }
+
+ if (e->kind != EXPR_OPERAND) {
+ printf(")");
+ }
}
-void lex_test()
-{
- char *source = "+()_HELLO1,234+FOO!994";
- g_stream = source;
- lex_next_token();
- while (g_token.kind)
- {
- print_token(g_token);
- lex_next_token();
+int32_t parse_number(void) {
+ Token t = token_require_kind(TOKEN_INT);
+ return t.val;
+}
+
+Expr *parse_expr_operand(void) {
+ Expr *e = expr_alloc();
+ *e = (Expr) {
+ .operand = parse_number(),
+ .kind = EXPR_OPERAND,
+ };
+ return e;
+}
+
+Expr *parse_expr_unary(void) {
+ enum OpKind op = 0;
+ if (unary_op(&op)) {
+ return expr_unary(op, parse_expr_unary());
+ } else {
+ return parse_expr_operand();
+ }
+}
+
+Expr *parse_expr_mul(void) {
+ Expr *e = parse_expr_unary();
+ enum OpKind op = 0;
+ while (mul_op(&op)) {
+ e = expr_binary(op, e, parse_expr_unary());
}
+ return e;
+}
+
+Expr *parse_expr_add(void) {
+ Expr *e = parse_expr_mul();
+ enum OpKind op = 0;
+ while (add_op(&op)) {
+ e = expr_binary(op, e, parse_expr_mul());
+ }
+
+ return e;
+}
+
+Expr *parse_expr(void) {
+ return parse_expr_add();
+}
+
+// precedence climbing
+// op_unary = "-" | "~".
+// op_mul = "*" | "/" | "%" | "<<" | ">>" | "&".
+// op_add = "+" | "-" | "|" | "^".
+// exp_operand = number.
+// expr_unary = ([op_unary] exp_unary) | exp_base.
+// expr_mul = expr_unary {op_mul expr_unary}.
+// expr_add = expr_mul {op_add expr_mul}.
+// expr = expr_add.
+// number = "0" .. "9" {"0" .. "9"}.
+void parse_test(void) {
+ char *source = "12*34 + 45/56 + -25";
+ printf("%s\n", source);
+ g_stream = source;
+ Expr *e = parse_expr();
+ print_expr(e);
}
-int main(void)
-{
+int main(void) {
buf_test();
lex_test();
+ parse_test();
}