From 2f2d130c80bf70a306337b8bd7ab672ee76e5762 Mon Sep 17 00:00:00 2001 From: Ian Piumarta Date: Sat, 18 Jan 2025 18:42:12 +0900 Subject: [PATCH] Initial commit. --- Makefile | 29 + assert.h | 0 demofiles/Makefile | 14 + demofiles/dangling-pointer.c | 18 + demofiles/invalid-pointer.c | 11 + demofiles/memory-leak.c | 14 + demofiles/multiple-free.c | 13 + demofiles/null-pointer.c | 9 + demofiles/out-of-bounds-access.c | 10 + demofiles/pointer-compare.c | 15 + demofiles/pointer-increment.c | 11 + demofiles/pointer-out-of-bounds.c | 10 + demofiles/segmentation-fault.c | 9 + demofiles/use-after-free.c | 15 + main.leg | 1826 +++++++++++++++++++++++++++++ stdio.h | 1 + stdlib.h | 0 string.h | 0 test.txt | 11 + 19 files changed, 2016 insertions(+) create mode 100644 Makefile create mode 100644 assert.h create mode 100644 demofiles/Makefile create mode 100644 demofiles/dangling-pointer.c create mode 100644 demofiles/invalid-pointer.c create mode 100644 demofiles/memory-leak.c create mode 100644 demofiles/multiple-free.c create mode 100644 demofiles/null-pointer.c create mode 100644 demofiles/out-of-bounds-access.c create mode 100644 demofiles/pointer-compare.c create mode 100644 demofiles/pointer-increment.c create mode 100644 demofiles/pointer-out-of-bounds.c create mode 100644 demofiles/segmentation-fault.c create mode 100644 demofiles/use-after-free.c create mode 100644 main.leg create mode 100644 stdio.h create mode 100644 stdlib.h create mode 100644 string.h create mode 100644 test.txt diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6b3ed01 --- /dev/null +++ b/Makefile @@ -0,0 +1,29 @@ +CFLAGS = -g +CPPFLAGS = -I/opt/local/include +LDFLAGS = -L/opt/local/lib +LDLIBS = -lgc + +all : main + +%.c : %.leg + leg -o $@ $< + +% : %.c + cc $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) -o $@ $< + +test : main + ./main -xvv test.txt + +demo : main + for i in demofiles/*.c; do echo $$i; ./main -x < $$i; done + +demov : main + for i in demofiles/*.c; do echo $$i; ./main -x -vv < $$i; done + +spotless : clean + rm -rf *~ *.dSYM + +clean : .FORCE + rm -f main + +.FORCE : diff --git a/assert.h b/assert.h new file mode 100644 index 0000000..e69de29 diff --git a/demofiles/Makefile b/demofiles/Makefile new file mode 100644 index 0000000..a64e087 --- /dev/null +++ b/demofiles/Makefile @@ -0,0 +1,14 @@ +SRC = $(wildcard *.c) +EXE = $(SRC:.c=) + +CFLAGS = -Wno-return-stack-address + +all : $(EXE) + +spotless : clean + rm -f *~ + +clean : .FORCE + rm -rf $(EXE) *.dSYM + +.FORCE : diff --git a/demofiles/dangling-pointer.c b/demofiles/dangling-pointer.c new file mode 100644 index 0000000..8d17ac9 --- /dev/null +++ b/demofiles/dangling-pointer.c @@ -0,0 +1,18 @@ +// dangling-pointer + +#include +#include +#include + +int *alloc() { + int i; + return &i; +} + +int main() { + int *ptr = alloc(); + assert(ptr); + *ptr = 42; + printf("%d\n", *ptr); + return 0; +} diff --git a/demofiles/invalid-pointer.c b/demofiles/invalid-pointer.c new file mode 100644 index 0000000..e0892c1 --- /dev/null +++ b/demofiles/invalid-pointer.c @@ -0,0 +1,11 @@ +// invalid-pointer + +#include + +int main() { + int *ptr = (int *)(intptr_t)0xDeadD0d0; + printf("%p\n",ptr); + *ptr = 42; // illegal memory access + printf("%d\n", *ptr); + return 0; +} diff --git a/demofiles/memory-leak.c b/demofiles/memory-leak.c new file mode 100644 index 0000000..d3dc83b --- /dev/null +++ b/demofiles/memory-leak.c @@ -0,0 +1,14 @@ +// memory-leak + +#include +#include +#include + +int main() { + for (int i = 0; i < 10; ++i) { + int *ptr = malloc(sizeof(*ptr)); + assert(ptr); + *ptr = i; + } + return 0; +} diff --git a/demofiles/multiple-free.c b/demofiles/multiple-free.c new file mode 100644 index 0000000..1d38172 --- /dev/null +++ b/demofiles/multiple-free.c @@ -0,0 +1,13 @@ +// multiple-free + +#include +#include +#include + +int main() { + int *ptr = malloc(sizeof(*ptr)); + assert(ptr); + free(ptr); + free(ptr); + return 0; +} diff --git a/demofiles/null-pointer.c b/demofiles/null-pointer.c new file mode 100644 index 0000000..eb9518b --- /dev/null +++ b/demofiles/null-pointer.c @@ -0,0 +1,9 @@ +// null-pointer + +#include + +int main() { + char *ptr = NULL; + printf("%s\n", ptr); + return 0; +} diff --git a/demofiles/out-of-bounds-access.c b/demofiles/out-of-bounds-access.c new file mode 100644 index 0000000..ff17732 --- /dev/null +++ b/demofiles/out-of-bounds-access.c @@ -0,0 +1,10 @@ +// out-of-bounds-access + +#include + +int main() { + int array[5] = { 0, 1, 2, 3, 4 }, i = 4; + printf("%d\n", array[i++]); + printf("%d\n", array[i++]); // out of bounds + return 0; +} diff --git a/demofiles/pointer-compare.c b/demofiles/pointer-compare.c new file mode 100644 index 0000000..8b9e62f --- /dev/null +++ b/demofiles/pointer-compare.c @@ -0,0 +1,15 @@ +// pointer-compare + +#include +#include + +int main() { + int array[5] = {0, 1, 2, 3, 4}; + int brray[5] = {0, 1, 2, 3, 4}; + int *p = array + 2; + int *q = array + 4; + int *r = brray + 4; + if (p > q) abort(); + if (p > r) abort(); // illegal comparison + return 0; +} diff --git a/demofiles/pointer-increment.c b/demofiles/pointer-increment.c new file mode 100644 index 0000000..a3c3eb2 --- /dev/null +++ b/demofiles/pointer-increment.c @@ -0,0 +1,11 @@ +// pointer-increment + +#include + +int main() { + int array[5] = {0, 1, 2, 3, 4}; + int *ptr = array + 4; + ++ptr; + ++ptr; // out of bounds + return 0; +} diff --git a/demofiles/pointer-out-of-bounds.c b/demofiles/pointer-out-of-bounds.c new file mode 100644 index 0000000..b163336 --- /dev/null +++ b/demofiles/pointer-out-of-bounds.c @@ -0,0 +1,10 @@ +// pointer-out-of-bounds + +#include + +int main() { + int array[5] = {0, 1, 2, 3, 4}; + int *ptr = &array[5]; + *ptr = 42; + return 0; +} diff --git a/demofiles/segmentation-fault.c b/demofiles/segmentation-fault.c new file mode 100644 index 0000000..97809fe --- /dev/null +++ b/demofiles/segmentation-fault.c @@ -0,0 +1,9 @@ +// segmentation-fault + +#include + +int main() { + int *ptr = NULL; + *ptr = 42; + return 0; +} diff --git a/demofiles/use-after-free.c b/demofiles/use-after-free.c new file mode 100644 index 0000000..0a1dbf8 --- /dev/null +++ b/demofiles/use-after-free.c @@ -0,0 +1,15 @@ +// use-after-free + +#include +#include +#include + +int main() { + int *ptr = malloc(sizeof(*ptr)); + assert(ptr); + *ptr = 42; + free(ptr); + printf("%d\n", *ptr); // use after free + *ptr = 43; // use after free + return 0; +} diff --git a/main.leg b/main.leg new file mode 100644 index 0000000..712838e --- /dev/null +++ b/main.leg @@ -0,0 +1,1826 @@ +%{ +; +#include +#include +#include +#include +#include +#include +#include + +void fatal(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + fprintf(stderr, "\n"); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); +} + +#define USEGC 1 + +#if USEGC +# include +# define MALLOC(N) GC_malloc(N) +# define REALLOC(P, N) GC_realloc(P, N) +# define FREE(P) GC_free(P) +#else +# define MALLOC(N) malloc(N) +# define REALLOC(P, N) realloc(P, N) +# define free(P) free(P) +#endif + +#define TAGBITS 2 +#define TAGMASK ((1UL << TAGBITS) - 1) + +#if TAGBITS >= 1 +# define TAGPTR 0b00 +# define TAGINT 0b01 +# if TAGBITS >= 2 +# define TAGFLOAT 0b10 +# endif +#endif + +typedef union Object Object, *oop; + +#define YYSTYPE oop + +typedef enum { + Undefined, Input, Integer, Float, Symbol, Pair, String, Array, + Primitive, Closure, Function, Call, + Block, Unary, Binary, Cast, While, For, If, Return, Continue, Break, + Type, Struct, + VarDecls, FunDefn, +} type_t; + +typedef enum { NEG, NOT, COM, DEREF, REF, PREINC, PREDEC, POSTINC, POSTDEC } unary_t; + +typedef enum { + INDEX, + MUL, DIV, MOD, ADD, SUB, SHL, SHR, + LT, LE, GE, GT, EQ, NE, + BAND, BXOR, BOR, LAND, LOR, + ASSIGN, +} binary_t; + +typedef oop (* prim_t)(int nargs, oop *arguments, oop environment); + +struct Undefined { type_t _type; }; +struct Input { type_t _type; char *name; FILE *file; oop next; }; +struct Integer { type_t _type; long value; }; +struct Float { type_t _type; double value; }; +struct Symbol { type_t _type; char *name; oop value; }; +struct Pair { type_t _type; oop head, tail; }; +struct String { type_t _type; int size; char *elements; }; +struct Array { type_t _type; int size; oop *elements; }; +struct Primitive { type_t _type; prim_t function; char *name; }; +struct Closure { type_t _type; oop function, environment; }; +struct Function { type_t _type; oop parameters, body, *code; }; +struct Call { type_t _type; oop function, arguments; }; +struct Block { type_t _type; oop statements; }; +struct Unary { type_t _type; unary_t operator; oop rhs; }; +struct Binary { type_t _type; binary_t operator; oop lhs, rhs; }; +struct Cast { type_t _type; oop type, declarator, rhs; }; +struct While { type_t _type; oop condition, expression; }; +struct For { type_t _type; oop initialiser, condition, update, body; }; +struct If { type_t _type; oop condition, consequent, alternate; }; +struct Return { type_t _type; oop value; }; +struct Continue { type_t _type; }; +struct Break { type_t _type; oop value; }; + +struct Type { type_t _type; char *name; }; +struct Struct { type_t _type; oop tag, members; }; + +struct VarDecls { type_t _type; oop type, declarations, variables; }; +struct FunDefn { type_t _type; oop type, name, parameters, body; }; + +union Object +{ + type_t _type; + struct Input Input; + struct Integer Integer; + struct Float Float; + struct Symbol Symbol; + struct Pair Pair; + struct String String; + struct Array Array; + struct Primitive Primitive; + struct Closure Closure; + struct Function Function; + struct Call Call; + struct Block Block; + struct Unary Unary; + struct Binary Binary; + struct Cast Cast; + struct For For; + struct While While; + struct If If; + struct Return Return; + struct Continue Continue; + struct Break Break; + struct Type Type; + struct Struct Struct; + struct VarDecls VarDecls; + struct FunDefn FunDefn; +}; + +int opt_O = 0; // optimise (use VM) +int opt_v = 0; // verbose (print eval output, parser output, compiled code) +int opt_x = 0; // disable execution + +Object _nil = { ._type = Undefined }; + +#define nil (&_nil) +#define false (&_nil) +oop true = 0; + +oop _new(size_t size, type_t type) +{ + oop obj = MALLOC(size); + obj->_type = type; + return obj; +} + +#define new(TYPE) _new(sizeof(struct TYPE), TYPE) + +oop newInteger(long value) +{ +# if TAGINT + value <<= 1; // make room for bit on right + value |= 1; // set it to 1 + return (oop )(intptr_t)value; +# else + oop obj = new(Integer); + obj->Integer.value = value; + return obj; +# endif +} + +oop newFloat(double value) +{ +# if TAGFLOAT + union { double d; intptr_t i; oop p; } u; + u.d = value; + u.i &= ~TAGMASK; + u.i |= TAGFLOAT; + return u.p; +# else + oop obj = new(Float); + obj->Float.value = value; + return obj; +# endif +} + +type_t getType(oop obj) +{ +# if TAGINT + if ((intptr_t)obj & 1) return Integer; +# endif +# if TAGFLOAT + if (((intptr_t)obj & TAGMASK) == TAGFLOAT) return Float; +# endif + return obj->_type; +} + +int is(type_t type, oop obj) { return type == getType(obj); } + +oop _check(oop obj, type_t type, char *file, int line) +{ + if (type != getType(obj)) + fatal("%s:%d: expected type %d, got type %d", file, line, type, getType(obj)); + return obj; +} + +#define get(OBJ, TYPE, MEMBER) (_check(OBJ, TYPE, __FILE__, __LINE__)->TYPE.MEMBER) +#define set(OBJ, TYPE, MEMBER, VALUE) (_check(OBJ, TYPE, __FILE__, __LINE__)->TYPE.MEMBER = (VALUE)) + +long _integerValue(oop obj) +{ +# if TAGINT + assert(is(Integer, obj)); + return (intptr_t)obj >> 1; +# else + return get(obj, Integer,value); +# endif +} + +double _floatValue(oop obj) +{ +# if TAGFLOAT + union { double d; oop p; } u; + u.p = obj; + return u.d; +# else + return get(obj, Float,value); +# endif +} + +long integerValue(oop obj) +{ + switch (getType(obj)) { + case Integer: return _integerValue(obj); + case Float: return _floatValue(obj); + default: break; + } + fatal("cannot convert type %d to integer", getType(obj)); + return 0; +} + +double floatValue(oop obj) +{ + switch (getType(obj)) { + case Integer: return _integerValue(obj); + case Float: return _floatValue(obj); + default: break; + } + fatal("cannot convert type %d to float", getType(obj)); + return 0; +} + +oop newSymbol(char *name) +{ + oop obj = new(Symbol); + obj->Symbol.name = strdup(name); + obj->Symbol.value = nil; + return obj; +} + +char *symbolName(oop obj) +{ + return get(obj, Symbol,name); +} + +oop *symbols = 0; +int nsymbols = 0; + +oop intern(char *name) +{ + // find existing + int lo = 0, hi = nsymbols - 1; + while (lo <= hi) { + int mid = (lo + hi) / 2; + oop sym = symbols[mid]; + int cmp = strcmp(name, get(sym, Symbol,name)); + if (cmp < 0) hi = mid - 1; + else if (cmp > 0) lo = mid + 1; + else return sym; // target found + } + // create new + oop sym = newSymbol(name); // sizeof Symbol + // insert new symbol at index lo (where sym would have been found) + symbols = REALLOC(symbols, sizeof(*symbols) * (nsymbols + 1)); + memmove(symbols + lo + 1, // move entries to this location in the array + symbols + lo, // move entries from this location + sizeof(*symbols) * (nsymbols - lo) // element size * number to move + ); + symbols[lo] = sym; + ++nsymbols; + return sym; +} + +oop newPair(oop head, oop tail) +{ + oop obj = new(Pair); + obj->Pair.head = head; + obj->Pair.tail = tail; + return obj; +} + +oop head(oop pair) { return get(pair, Pair,head); } +oop tail(oop pair) { return get(pair, Pair,tail); } + +oop assoc(oop alist, oop key) +{ + while (is(Pair, alist)) { + oop pair = head(alist); + if (key == get(pair, Pair,head)) return pair; + alist = tail(alist); + } + return nil; +} + +oop newString(void) +{ + oop obj = new(String); + obj->String.elements = 0; // empty string + obj->String.size = 0; + return obj; +} + +oop newStringWith(char *s) +{ + oop obj = new(String); + obj->String.elements = strdup(s); + obj->String.size = strlen(s); + return obj; +} + +int String_append(oop string, int element) +{ + char *elements = get(string, String,elements); + int size = get(string, String,size); + elements = REALLOC(elements, sizeof(*elements) * (size + 1)); + set(string, String,elements, elements); + set(string, String,size, size + 1); + return elements[size] = element; +} + +oop newArray(void) +{ + oop obj = new(Array); + obj->Array.elements = 0; // empty array + obj->Array.size = 0; + return obj; +} + +oop Array_append(oop array, oop element) +{ + oop *elements = get(array, Array,elements); + int size = get(array, Array,size); + elements = REALLOC(elements, sizeof(*elements) * (size + 1)); + set(array, Array,elements, elements); + set(array, Array,size, size + 1); + return elements[size] = element; +} + +oop Array_set(oop array, int index, oop element) +{ + oop *elements = get(array, Array,elements); + int size = get(array, Array,size); + if (index >= size) fatal("array index %d out of bounds %d", index, size); + return elements[index] = element; +} + +oop newPrimitive(prim_t function, char *name) +{ + oop obj = new(Primitive); + obj->Primitive.function = function; + obj->Primitive.name = name; + return obj; +} + +oop newClosure(oop function, oop environment) +{ + oop obj = new(Closure); + obj->Closure.function = function; + obj->Closure.environment = environment; + return obj; +} + +oop newFunction(oop parameters, oop body) +{ + oop obj = new(Function); + obj->Function.parameters = parameters; + obj->Function.body = body; + obj->Function.code = 0; + return obj; +} + +oop newCall(oop function, oop arguments) +{ + oop obj = new(Call); + obj->Call.function = function; + obj->Call.arguments = arguments; + return obj; +} + +oop newBlock(oop statements) +{ + oop obj = new(Block); + obj->Block.statements = statements; + return obj; +} + +oop newUnary(unary_t operator, oop operand) +{ + oop obj = new(Unary); + obj->Unary.operator = operator; + obj->Unary.rhs = operand; + return obj; +} + +oop newBinary(binary_t operator, oop lhs, oop rhs) +{ + oop obj = new(Binary); + obj->Binary.operator = operator; + obj->Binary.lhs = lhs; + obj->Binary.rhs = rhs; + return obj; +} + +oop newCast(oop type, oop declarator, oop rhs) +{ + oop obj = new(Cast); + obj->Cast.type = type; + obj->Cast.declarator = declarator; + obj->Cast.rhs = rhs; + return obj; +} + +oop newWhile(oop condition, oop expression) +{ + oop obj = new(While); + obj->While.condition = condition; + obj->While.expression = expression; + return obj; +} + +oop newFor(oop initialiser, oop condition, oop update, oop body) +{ + oop obj = new(For); + obj->For.initialiser = initialiser; + obj->For.condition = condition; + obj->For.update = update; + obj->For.body = body; + return obj; +} + +oop newIf(oop condition, oop consequent, oop alternate) +{ + oop obj = new(If); + obj->If.condition = condition; + obj->If.consequent = consequent; + obj->If.alternate = alternate; + return obj; +} + +oop newReturn(oop value) +{ + oop obj = new(Return); + obj->Return.value = value; + return obj; +} + +oop newContinue(void) +{ + return new(Continue); +} + +oop newBreak(oop value) +{ + oop obj = new(Break); + obj->Break.value = value; + return obj; +} + +void println(oop obj); + +oop newType(char *name) +{ + oop obj = new(Type); + obj->Type.name = name; + return obj; +} + +oop Type_void = 0; +oop Type_char = 0; +oop Type_int = 0; + +oop newStruct(oop tag, oop members) +{ + oop obj = new(Struct); + obj->Struct.tag = tag; + obj->Struct.members = members; + return obj; +} + +oop newVarDecls(oop type, oop declaration) +{ + oop obj = new(VarDecls); + obj->VarDecls.type = type; + obj->VarDecls.declarations = newArray(); + obj->VarDecls.variables = newArray(); + Array_append(obj->VarDecls.declarations, declaration); + return obj; +} + +void VarDecls_append(oop vd, oop declaration) +{ + Array_append(get(vd, VarDecls,declarations), declaration); +} + +oop newFunDefn(oop type, oop name, oop parameters, oop body) +{ + oop obj = new(FunDefn); + obj->FunDefn.type = type; + obj->FunDefn.name = name; + obj->FunDefn.parameters = parameters; + obj->FunDefn.body = body; + return obj; +} + +void printiln(oop obj, int indent) +{ + printf("%*s", indent*2, ""); + switch (getType(obj)) { + case Undefined: printf("nil\n"); break; + case Input: printf("<%s>\n", get(obj, Input,name)); break; + case Integer: printf("%ld\n", integerValue(obj)); break; + case Float: printf("%f\n", floatValue(obj)); break; + case Symbol: printf("%s\n", symbolName (obj)); break; + case Pair: { + printf("PAIR\n"); + printiln(head(obj), indent+1); + printiln(tail(obj), indent+1); + break; + } + case String: { + char *elts = get(obj, String,elements); + int size = get(obj, String,size); + printf("STRING %d \"", size); + for (int i = 0; i < size; ++i) { + int c = elts[i]; + if ('"' == c) + printf("\\\""); + else if (31 < c && c < 127) + putchar(c); + else + printf("\\x%02x", c); + } + printf("\"\n"); + break; + } + case Array: { + oop *elts = get(obj, Array,elements); + int size = get(obj, Array,size); + printf("ARRAY %d\n", size); + for (int i = 0; i < size; ++i) + printiln(elts[i], indent+1); + break; + } + case Primitive: { + printf("PRIMITIVE<%s>\n", get(obj, Primitive,name)); + break; + } + case Closure: { + printf("CLOSURE\n"); + printiln(get(obj, Closure,function), indent+1); + break; + } + case Function: { + printf("FUNCTION\n"); + printiln(get(obj, Function,parameters), indent+1); + printiln(get(obj, Function,body), indent+1); + break; + } + case Call: { + printf("CALL\n"); + printiln(get(obj, Call,function), indent+1); + printiln(get(obj, Call,arguments), indent+1); + break; + } + case Block: { + printf("BLOCK\n"); + printiln(get(obj, Block,statements), indent+1); + break; + } + case Unary: { + switch (get(obj, Unary,operator)) { + case NEG: printf("NEG\n"); break; + case NOT: printf("NOT\n"); break; + case COM: printf("COM\n"); break; + case DEREF: printf("DEREF\n"); break; + case REF: printf("REF\n"); break; + case PREINC: printf("PREINC\n"); break; + case PREDEC: printf("PREDEC\n"); break; + case POSTINC: printf("POSTINC\n"); break; + case POSTDEC: printf("POSTDEC\n"); break; + } + printiln(get(obj, Unary,rhs), indent+1); + break; + } + case Binary: { + switch (get(obj, Binary,operator)) { + case INDEX: printf("INDEX\n"); break; + case MUL: printf("MUL\n"); break; + case DIV: printf("DIV\n"); break; + case MOD: printf("MOD\n"); break; + case ADD: printf("ADD\n"); break; + case SUB: printf("SUB\n"); break; + case SHL: printf("SHL\n"); break; + case SHR: printf("SHR\n"); break; + case LT: printf("LT\n"); break; + case LE: printf("LE\n"); break; + case GE: printf("GE\n"); break; + case GT: printf("GT\n"); break; + case EQ: printf("EQ\n"); break; + case NE: printf("NE\n"); break; + case BAND: printf("BAND\n"); break; + case BXOR: printf("BXOR\n"); break; + case BOR: printf("BOR\n"); break; + case LAND: printf("LAND\n"); break; + case LOR: printf("LOR\n"); break; + case ASSIGN: printf("ASSIGN\n"); break; + } + printiln(get(obj, Binary,lhs), indent+1); + printiln(get(obj, Binary,rhs), indent+1); + break; + } + case Cast: { + printf("CAST\n"); + printiln(get(obj, Cast,type ), indent+1); + printiln(get(obj, Cast,declarator), indent+1); + printiln(get(obj, Cast,rhs ), indent+1); + break; + } + case While: { + printf("WHILE\n"); + printiln(get(obj, While,condition), indent+1); + printiln(get(obj, While,expression), indent+1); + break; + } + case For: { + printf("For\n"); + printiln(get(obj, For,initialiser), indent+1); + printiln(get(obj, For,condition), indent+1); + printiln(get(obj, For,update), indent+1); + printiln(get(obj, For,body), indent+1); + break; + } + case If: { + printf("IF\n"); + printiln(get(obj, If,condition), indent+1); + printiln(get(obj, If,consequent), indent+1); + printiln(get(obj, If,alternate), indent+1); + break; + } + case Return: { + printf("RETURN\n"); + printiln(get(obj, Return,value), indent+1); + break; + } + case Continue: { + printf("CONTINUE\n"); + break; + } + case Break: { + printf("BREAK\n"); + printiln(get(obj, Break,value), indent+1); + break; + } + case Type: { + printf("Type %s\n", get(obj, Type,name)); + break; + } + case Struct: { + printf("Struct\n"); + printiln(get(obj, Struct,tag ), indent+1); + printiln(get(obj, Struct,members), indent+1); + break; + } + case VarDecls: { + printf("VarDecls\n"); + printiln(get(obj, VarDecls,type ), indent+1); + printiln(get(obj, VarDecls,declarations), indent+1); + printiln(get(obj, VarDecls,variables ), indent+1); + break; + } + case FunDefn: { + printf("FunDefn\n"); + printiln(get(obj, FunDefn,type ), indent+1); + printiln(get(obj, FunDefn,name ), indent+1); + printiln(get(obj, FunDefn,parameters), indent+1); + printiln(get(obj, FunDefn,body ), indent+1); + break; + } + } +} + +void println(oop obj) +{ + printiln(obj, 0); +} + +oop input = 0; + +oop pushInput(char *name, FILE *file) +{ + oop obj = new(Input); + obj->Input.name = strdup(name); + obj->Input.file = file; + obj->Input.next = input; + return input = obj; +} + +void popInput(void) +{ + if (!input) return; + oop obj = input; + input = get(obj, Input,next); + free(get(obj, Input,name)); + fclose(get(obj, Input,file)); + FREE(obj); +} + +FILE *sysOpen(char *path) +{ + FILE *fp = fopen(path, "r"); + if (!fp) fatal("#include <%s>: %s", path, strerror(errno)); + return fp; +} + +FILE *usrOpen(char *path) +{ + FILE *fp = fopen(path, "r"); + if (!fp) fatal("#include \"%s\": %s", path, strerror(errno)); + return fp; +} + +int getChar(char *buf) +{ + while (input) { + int c = getc(get(input, Input,file)); + if (c != EOF) { + *buf = c; + return 1; + } + popInput(); + } + return 0; +} + +#define YY_INPUT(buf, result, max_size) { result = getChar(buf); } + +YYSTYPE yysval = 0; + +void expected(oop where, char *what) +{ + fatal("%s expected near: %.*s", what, get(where, String,size), get(where, String,elements)); +} + +%} + +start = - ( include { yysval = 0 } + | x:tldecl { yysval = x } + | !. { yysval = 0 } + | e:error { expected(e, "declaration") } + ) + +error = < (![\n\r] .)* > { $$ = newStringWith(yytext) } + +include = HASH INCLUDE ( + '<' < [a-zA-Z0-9_.]* > '>' @{ pushInput(yytext, sysOpen(yytext)) } + | '"' < [a-zA-Z0-9_.]* > '"' @{ pushInput(yytext, usrOpen(yytext)) } + ) + +tldecl = fundefn | vardecl + +vardecl = t:tname d:inidecl { d = newVarDecls(t, d) } + ( COMMA e:inidecl { VarDecls_append(d, e) } + )* SEMI { $$ = d } + +tname = INT { $$ = Type_int } + | CHAR { $$ = Type_char } + | VOID { $$ = Type_void } + | struct + | i:id + +struct = STRUCT ( i:id m:members { $$ = newStruct( i, m) } + | i:id { $$ = newStruct(nil, m) } + | m:members { $$ = newStruct( i, nil) } + ) + +members = LBRACE vardecl* RBRACE + +inidecl = d:decltor ( ASSIGN e:initor { $$ = newBinary(ASSIGN, d, e) } + | { $$ = d } + ) + +decltor = STAR d:decltor { $$ = newUnary(DEREF, d) } + | ddector + +ddector = ( LPAREN d:decltor RPAREN + | d:idopt + ) ( LBRAK e:expropt RBRAK { d = newBinary(INDEX, d, e) } + | p:params { d = newCall(d, e) } + )* { $$ = d } + +params = LPAREN a:mkArray + ( p:pdecl { Array_append(a, p) } + ( COMMA p:pdecl { Array_append(a, p) } + )* )? RPAREN { $$ = a } + +pdecl = t:tname d:decltor { $$ = newVarDecls(t, d) } + +initor = agrinit | expr + +agrinit = LBRACE i:mkArray + ( j:initor { Array_append(i, j) } + ( COMMA j:initor { Array_append(i, j) } + )* COMMA? )? RBRACE { $$ = i } + +fundefn = t:tname d:funid p:params b:block { $$ = newFunDefn(t, d, p, b) } + +funid = STAR d:funid { $$ = newUnary(DEREF, d) } + | LPAREN d:funid RPAREN { $$ = d } + | id + +block = LBRACE b:mkArray + ( s:stmt { Array_append(b, s) } + )* RBRACE { $$ = newBlock(b) } + +stmt = WHILE c:cond s:stmt { $$ = newWhile(c, e) } + | FOR LPAREN + ( i:vardecl | i:expropt SEMI ) + c:expropt SEMI u:expropt RPAREN + b:stmt { $$ = newFor(i, c, u, b) } + | IF c:cond s:stmt + ( ELSE t:stmt { $$ = newIf(c, s, t) } + | { $$ = newIf(c, s, nil) } + ) + | RETURN e:expropt SEMI { $$ = newReturn(e) } + | CONTINU SEMI { $$ = newContinue() } + | BREAK SEMI { $$ = newBreak(nil) } + | block + | vardecl + | e:expr SEMI { $$ = e } + +cond = LPAREN e:expr RPAREN { $$ = e } + +expropt = expr | { $$ = nil } + +expr = assign + +assign = l:unary ASSIGN x:expr { $$ = newBinary(ASSIGN, l, x) } + | logor + +logor = l:logand ( BARBAR r:logand { l = newBinary(LOR, l, r) } + )* { $$ = l } + +logand = l:bitor ( ANDAND r:bitor { l = newBinary(LAND, l, r) } + )* { $$ = l } + +bitor = l:bitxor ( BAR r:bitxor { l = newBinary(BOR, l, r) } + )* { $$ = l } + +bitxor = l:bitand ( HAT r:bitand { l = newBinary(BXOR, l, r) } + )* { $$ = l } + +bitand = l:equal ( AND r:equal { l = newBinary(BAND, l, r) } + )* { $$ = l } + +equal = l:inequal ( EQUAL r:inequal { l = newBinary(EQ, l, r) } + | NEQUAL r:inequal { l = newBinary(NE, l, r) } + )* { $$ = l } + +inequal = l:shift ( LESS r:shift { l = newBinary(LT, l, r) } + | LESSEQ r:shift { l = newBinary(LE, l, r) } + | GRTREQ r:shift { l = newBinary(GE, l, r) } + | GRTR r:shift { l = newBinary(GT, l, r) } + )* { $$ = l } + +shift = l:sum ( LSHIFT r:sum { l = newBinary(SHL, l, r) } + | RSHIFT r:sum { l = newBinary(SHR, l, r) } + )* { $$ = l } + +sum = l:prod ( PLUS r:prod { l = newBinary(ADD, l, r) } + | MINUS r:prod { l = newBinary(SUB, l, r) } + )* { $$ = l } + +prod = l:unary ( STAR r:unary { l = newBinary(MUL, l, r) } + | SLASH r:unary { l = newBinary(DIV, l, r) } + | PCENT r:unary { l = newBinary(MOD, l, r) } + )* { $$ = l } + +unary = MINUS r:unary { $$ = newUnary(NEG, r) } + | PLING r:unary { $$ = newUnary(NOT, r) } + | TILDE r:unary { $$ = newUnary(COM, r) } + | STAR r:unary { $$ = newUnary(DEREF, r) } + | AND r:unary { $$ = newUnary(REF, r) } + | PPLUS r:unary { $$ = newUnary(PREINC, r) } + | MMINUS r:unary { $$ = newUnary(PREDEC, r) } + | cast + | postfix + +cast = LPAREN t:tname d:decltor + RPAREN r:unary { $$ = newCast(t, d, r) } + +postfix = v:value ( a:args { v = newCall(v, a) } + | i:index { v = newBinary(INDEX, v, i) } + | PPLUS { v = newUnary(POSTINC, a) } + | MMINUS { v = newUnary(POSTDEC, a) } + )* { $$ = v } + +args = LPAREN a:mkArray + ( e:expr { Array_append(a, e) } + ( COMMA e:expr { Array_append(a, e) } + )* )? RPAREN { $$ = a } + +index = LBRAK e:expr RBRAK { $$ = e } + +value = LPAREN e:expr RPAREN { $$ = e } + | float + | integer + | string + | id + +mkArray = { $$ = newArray() } + +float = < [-+]? [0-9]* '.' [0-9]+ ( [eE] [-+]? [0-9]+ )? > - { $$ = newFloat(atof(yytext)) } + | < [-+]? [0-9]+ '.' [0-9]* ( [eE] [-+]? [0-9]+ )? > - { $$ = newFloat(atof(yytext)) } + | < [-+]? [0-9]+ '.'? [0-9]* ( [eE] [-+]? [0-9]+ ) > - { $$ = newFloat(atof(yytext)) } + +integer = "0x" < [0-9a-fA-F]+ > - { $$ = newInteger(strtol(yytext, 0, 16)) } + | "0b" < [0-1]+ > - { $$ = newInteger(strtol(yytext, 0, 2)) } + | < [0-9]+ > - { $$ = newInteger(strtol(yytext, 0, 10)) } + | "'" !"'" c:char "'" - { $$ = c } + +mkStr = { $$ = newString() } + +string = '"' s:mkStr + ( !'"' c:char { String_append(s, _integerValue(c)) } + )* '"' - { $$ = s } + +char = '\\' e:escaped { $$ = e } + | < . > { $$ = newInteger(yytext[0]) } + +escaped = 'a' { $$ = newInteger('\a') } + | 'b' { $$ = newInteger('\b') } + | 'f' { $$ = newInteger('\f') } + | 'n' { $$ = newInteger('\n') } + | 'r' { $$ = newInteger('\r') } + | 't' { $$ = newInteger('\t') } + | 'v' { $$ = newInteger('\v') } + | "'" { $$ = newInteger('\'') } + | '"' { $$ = newInteger('\"') } + | '\\' { $$ = newInteger('\\') } + | < OCT OCT? OCT? > { $$ = newInteger(strtol(yytext, 0, 8)) } + | 'x' < HEX? HEX? > { $$ = newInteger(strtol(yytext, 0, 16)) } + | 'u' < HEX? HEX? HEX? HEX? > { $$ = newInteger(strtol(yytext, 0, 16)) } + +OCT = [0-7] +HEX = [0-9a-fA-F] + +idopt = id | { $$ = nil } + +id = !keyword < alpha alnum* > - { $$ = intern(yytext) } + +keyword = VOID | CHAR | INT | STRUCT + | IF | ELSE | WHILE | FOR | RETURN | CONTINU | BREAK + +alpha = [a-zA-Z_] +alnum = [a-zA-Z_0-9] + +- = blank* +blank = [ \t\n\r] | comment +comment = "//" < (![\n\r] .)* > + | "/*" (!"*/" .)* "*/" + +HASH = "#" - +INCLUDE = "include" ![_a-zA-Z0-9] - +VOID = "void" ![_a-zA-Z0-9] - +CHAR = "char" ![_a-zA-Z0-9] - +INT = "int" ![_a-zA-Z0-9] - +STRUCT = "struct" ![_a-zA-Z0-9] - +# UNION = "union" ![_a-zA-Z0-9] - +# ENUM = "enum" ![_a-zA-Z0-9] - +IF = "if" ![_a-zA-Z0-9] - +ELSE = "else" ![_a-zA-Z0-9] - +WHILE = "while" ![_a-zA-Z0-9] - +FOR = "for" ![_a-zA-Z0-9] - +RETURN = "return" ![_a-zA-Z0-9] - +CONTINU = "continue" ![_a-zA-Z0-9] - +BREAK = "break" ![_a-zA-Z0-9] - +ASSIGN = "=" !"=" - +PLUS = "+" !"+" - +PPLUS = "++" - +MINUS = "-" !"-" - +MMINUS = "--" - +STAR = "*" - +BAR = "|" !"|" - +BARBAR = "||" - +AND = "&" !"&" - +ANDAND = "&&" - +HAT = "^" - +EQUAL = "==" - +NEQUAL = "!=" - +LESS = "<" ![=<] - +LESSEQ = "<=" - +GRTREQ = ">=" - +GRTR = ">" ![=>] - +LSHIFT = "<<" - +RSHIFT = ">>" - +SLASH = "/" - +PCENT = "%" - +PLING = "!" !"=" - +TILDE = "~" - +LPAREN = "(" - +RPAREN = ")" - +LBRAK = "[" - +RBRAK = "]" - +LBRACE = "{" - +RBRACE = "}" - +COMMA = "," - +SEMI = ";" - + +%% +; + +#include + +enum { NLR_INIT = 0, NLR_RETURN, NLR_CONTINUE, NLR_BREAK }; + +Object *nlrValue = 0; + +jmp_buf *nlrStack = 0; +int nlrCount = 0; +int nlrMax = 0; + +void _nlrPush(void) +{ + if (nlrCount >= nlrMax) + nlrStack = realloc(nlrStack, sizeof(*nlrStack) * (nlrMax += 8)); +} + +#define nlrPush() setjmp((_nlrPush(), nlrStack[nlrCount++])) + +oop nlrPop(void) +{ + assert(nlrCount > 0); + --nlrCount; + return nlrValue; +} + +#define nlrReturn(TYPE, VAL) ((nlrValue = (VAL), longjmp(nlrStack[nlrCount-1], TYPE))) + +oop define(oop lhs, oop rhs, oop env) +{ + oop kv = assoc(env, lhs); + if (nil != kv) return set(kv, Pair,tail, rhs); // local + if (!is(Symbol, lhs)) fatal("cannot assign to non-symbol type %d", getType(lhs)); + return set(lhs, Symbol,value, rhs); +} + +#define IBINOP(L, OP, R) newInteger(integerValue(L) OP integerValue(R)) +#define IRELOP(L, OP, R) (integerValue(L) OP integerValue(R) ? true : false) + +#define FBINOP(L, OP, R) newFloat(floatValue(L) OP floatValue(R)) +#define FRELOP(L, OP, R) (floatValue(L) OP floatValue(R) ? true : false) + +#define isNil(O) ((O) == nil) +#define isFalse(O) ((O) == nil) +#define isTrue(O) ((O) != nil) + +oop eval(oop exp, oop env); + +oop apply(oop function, oop arguments, oop env) +{ + switch (getType(function)) { + default: { + fatal("object type %d is not callable", getType(function)); + } + case Primitive: { + return get(function, Primitive,function) + ( get(arguments, Array,size), + get(arguments, Array,elements), + env ); + } + case Closure: { + oop env2 = get(function, Closure,environment); + function = get(function, Closure,function); + oop parameters = get(function, Function,parameters); + int nParams = get(parameters, Array,size); + int nArgs = get(arguments, Array,size); + if (nParams != nArgs) + fatal("wrong number of arguments, expected %d got %d", nParams, nArgs); + oop *params = get(parameters, Array,elements); + oop *args = get(arguments, Array,elements); + for (int i = nArgs; i--;) { + oop key = params[i]; + oop val = eval(args[i], env); + env2 = newPair(newPair(key, val), env2); + } + oop body = get(function, Function,body); + oop result = nil; + switch (nlrPush()) { // longjmp occurred + case NLR_INIT: break; + case NLR_RETURN: return nlrPop(); + case NLR_CONTINUE: fatal("continue outside loop"); + case NLR_BREAK: fatal("break outside loop"); + } + result = eval(body, env2); + nlrPop(); + return result; + } + } +} + +oop eval(oop exp, oop env) +{ + switch (getType(exp)) { + case Undefined: assert(!"this cannot happen"); + case Input: assert(!"this cannot happen"); + case Integer: return exp; + case Float: return exp; + case Symbol: { + oop kv = assoc(env, exp); + if (nil == kv) return get(exp, Symbol,value); // global value stored in symbol + return tail(kv); // local value stored in association + } + case Pair: assert(!"this cannot happen"); + case String: return exp; + case Array: assert(!"this cannot happen"); + case Primitive: return exp; + case Closure: return exp; + case Function: return newClosure(exp, env); + case Call: { + oop fun = eval(get(exp, Call,function), env); + oop args = get(exp, Call,arguments); + return apply(fun, args, env); + } + case Block: { + Object *stmts = get(exp, Block,statements); + int size = get(stmts, Array,size); + oop *elts = get(stmts, Array,elements); + Object *result = nil; + for (int i = 0; i < size; ++i) { + result = eval(elts[i], env); + } + return result; + } + case Unary: { + oop rhs = eval(get(exp, Unary,rhs), env); + switch (get(exp, Unary,operator)) { + case NEG: return ( is(Float, rhs) + ? newFloat (-floatValue (rhs)) + : newInteger(-integerValue(rhs)) ); + case NOT: return isFalse(rhs) ? true : false; + case COM: return newInteger(~integerValue(rhs)); + case DEREF: assert(!"unimplemented"); + case REF: assert(!"unimplemented"); + case PREINC: assert(!"unimplemented"); + case PREDEC: assert(!"unimplemented"); + case POSTINC: assert(!"unimplemented"); + case POSTDEC: assert(!"unimplemented"); + } + break; + } + case Binary: { + oop lhs = get(exp, Binary,lhs); + oop rhs = get(exp, Binary,rhs); + switch (get(exp, Binary,operator)) { + case LAND: return isFalse(eval(lhs, env)) ? false : eval(rhs, env); + case LOR: return isTrue (eval(lhs, env)) ? true : eval(rhs, env); + case ASSIGN: { + rhs = eval(rhs, env); + return define(lhs, rhs, env); + } + default: { + lhs = eval(lhs, env); + rhs = eval(rhs, env); + if (Float == getType(lhs) || Float == getType(rhs)) { // floating point result + switch (get(exp, Binary,operator)) { + case INDEX: assert(!"unimplemented"); + case MUL: return FBINOP(lhs, * , rhs); + case DIV: return FBINOP(lhs, / , rhs); + case MOD: return newFloat(fmod(floatValue(lhs), floatValue(rhs))); + case ADD: return FBINOP(lhs, + , rhs); + case SUB: return FBINOP(lhs, - , rhs); + case SHL: return IBINOP(lhs, <<, rhs); + case SHR: return IBINOP(lhs, >>, rhs); + case LT: return FRELOP(lhs, < , rhs); + case LE: return FRELOP(lhs, <=, rhs); + case GE: return FRELOP(lhs, >=, rhs); + case GT: return FRELOP(lhs, > , rhs); + case EQ: return FRELOP(lhs, == , rhs); + case NE: return FRELOP(lhs, !=, rhs); + case BAND: return IBINOP(lhs, & , rhs); + case BXOR: return IBINOP(lhs, ^ , rhs); + case BOR: return IBINOP(lhs, | , rhs); + case LAND: + case LOR: + case ASSIGN: + break; + } + } + else { // integer result + switch (get(exp, Binary,operator)) { + case INDEX: assert("!unimplemented"); + case MUL: return IBINOP(lhs, * , rhs); + case DIV: return IBINOP(lhs, / , rhs); + case MOD: return IBINOP(lhs, % , rhs); + case ADD: return IBINOP(lhs, + , rhs); + case SUB: return IBINOP(lhs, - , rhs); + case SHL: return IBINOP(lhs, <<, rhs); + case SHR: return IBINOP(lhs, >>, rhs); + case LT: return IRELOP(lhs, < , rhs); + case LE: return IRELOP(lhs, <=, rhs); + case GE: return IRELOP(lhs, >=, rhs); + case GT: return IRELOP(lhs, > , rhs); + case EQ: return IRELOP(lhs, == , rhs); + case NE: return IRELOP(lhs, !=, rhs); + case BAND: return IBINOP(lhs, & , rhs); + case BXOR: return IBINOP(lhs, ^ , rhs); + case BOR: return IBINOP(lhs, | , rhs); + case LAND: + case LOR: + case ASSIGN: + break; + } + } + } + } + assert(!"this cannot happen"); + break; + } + case Cast: { + assert(!"unimplemented"); + break; + } + case While: { + oop cond = get(exp, While,condition); + oop expr = get(exp, While,expression); + oop result = nil; + switch (nlrPush()) { + case NLR_INIT: break; + case NLR_RETURN: nlrReturn(NLR_RETURN, nlrPop()); // propagate upwards + case NLR_CONTINUE: break; + case NLR_BREAK: return nlrPop(); + } + while (isTrue(eval(cond, env))) { + result = eval(expr, env); + } + nlrPop(); + return result; + } + case For: { + assert(!"unimplemented"); + return nil; + } + case If: { + oop cond = get(exp, If,condition); + oop conseq = get(exp, If,consequent); + oop altern = get(exp, If,alternate); + return isTrue(eval(cond, env)) ? eval(conseq, env) : eval(altern, env); + } + case Return: { + nlrReturn(NLR_RETURN, eval(get(exp, Return,value), env)); + break; + } + case Continue: { + nlrReturn(NLR_CONTINUE, nil); + break; + } + case Break: { + nlrReturn(NLR_BREAK, eval(get(exp, Break,value), env)); + break; + } + case Type: assert(!"unimplemented"); break; + case Struct: assert(!"unimplemented"); break; + case VarDecls: assert(!"unimplemented"); break; + case FunDefn: assert(!"unimplemented"); break; + } + assert(!"this cannot happen"); + return 0; +} + +// primitive functions + +oop prim_print(int argc, oop *argv, oop env) // array +{ + oop result = nil; + for (int i = 0; i < argc; ++i) { + result = eval(argv[i], env); + println(result); + } + return result; +} + +#include + +oop prim_memsize(int argc, oop *argv, oop env) // array +{ + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) < 0) return nil; + return newInteger(ru.ru_maxrss); +} + +enum opcode_t { iHALT = 0, iPUSH, iPOP, + iNOT, iCOM, iNEG, iDEREF, iINDEX, + iMUL, iDIV, iMOD, iADD, iSUB, iSHL, iSHR, + iLT, iLE, iGE, iGT, iEQ, iNE, + iAND, iXOR, iOR, + iGETGVAR, iSETGVAR, + iCLOSE, + iCALL, iRETURN, + iJMP, iJMPF, +}; + +oop stackError(char *reason) +{ + printf("stack %s\n", reason); + exit(1); + return nil; +} + +void disassemble(oop program) +{ + oop *code = get(program, Array,elements); + int size = get(program, Array,size); + int pc = 0; + while (pc < size) { + printf("%04d", pc); + int opcode = _integerValue(code[pc++]); + printf(" %02d\t", opcode); + switch (opcode) { + case iHALT: printf("HALT\n"); break; + case iPUSH: printf("PUSH\t"); println(code[pc++]); break; + case iPOP: printf("POP\n"); break; + case iNOT: printf("NOT\n"); break; + case iCOM: printf("COM\n"); break; + case iNEG: printf("NEG\n"); break; + case iDEREF: printf("DEREF\n"); break; + case iINDEX: printf("INDEX\n"); break; + case iMUL: printf("MUL\n"); break; + case iDIV: printf("DIV\n"); break; + case iMOD: printf("MOD\n"); break; + case iADD: printf("ADD\n"); break; + case iSUB: printf("SUB\n"); break; + case iSHL: printf("SHL\n"); break; + case iSHR: printf("SHR\n"); break; + case iLT: printf("LT\n"); break; + case iLE: printf("LE\n"); break; + case iGE: printf("GE\n"); break; + case iGT: printf("GT\n"); break; + case iEQ: printf("EQ\n"); break; + case iNE: printf("NE\n"); break; + case iAND: printf("AND\n"); break; + case iXOR: printf("XOR\n"); break; + case iOR: printf("OR\n"); break; + case iGETGVAR: printf("GETGVAR\t"); println(code[pc++]); break; + case iSETGVAR: printf("SETGVAR\t"); println(code[pc++]); break; + case iCLOSE: printf("CLOSE\t"); println(code[pc++]); break; + case iCALL: printf("CALL\t"); println(code[pc++]); break; + case iRETURN: printf("RETURN\n"); break; + case iJMP: printf("JMP\t"); println(code[pc++]); break; + case iJMPF: printf("JMPF\t"); println(code[pc++]); break; + } + } +} + + +oop execute(oop program) +{ + oop *code = get(program, Array,elements); + int pc = 0; + + oop stack[32]; + int sp = 32; // clear the stack + + oop env = nil; + + struct Frame { + Object *env; + oop *code; + int pc; + } frames[32]; + int fp = 32; + +# define push(O) (sp > 0 ? stack[--sp] = (O) : stackError("overflow")) +# define pop() (sp < 32 ? stack[sp++] : stackError("underflow")) +# define top (stack[sp]) + + for (;;) { + oop insn = code[pc++]; + switch ((enum opcode_t)_integerValue(insn)) { + case iHALT: { + if (sp < 31) fatal("%d items on stack at end of execution", 32-sp); + if (sp < 32) return stack[sp]; + fatal("stack empty at end of execution"); + return nil; + } + case iPUSH: { + oop operand = code[pc++]; + push(operand); + continue; + } + case iPOP: { + pop(); + continue; + } + case iNOT: { + top = (isFalse(top) ? true : false); + continue; + } + case iCOM: { + top = newInteger(~integerValue(top)); + continue; + } + case iNEG: { + top = is(Float, top) ? newFloat(-floatValue(top)) : newInteger(-integerValue(top)); + continue; + } + case iDEREF: { + assert(!"unimplemented"); + continue; + } + case iINDEX: { + assert(!"unimplemented"); + continue; + } +# define BINOP(OP) { \ + oop rhs = pop(), lhs = pop(); \ + if (is(Float, lhs) || is(Float, rhs)) push(FBINOP(lhs, OP, rhs)); \ + else push(IBINOP(lhs, OP, rhs)); \ + continue; \ + } + case iMUL: BINOP(*); + case iDIV: BINOP(/); + case iMOD: { + oop rhs = pop(), lhs = pop(); + if (is(Float, lhs) || is(Float, rhs)) + push(newFloat(fmod(floatValue(lhs), floatValue(rhs)))); + else + push(IBINOP(lhs, %, rhs)); + continue; + } + case iADD: BINOP(+); + case iSUB: BINOP(-); +# undef BINOP +# define BINOP(OP) { \ + oop rhs = pop(), lhs = pop(); \ + push(IBINOP(lhs, OP, rhs)); \ + continue; \ + } + case iSHL: BINOP(<<); + case iSHR: BINOP(>>); + case iAND: BINOP(&); + case iXOR: BINOP(^); + case iOR: BINOP(|); +# undef BINOP +# define BINOP(OP) { \ + oop rhs = pop(), lhs = pop(); \ + if (is(Float, lhs) || is(Float, rhs)) \ + push(floatValue(lhs) OP floatValue(rhs) ? true : false); \ + else \ + push(integerValue(lhs) OP integerValue(rhs) ? true : false); \ + continue; \ + } + case iLT: BINOP(< ); + case iLE: BINOP(<=); + case iGE: BINOP(>=); + case iGT: BINOP(> ); + case iEQ: BINOP(==); + case iNE: BINOP(!=); +# undef BINOP + case iGETGVAR: { + oop operand = code[pc++]; + oop keyval = assoc(env, operand); + if (nil != keyval) { + push(get(keyval, Pair,tail)); + continue; + } + push(get(operand, Symbol,value)); + continue; + } + case iSETGVAR: { + oop operand = code[pc++]; + oop keyval = assoc(env, operand); + if (nil != keyval) { + set(keyval, Pair,tail, top); + continue; + } + set(operand, Symbol,value, top); + continue; + } + case iCLOSE: { + oop func = code[pc++]; + push(newClosure(func, env)); + continue; + } + case iCALL: { + int argc = _integerValue(code[pc++]); + oop func = pop(); + switch (getType(func)) { + case Primitive: { + oop result = get(func, Primitive,function)(argc, stack + sp, nil); + sp += argc; // pop all arguments + push(result); + continue; // next instruction + } + case Closure: { + Object *function = get(func, Closure,function); + Object *environment = get(func, Closure,environment); + Object *parameters = get(function, Function,parameters); + int parc = get(parameters, Array,size); + oop *parv = get(parameters, Array,elements); + int parn = 0; + while (parn < parc && argc > 0) { + environment = newPair(newPair(parv[parn++], pop()), environment); + --argc; + } + while (parn < parc) + environment = newPair(newPair(parv[parn++], nil), environment); + sp += argc; + if (fp < 1) fatal("too many function calls"); + --fp; + frames[fp].env = env; env = environment; + frames[fp].code = code; code = get(function, Function,code); + frames[fp].pc = pc; pc = 0; + assert(code != 0); + continue; + } + default: + fatal("cannot call value of type %d", getType(func)); + } + continue; + } + case iRETURN: { + assert(fp < 32); + env = frames[fp].env; + code = frames[fp].code; + pc = frames[fp].pc; + ++fp; + continue; + } + case iJMP: { + int dest = _integerValue(code[pc++]); + pc = dest; + continue; + } + case iJMPF: { + int dest = _integerValue(code[pc++]); + oop cond = pop(); + if (nil == cond) pc = dest; + continue; + } + } + } + assert(!"this cannot happen"); + return 0; +} + +#define EMITo(O) Array_append(program, (O)) +#define EMITi(I) EMITo(newInteger(I)) + +#define EMIToo(O, P) (( EMITo(O), EMITo(P) )) +#define EMITio(I, P) EMIToo(newInteger(I), P) +#define EMITii(I, J) EMIToo(newInteger(I), newInteger(J)) + +oop compileFunction(oop exp); + +void compileOn(oop exp, oop program, oop cs, oop bs) +{ + switch (getType(exp)) { + case Undefined: EMITio(iPUSH, exp); return; + case Input: EMITio(iPUSH, exp); return; + case Integer: EMITio(iPUSH, exp); return; + case Float: EMITio(iPUSH, exp); return; + case Symbol: EMITio(iGETGVAR, exp); return; + case Pair: EMITio(iPUSH, exp); return; + case String: EMITio(iPUSH, exp); return; + case Array: assert(!"unimplemented"); + case Primitive: EMITio(iPUSH, exp); return; + case Closure: EMITio(iPUSH, exp); return; + case Function: { + assert(0 == get(exp, Function,code)); + oop prog2 = compileFunction(get(exp, Function,body)); + set(exp, Function,code, get(prog2, Array,elements)); + EMITio(iCLOSE, exp); + return; + } + case Call: { + Object *args = get(exp, Call,arguments); + int argc = get(args, Array,size); + oop *argv = get(args, Array,elements); + for (int n = argc; n--;) compileOn(argv[n], program, cs, bs); + compileOn(get(exp, Call,function), program, cs, bs); // GETVAR print + EMITii(iCALL, argc); + return; + } + case Block: { + oop statements = get(exp, Block,statements); + int size = get(statements, Array,size); + if (0 == size) { + EMITio(iPUSH, nil); + return; + } + oop *exps = get(statements, Array,elements); + for (int i = 0; i < size - 1; ++i) { + compileOn(exps[i], program, cs, bs); + EMITi(iPOP); + } + compileOn(exps[size - 1], program, cs, bs); + return; + } + case Unary: { + compileOn(get(exp, Unary,rhs), program, cs, bs); + switch (get(exp, Unary,operator)) { + case NEG: EMITi(iNEG); return; + case NOT: EMITi(iNOT); return; + case COM: EMITi(iCOM); return; + case DEREF: EMITi(iDEREF); return; + case REF: assert(!"unimplemented"); + case PREINC: assert(!"unimplemented"); + case PREDEC: assert(!"unimplemented"); + case POSTINC: assert(!"unimplemented"); + case POSTDEC: assert(!"unimplemented"); + } + break; + } + case Binary: { // MUL{op, lhs, rhs} + switch (get(exp, Binary,operator)) { + case LAND: assert(!"unimplemented"); + case LOR: assert(!"unimplemented"); + case ASSIGN: { + oop symbol = get(exp, Binary,lhs); + oop expr = get(exp, Binary,rhs); + compileOn(expr, program, cs, bs); + EMITio(iSETGVAR, symbol); + return; + } + default: break; + } + compileOn(get(exp, Binary,lhs), program, cs, bs); + compileOn(get(exp, Binary,rhs), program, cs, bs); + switch (get(exp, Binary,operator)) { + case INDEX: assert(!"unimplemented"); + case MUL: EMITi(iMUL); return; + case DIV: EMITi(iDIV); return; + case MOD: EMITi(iMOD); return; + case ADD: EMITi(iADD); return; + case SUB: EMITi(iSUB); return; + case SHL: EMITi(iSHL); return; + case SHR: EMITi(iSHR); return; + case LT: EMITi(iLT); return; + case LE: EMITi(iLE); return; + case GE: EMITi(iGE); return; + case GT: EMITi(iGT); return; + case EQ: EMITi(iEQ); return; + case NE: EMITi(iNE); return; + case BAND: EMITi(iAND); return; + case BXOR: EMITi(iXOR); return; + case BOR: EMITi(iOR); return; + case LAND: + case LOR: + case ASSIGN: assert(!"this cannot happen"); + } + } + + case Cast: { + assert(!"unimplemented"); + return; + } + +# define LABEL(NAME) int NAME = get(program, Array,size) +# define PATCH(J, L) Array_set(program, J+1, newInteger(L)) + + case While: { + oop continues = newArray(); + oop breaks = newArray(); + oop cond = get(exp, While,condition); + oop body = get(exp, While,expression); + EMITio(iPUSH, nil); + LABEL(L1); + compileOn(cond, program, cs, bs); // break/continue apply to enclosing loop + LABEL(J1); + EMITio(iJMPF, nil); + EMITi(iPOP); + compileOn(body, program, continues, breaks); + EMITii(iJMP, L1); + LABEL(L2); + PATCH(J1, L2); + for (int i = get(continues, Array,size); i--;) + PATCH(_integerValue(get(continues, Array,elements)[i]), L1); + for (int i = get(breaks, Array,size); i--;) + PATCH(_integerValue(get(breaks, Array,elements)[i]), L2); + return; + } + case For: { + assert(!"unimplemented"); + return; + } + case If: { + oop cond = get(exp, If,condition); + oop conseq = get(exp, If,consequent); + oop altern = get(exp, If,alternate); + compileOn(cond, program, cs, bs); + LABEL(J1); + EMITio(iJMPF, nil); // L1 + compileOn(conseq, program, cs, bs); + LABEL(J2); + EMITio(iJMP, nil); // L2 + LABEL(L1); + compileOn(altern, program, cs, bs); + LABEL(L2); + PATCH(J1, L1); + PATCH(J2, L2); + return; + } + case Return: assert(!"unimplemented"); + case Continue: { + if (nil == cs) fatal("continue outside loop"); + EMITio(iPUSH, nil); + LABEL(L1); + EMITio(iJMP, nil); + Array_append(cs, newInteger(L1)); + return; + } + case Break: { + if (nil == bs) fatal("continue outside loop"); + EMITio(iPUSH, nil); + LABEL(L1); + EMITio(iJMP, nil); + Array_append(bs, newInteger(L1)); + return; + } + case Type: assert(!"unimplemented"); return; + case Struct: assert(!"unimplemented"); return; + case VarDecls: assert(!"unimplemented"); return; + case FunDefn: assert(!"unimplemented"); return; + } +} + +oop compileFunction(oop exp) +{ + oop program = newArray(); + compileOn(exp, program, nil, nil); + EMITi(iRETURN); + if (opt_v > 2) disassemble(program); + return program; +} + +oop compile(oop exp) // 6*7 +{ + oop program = newArray(); + compileOn(exp, program, nil, nil); + EMITi(iHALT); + if (opt_v > 2) disassemble(program); + return program; +} + +void replFile(char *name, FILE *file) +{ + input = pushInput(name, file); + + while (input) { + if (yyparse() && yysval) { + if (opt_v > 1) println(yysval); + if (0*!opt_x) { + oop result = nil; + if (opt_O) { + oop program = compile(yysval); + result = execute(program); + } + else { + switch (nlrPush()) { + case NLR_INIT: break; + case NLR_RETURN: fatal("return outside function"); + case NLR_CONTINUE: fatal("continue outside loop"); + case NLR_BREAK: fatal("break outside loop"); + } + result = eval(yysval, nil); + nlrPop(); + } + if (opt_v > 0) { + printf("=> "); + println(result); + } + } + } + } +} + +void replPath(char *path) +{ + FILE *file = fopen(path, "r"); + if (!file) fatal("%s: %s", path, strerror(errno)); + replFile(path, file); +} + +int main(int argc, char **argv) +{ + true = newSymbol("true"); + Type_void = newType("void"); + Type_char = newType("char"); + Type_int = newType("int"); + + define(intern("print" ), newPrimitive(prim_print, "print" ), nil); + define(intern("memsize"), newPrimitive(prim_memsize, "memsize"), nil); + + int repls = 0; + + for (int argn = 1; argn < argc;) { + char *arg = argv[argn++]; + if (*arg != '-') { + replPath(arg); + ++repls; + } + else { + while (*++arg) { + switch (*arg) { + case 'O': ++opt_O; continue; + case 'v': ++opt_v; continue; + case 'x': ++opt_x; continue; + default: fatal("uknown option '%c'", *arg); + } + } + } + } + + if (!repls) replFile("stdin", stdin); + + return 0; +} diff --git a/stdio.h b/stdio.h new file mode 100644 index 0000000..941a772 --- /dev/null +++ b/stdio.h @@ -0,0 +1 @@ +// here is stdio diff --git a/stdlib.h b/stdlib.h new file mode 100644 index 0000000..e69de29 diff --git a/string.h b/string.h new file mode 100644 index 0000000..e69de29 diff --git a/test.txt b/test.txt new file mode 100644 index 0000000..722750b --- /dev/null +++ b/test.txt @@ -0,0 +1,11 @@ +// -*- C -*- + +#include + +#include "myfile.c" + +int main() +{ + printf("hello, world\n"); + return 0; +}