# main.leg -- C parser + interpreter # # Last edited: 2025-01-22 11:03:33 by piumarta on zora %{ ; #include #include #include #include #include #include #include #include #define TRACE printf("TRACE %s:%d:%s\n", __FILE__, __LINE__, __PRETTY_FUNCTION__); void fatal(char *fmt, ...) { va_list ap; va_start(ap, fmt); fprintf(stderr, "\n"); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); exit(1); } #define USEGC 1 #if USEGC # include # define MALLOC(N) GC_malloc(N) # define REALLOC(P, N) GC_realloc(P, N) # define FREE(P) GC_free(P) #else # define MALLOC(N) malloc(N) # define REALLOC(P, N) realloc(P, N) # define free(P) free(P) #endif #define TAGBITS 2 #define TAGMASK ((1UL << TAGBITS) - 1) #if TAGBITS >= 1 # define TAGPTR 0b00 # define TAGINT 0b01 # if TAGBITS >= 2 # define TAGFLOAT 0b10 # endif #endif #define indexableSize(A) (sizeof(A) / sizeof(*(A))) typedef union Object Object, *oop; #define YYSTYPE oop #define _do_types(_) \ _(Undefined) _(Input) _(Integer) _(Float) _(Symbol) _(Pair) _(String) _(Array) \ _(Closure) _(Call) _(Block) _(Unary) _(Binary) _(Assign) _(Cast) \ _(While) _(For) _(If) _(Return) _(Continue) _(Break) \ _(Tbase) _(Tpointer) _(Tarray) _(Tstruct) _(Tfunction) \ _(Scope) _(TypeName) _(Variable) _(Constant) _(Function) _(Primitive) \ _(VarDecls) typedef enum { # define _(X) X, _do_types(_) # undef _ } type_t; typedef enum { NEG, NOT, COM, DEREF, REF, PREINC, PREDEC, POSTINC, POSTDEC } unary_t; typedef enum { INDEX, MUL, DIV, MOD, ADD, SUB, SHL, SHR, LT, LE, GE, GT, EQ, NE, BAND, BXOR, BOR, LAND, LOR, } binary_t; typedef oop (* prim_t)(int nargs, oop *arguments, oop environment); struct Undefined { type_t _type; }; struct Input { type_t _type; char *name; FILE *file; oop next; }; struct Integer { type_t _type; long value; }; struct Float { type_t _type; double value; }; struct Symbol { type_t _type; char *name; oop value; }; struct Pair { type_t _type; oop head, tail; }; struct String { type_t _type; int size; char *elements; }; struct Array { type_t _type; int size; oop *elements; }; struct Closure { type_t _type; oop function, environment; }; struct Call { type_t _type; oop function, arguments; }; struct Block { type_t _type; oop statements; }; struct Unary { type_t _type; unary_t operator; oop rhs; }; struct Binary { type_t _type; binary_t operator; oop lhs, rhs; }; struct Assign { type_t _type; oop lhs, rhs; }; struct Cast { type_t _type; oop type, declarator, rhs; }; struct While { type_t _type; oop condition, expression; }; struct For { type_t _type; oop initialiser, condition, update, body; }; struct If { type_t _type; oop condition, consequent, alternate; }; struct Return { type_t _type; oop value; }; struct Continue { type_t _type; }; struct Break { type_t _type; }; struct Tbase { type_t _type; char *name; int size; }; struct Tpointer { type_t _type; oop target; }; struct Tarray { type_t _type; oop target; oop size; }; struct Tstruct { type_t _type; oop tag, members; }; struct Tfunction { type_t _type; oop result, parameters; }; struct Scope { type_t _type; oop names, types, values; }; struct TypeName { type_t _type; oop name, type; }; struct Variable { type_t _type; oop name, type, value; }; struct Constant { type_t _type; oop name, type, value; }; struct Function { type_t _type; oop name, type, parameters, body, *code; }; struct Primitive { type_t _type; oop name, type; prim_t function; }; struct VarDecls { type_t _type; oop type, declarations, variables; }; union Object { type_t _type; struct Input Input; struct Integer Integer; struct Float Float; struct Symbol Symbol; struct Pair Pair; struct String String; struct Array Array; struct Primitive Primitive; struct Closure Closure; struct Call Call; struct Block Block; struct Unary Unary; struct Binary Binary; struct Assign Assign; struct Cast Cast; struct For For; struct While While; struct If If; struct Return Return; struct Continue Continue; struct Break Break; struct Tbase Tbase; struct Tpointer Tpointer; struct Tarray Tarray; struct Tstruct Tstruct; struct Tfunction Tfunction; struct Scope Scope; struct TypeName TypeName; struct Variable Variable; struct Constant Constant; struct Function Function; struct VarDecls VarDecls; }; int opt_O = 0; // optimise (use VM) int opt_v = 0; // verbose (print eval output, parser output, compiled code) int opt_x = 0; // disable execution Object _nil = { ._type = Undefined }; #define nil (&_nil) #define false (&_nil) oop true = 0; oop _new(size_t size, type_t type) { oop obj = MALLOC(size); obj->_type = type; return obj; } #define new(TYPE) _new(sizeof(struct TYPE), TYPE) #define CTOR0(Type) \ oop new##Type(void) { \ return new(Type); \ } #define CTOR1(Type, A) \ oop new##Type(oop A) { \ oop obj = new(Type); \ obj->Type.A = A; \ return obj; \ } #define CTOR2(Type, A, B) \ oop new##Type(oop A, oop B) { \ oop obj = new(Type); \ obj->Type.A = A; \ obj->Type.B = B; \ return obj; \ } #define CTOR3(Type, A, B, C) \ oop new##Type(oop A, oop B, oop C) { \ oop obj = new(Type); \ obj->Type.A = A; \ obj->Type.B = B; \ obj->Type.C = C; \ return obj; \ } #define CTOR4(Type, A, B, C, D) \ oop new##Type(oop A, oop B, oop C, oop D) { \ oop obj = new(Type); \ obj->Type.A = A; \ obj->Type.B = B; \ obj->Type.C = C; \ obj->Type.D = D; \ return obj; \ } oop newInteger(long value) { # if TAGINT value <<= 1; // make room for bit on right value |= 1; // set it to 1 return (oop )(intptr_t)value; # else oop obj = new(Integer); obj->Integer.value = value; return obj; # endif } oop newFloat(double value) { # if TAGFLOAT union { double d; intptr_t i; oop p; } u; u.d = value; u.i &= ~TAGMASK; u.i |= TAGFLOAT; return u.p; # else oop obj = new(Float); obj->Float.value = value; return obj; # endif } char *typeName(int type) { static char *typeNames[] = { # define _(X) #X, _do_types(_) # undef _ }; if (type < 0 || type >= indexableSize(typeNames)) fatal("unknown type %d", type); return typeNames[type]; } type_t getType(oop obj) { # if TAGINT if ((intptr_t)obj & 1) return Integer; # endif # if TAGFLOAT if (((intptr_t)obj & TAGMASK) == TAGFLOAT) return Float; # endif return obj->_type; } char *getTypeName(oop obj) { return typeName(getType(obj)); } int is(type_t type, oop obj) { return type == getType(obj); } oop _check(oop obj, type_t type, char *file, int line) { if (type != getType(obj)) fatal("%s:%d: expected %s, got %s", file, line, typeName(type), getTypeName(obj)); return obj; } #define get(OBJ, TYPE, MEMBER) (_check(OBJ, TYPE, __FILE__, __LINE__)->TYPE.MEMBER) #define set(OBJ, TYPE, MEMBER, VALUE) (_check(OBJ, TYPE, __FILE__, __LINE__)->TYPE.MEMBER = (VALUE)) long _integerValue(oop obj) { # if TAGINT assert(is(Integer, obj)); return (intptr_t)obj >> 1; # else return get(obj, Integer,value); # endif } double _floatValue(oop obj) { # if TAGFLOAT union { double d; oop p; } u; u.p = obj; return u.d; # else return get(obj, Float,value); # endif } long integerValue(oop obj) { switch (getType(obj)) { case Integer: return _integerValue(obj); case Float: return _floatValue(obj); default: break; } fatal("cannot convert type %d to integer", getType(obj)); return 0; } double floatValue(oop obj) { switch (getType(obj)) { case Integer: return _integerValue(obj); case Float: return _floatValue(obj); default: break; } fatal("cannot convert type %d to float", getType(obj)); return 0; } oop newSymbol(char *name) { oop obj = new(Symbol); obj->Symbol.name = strdup(name); obj->Symbol.value = nil; return obj; } char *symbolName(oop obj) { return get(obj, Symbol,name); } oop *symbols = 0; int nsymbols = 0; oop intern(char *name) { // find existing int lo = 0, hi = nsymbols - 1; while (lo <= hi) { int mid = (lo + hi) / 2; oop sym = symbols[mid]; int cmp = strcmp(name, get(sym, Symbol,name)); if (cmp < 0) hi = mid - 1; else if (cmp > 0) lo = mid + 1; else return sym; // target found } // create new oop sym = newSymbol(name); // sizeof Symbol // insert new symbol at index lo (where sym would have been found) symbols = REALLOC(symbols, sizeof(*symbols) * (nsymbols + 1)); memmove(symbols + lo + 1, // move entries to this location in the array symbols + lo, // move entries from this location sizeof(*symbols) * (nsymbols - lo) // element size * number to move ); symbols[lo] = sym; ++nsymbols; return sym; } CTOR2(Pair, head, tail); oop head(oop pair) { return get(pair, Pair,head); } oop tail(oop pair) { return get(pair, Pair,tail); } oop assoc(oop alist, oop key) { while (is(Pair, alist)) { oop pair = head(alist); if (key == get(pair, Pair,head)) return pair; alist = tail(alist); } return nil; } oop newString(void) { oop obj = new(String); obj->String.elements = 0; // empty string obj->String.size = 0; return obj; } oop newStringWith(char *s) { oop obj = new(String); obj->String.elements = strdup(s); obj->String.size = strlen(s); return obj; } int String_append(oop string, int element) { char *elements = get(string, String,elements); int size = get(string, String,size); elements = REALLOC(elements, sizeof(*elements) * (size + 1)); set(string, String,elements, elements); set(string, String,size, size + 1); return elements[size] = element; } char *String_appendAll(oop string, char *chars, int len) { char *elements = get(string, String,elements); int size = get(string, String,size); int newSize = size + len; elements = REALLOC(elements, sizeof(*elements) * newSize); memcpy(elements + size, chars, len); set(string, String,elements, elements); set(string, String,size, newSize); return chars; } #define Array_do(ARR, VAR) \ for (oop do_array = (ARR), VAR = nil; do_array; do_array = 0) \ for (int do_size = get(do_array, Array,size), do_index = 0; \ do_index < do_size && (VAR = do_array->Array.elements[do_index]); \ ++do_index) oop newArray(void) { oop obj = new(Array); obj->Array.elements = 0; // empty array obj->Array.size = 0; return obj; } oop Array_append(oop array, oop element) { oop *elements = get(array, Array,elements); int size = get(array, Array,size); elements = REALLOC(elements, sizeof(*elements) * (size + 1)); set(array, Array,elements, elements); set(array, Array,size, size + 1); return elements[size] = element; } oop newArray1(oop a) { oop obj = newArray(); Array_append(obj, a); return obj; } oop newArray2(oop a, oop b) { oop obj = newArray1(a); Array_append(obj, b); return obj; } int Array_size(oop array) { return get(array, Array,size); } oop Array_last(oop array) { int size = get(array, Array,size); oop *elts = get(array, Array,elements); assert(size > 0); return elts[size - 1]; } oop Array_popLast(oop array) { int size = get(array, Array,size); oop *elts = get(array, Array,elements); assert(size > 0); oop last = elts[--size]; elts[size] = nil; set(array, Array,size, size); return last; } oop Array_get(oop array, int index) { oop *elements = get(array, Array,elements); int size = get(array, Array,size); if (index >= size) fatal("array index %d out of bounds %d", index, size); return elements[index]; } oop Array_set(oop array, int index, oop element) { oop *elements = get(array, Array,elements); int size = get(array, Array,size); if (index >= size) fatal("array index %d out of bounds %d", index, size); return elements[index] = element; } int Array_equal(oop array, oop brray) { if (Array_size(array) != Array_size(brray)) return 0; Array_do(array, a) { oop b = get(brray, Array,elements)[do_index]; if (a != b) return 0; } return 1; } struct keyval { oop key, val; }; oop newMap(void) { return newArray(); } int Map_find(oop map, oop key) { int size = get(map, Array,size) / 2; struct keyval *kvs = (struct keyval *)get(map, Array,elements); int lo = 0, hi = size - 1; while (lo <= hi) { int mi = (lo + hi) / 2; if (key < kvs[mi].key) hi = mi - 1; else if (key > kvs[mi].key) lo = mi + 1; else return mi; } return -1 - lo; // not found, encoding expected location } oop Map_set(oop map, oop key, oop val) { int size = get(map, Array,size) / 2; struct keyval *kvs = (struct keyval *)get(map, Array,elements); int index = Map_find(map, key); if (index > 0) return kvs[index].val = val; index = -1 - index; int last = size++; kvs = realloc(kvs, sizeof(*kvs) * size); while (last > index) { kvs[last] = kvs[last - 1]; --last; } kvs[index].key = key; return kvs[index].val = val; } oop Map_get(oop map, oop key) { struct keyval *kvs = (struct keyval *)get(map, Array,elements); int index = Map_find(map, key); if (index < 0) fatal("key not found in map"); return kvs[index].val; } CTOR2(Closure, function, environment); CTOR2(Call, function, arguments); CTOR1(Block, statements); oop newUnary(unary_t operator, oop operand) { oop obj = new(Unary); obj->Unary.operator = operator; obj->Unary.rhs = operand; return obj; } oop newBinary(binary_t operator, oop lhs, oop rhs) { oop obj = new(Binary); obj->Binary.operator = operator; obj->Binary.lhs = lhs; obj->Binary.rhs = rhs; return obj; } CTOR2(Assign, lhs, rhs); CTOR3(Cast, type, declarator, rhs); CTOR2(While, condition, expression); CTOR4(For, initialiser, condition, update, body); CTOR3(If, condition, consequent, alternate); CTOR1(Return, value); CTOR0(Continue); CTOR0(Break); void println(oop obj); char *toString(oop obj); oop newTbase(char *name, int size) { oop obj = new(Tbase); obj->Tbase.name = name; obj->Tbase.size = size; return obj; } oop t_void = 0; oop t_char = 0; oop t_int = 0; oop t_float = 0; oop t_string = 0; oop newTpointer(oop target) { static oop pointers = 0; if (!pointers) pointers = newArray(); Array_do(pointers, t) if (target == get(t, Tpointer,target)) return t; // uniqe types allow comparison by identity oop obj = new(Tpointer); obj->Tpointer.target = target; Array_append(pointers, obj); return obj; } oop newTarray(oop target, oop size) { static oop arrays = 0; if (!arrays) arrays = newArray(); Array_do(arrays, t) if (target == get(t, Tarray,target) && size == get(t, Tarray,size)) return t; // uniqe types allow comparison by identity oop obj = new(Tarray); obj->Tarray.target = target; obj->Tarray.size = size; Array_append(arrays, obj); return obj; } oop newTstruct(oop tag, oop members) { oop obj = new(Tstruct); obj->Tstruct.tag = tag; obj->Tstruct.members = members; return obj; } oop vars2types(oop vars) { oop types = newArray(); Array_do(vars, var) Array_append(types, get(var, Variable,type)); return types; } oop newTfunction(oop result, oop parameters) { static oop functions = 0; if (!functions) functions = newArray(); Array_do(functions, t) { oop tres = get(t, Tfunction,result); oop tpar = get(t, Tfunction,parameters); if (result == tres && Array_equal(parameters, tpar)) return t; // uniqe types allow comparison by identity } oop obj = new(Tfunction); obj->Tfunction.result = result; obj->Tfunction.parameters = parameters; Array_append(functions, obj); return obj; } oop newScope(void) { oop obj = new(Scope); obj->Scope.names = newArray(); obj->Scope.types = newArray(); obj->Scope.values = newArray(); return obj; } int Scope_find(oop scope, oop name) { oop names = get(scope, Scope,names); int size = get(names, Array,size); oop *elts = get(names, Array,elements); for (int i = size; i--;) // fixme: binary search if (name == elts[i]) return i; return -1; } oop scopes = 0; void Scope_begin(void) { Array_append(scopes, newScope()); } void Scope_end(void) { Array_popLast(scopes); } oop Scope_lookup(oop name) { int n = get(scopes, Array,size); oop *elts = get(scopes, Array,elements); while (n--) { oop scope = elts[n]; int i = Scope_find(scope, name); if (i >= 0) return get(get(scope, Scope,values), Array,elements)[i]; } return 0; // NOTE: 0 means undefined (rather than nil, which means uninitialised) } oop Scope_redefine(oop name, oop value) { int n = get(scopes, Array,size); oop *elts = get(scopes, Array,elements); while (n--) { oop scope = elts[n]; int i = Scope_find(scope, name); if (i >= 0) return get(get(scope, Scope,values), Array,elements)[i] = value; } return 0; // NOTE: 0 means undefined (rather than nil, which means uninitialised) } CTOR2(TypeName, name, type); CTOR3(Variable, name, type, value); CTOR3(Constant, name, type, value); oop newFunction(oop name, oop type, oop parameters, oop body) { oop obj = new(Function); obj->Function.name = name; obj->Function.type = type; obj->Function.parameters = parameters; obj->Function.body = body; obj->Function.code = 0; return obj; } oop newPrimitive(oop name, oop type, prim_t function) { oop obj = new(Primitive); obj->Primitive.name = name; obj->Primitive.type = type; obj->Primitive.function = function; return obj; } oop makeType(oop base, oop decl); oop makeTypes(oop declarations) { int size = get(declarations, Array,size); oop *elts = get(declarations, Array,elements); oop types = newArray(); for (int i = 0; i < size; ++i) { oop vdecl = elts[i]; oop type = get(vdecl, VarDecls,type); oop decls = get(vdecl, VarDecls,declarations); int dsize = get(decls, Array,size); oop *delts = get(decls, Array,elements); for (int j = 0; j < dsize; ++j) Array_append(types, makeType(type, delts[j])); } return types; } oop makeType(oop base, oop type) { switch (getType(type)) { case Undefined: return base; case Symbol: return base; case Assign: return makeType(base, get(type, Assign,lhs)); case Tpointer: return newTpointer(makeType(base, get(type, Tpointer,target))); case Tarray: return newTarray(makeType(base, get(type, Tarray,target)), get(type, Tarray,size)); case Tfunction: return newTfunction(base, get(type, Tfunction,parameters)); default: break; } fatal("cannot make type from delcaration: %s %s", toString(base), toString(type)); return 0; } oop makeName(oop decl) { switch (getType(decl)) { case Undefined: case Symbol: return decl; case Assign: return makeName(get(decl, Assign,lhs)); case Tpointer: return makeName(get(decl, Tpointer,target)); case Tarray: return makeName(get(decl, Tarray,target)); case Tfunction: return makeName(get(decl, Tfunction,result)); default: break; } fatal("cannot make name from delcaration: %s", toString(decl)); return 0; } void VarDecls_append(oop vds, oop decl) { oop val = is(Assign, decl) ? get(decl, Assign,rhs) : nil; oop type = makeType(get(vds, VarDecls,type), decl); oop name = makeName(decl); Array_append(get(vds, VarDecls,declarations), decl); Array_append(get(vds, VarDecls,variables), newVariable(name, type, val)); } oop newVarDecls(oop type, oop decl) { oop obj = new(VarDecls); obj->VarDecls.type = type; obj->VarDecls.declarations = newArray(); obj->VarDecls.variables = newArray(); VarDecls_append(obj, decl); return obj; } #undef CTOR4 #undef CTOR3 #undef CTOR2 #undef CTOR1 #undef CTOR0 oop baseType(oop type) { switch (getType(type)) { case Tbase: return type; case Tpointer: return baseType(get(type, Tpointer,target)); case Tarray: return baseType(get(type, Tarray,target)); default: fatal("cannot find base type of %s", getTypeName(type)); } return nil; } oop toStringOn(oop obj, oop str); void declareStringOn(oop type, oop name, oop str) { switch (getType(type)) { case Tbase: toStringOn(name, str); break; case Tpointer: String_append(str, '*'); declareStringOn(get(type, Tpointer,target), name, str); break; case Tarray: { declareStringOn(get(type, Tarray,target), name, str); String_append(str, '['); toStringOn(get(type, Tarray,size), str); String_append(str, ']'); break; } case Tfunction: { declareStringOn(get(type, Tfunction,result), name, str); String_append(str, '('); Array_do(get(type, Tfunction,parameters), parameter) { if (do_index) String_appendAll(str, ", ", 2); toStringOn(parameter, str); } String_append(str, ')'); break; } default: fatal("cannot convert to declaration: %s", getTypeName(type)); } } char *declareString(oop type, oop name) { oop str = newString(); declareStringOn(type, name, str); String_append(str, 0); return get(str, String,elements); } oop toStringOn(oop obj, oop str) { int n = 0; switch (getType(obj)) { case Undefined: String_appendAll(str, "", 5); break; case Symbol: String_appendAll(str, get(obj, Symbol,name), strlen(get(obj, Symbol,name))); break; case String: String_appendAll(str, get(obj, String,elements), get(obj, String,size)); break; case Tbase: String_appendAll(str, get(obj, Tbase,name), strlen(get(obj, Tbase,name))); break; case Tpointer: { oop target = get(obj, Tpointer,target); toStringOn(target, str); if (is(Tbase, target)) String_append(str, ' '); String_append(str, '*'); break; } case Tfunction: { oop result = get(obj, Tfunction,result); oop params = get(obj, Tfunction,parameters); toStringOn(result, str); String_append(str, '('); Array_do(params, param) { if (do_index) String_appendAll(str, ", ", 2); toStringOn(param, str); } String_append(str, ')'); break; } case Variable: { oop type = get(obj, Variable,type); oop name = get(obj, Variable,name); toStringOn(baseType(type), str); if (t_void != type) { String_append(str, ' '); declareStringOn(type, name, str); } break; } case Function: { toStringOn(get(get(obj, Function,type), Tfunction,result), str); String_append(str, ' '); toStringOn(get(obj, Function,name), str); String_append(str, '('); oop params = get(obj, Function,parameters); Array_do(params, param) { if (do_index) String_appendAll(str, ", ", 2); toStringOn(param, str); } String_append(str, ')'); break; } case VarDecls: { oop vars = get(obj, VarDecls,variables); oop base = get(obj, VarDecls,type); oop decls = get(obj, VarDecls,declarations); Array_do(decls, decl) { if (do_index) String_appendAll(str, ", ", 2); toStringOn(decl, str); String_append(str, ' '); toStringOn(base, str); } break; } default: fatal("cannot convert %s to string", getTypeName(obj)); break; } return str; } char *toString(oop obj) { oop str = toStringOn(obj, newString()); String_append(str, 0); return get(str, String,elements); } void printiln(oop obj, int indent) { printf("%*s", indent*2, ""); switch (getType(obj)) { case Undefined: printf("nil\n"); break; case Input: printf("<%s>\n", get(obj, Input,name)); break; case Integer: printf("%ld\n", integerValue(obj)); break; case Float: printf("%f\n", floatValue(obj)); break; case Symbol: printf("%s\n", symbolName (obj)); break; case Pair: { printf("PAIR\n"); printiln(head(obj), indent+1); printiln(tail(obj), indent+1); break; } case String: { char *elts = get(obj, String,elements); int size = get(obj, String,size); printf("STRING %d \"", size); for (int i = 0; i < size; ++i) { int c = elts[i]; if ('"' == c) printf("\\\""); else if (31 < c && c < 127) putchar(c); else printf("\\x%02x", c); } printf("\"\n"); break; } case Array: { oop *elts = get(obj, Array,elements); int size = get(obj, Array,size); printf("ARRAY %d\n", size); for (int i = 0; i < size; ++i) printiln(elts[i], indent+1); break; } case Primitive: { printf("PRIMITIVE<%s>\n", symbolName(get(obj, Primitive,name))); break; } case Closure: { printf("CLOSURE\n"); printiln(get(obj, Closure,function), indent+1); break; } case Call: { printf("CALL\n"); printiln(get(obj, Call,function ), indent+1); printiln(get(obj, Call,arguments), indent+1); break; } case Block: { printf("BLOCK\n"); printiln(get(obj, Block,statements), indent+1); break; } case Unary: { switch (get(obj, Unary,operator)) { case NEG: printf("NEG\n"); break; case NOT: printf("NOT\n"); break; case COM: printf("COM\n"); break; case DEREF: printf("DEREF\n"); break; case REF: printf("REF\n"); break; case PREINC: printf("PREINC\n"); break; case PREDEC: printf("PREDEC\n"); break; case POSTINC: printf("POSTINC\n"); break; case POSTDEC: printf("POSTDEC\n"); break; } printiln(get(obj, Unary,rhs), indent+1); break; } case Binary: { switch (get(obj, Binary,operator)) { case INDEX: printf("INDEX\n"); break; case MUL: printf("MUL\n"); break; case DIV: printf("DIV\n"); break; case MOD: printf("MOD\n"); break; case ADD: printf("ADD\n"); break; case SUB: printf("SUB\n"); break; case SHL: printf("SHL\n"); break; case SHR: printf("SHR\n"); break; case LT: printf("LT\n"); break; case LE: printf("LE\n"); break; case GE: printf("GE\n"); break; case GT: printf("GT\n"); break; case EQ: printf("EQ\n"); break; case NE: printf("NE\n"); break; case BAND: printf("BAND\n"); break; case BXOR: printf("BXOR\n"); break; case BOR: printf("BOR\n"); break; case LAND: printf("LAND\n"); break; case LOR: printf("LOR\n"); break; } printiln(get(obj, Binary,lhs), indent+1); printiln(get(obj, Binary,rhs), indent+1); break; } case Assign: { printf("ASSIGN\n"); printiln(get(obj, Assign,lhs), indent+1); printiln(get(obj, Assign,rhs), indent+1); break; } case Cast: { printf("CAST\n"); printiln(get(obj, Cast,type ), indent+1); printiln(get(obj, Cast,declarator), indent+1); printiln(get(obj, Cast,rhs ), indent+1); break; } case While: { printf("WHILE\n"); printiln(get(obj, While,condition), indent+1); printiln(get(obj, While,expression), indent+1); break; } case For: { printf("For\n"); printiln(get(obj, For,initialiser), indent+1); printiln(get(obj, For,condition), indent+1); printiln(get(obj, For,update), indent+1); printiln(get(obj, For,body), indent+1); break; } case If: { printf("IF\n"); printiln(get(obj, If,condition), indent+1); printiln(get(obj, If,consequent), indent+1); printiln(get(obj, If,alternate), indent+1); break; } case Return: { printf("RETURN\n"); printiln(get(obj, Return,value), indent+1); break; } case Continue: { printf("CONTINUE\n"); break; } case Break: { printf("BREAK\n"); break; } case Tbase: { printf("<%s:%d>\n", get(obj, Tbase,name), get(obj, Tbase,size)); break; } case Tpointer: { printf("Tpointer\n"); printiln(get(obj, Tpointer,target), indent+1); break; } case Tarray: { printf("Tarray\n"); printiln(get(obj, Tarray,size ), indent+1); printiln(get(obj, Tarray,target), indent+1); break; } case Tstruct: { printf("Tstruct\n"); printiln(get(obj, Tstruct,tag ), indent+1); printiln(get(obj, Tstruct,members), indent+1); break; } case Tfunction: { printf("Tfunction\n"); printiln(get(obj, Tfunction,result ), indent+1); printiln(get(obj, Tfunction,parameters), indent+1); break; } case VarDecls: { printf("VarDecls\n"); printiln(get(obj, VarDecls,type ), indent+1); printiln(get(obj, VarDecls,declarations), indent+1); printiln(get(obj, VarDecls,variables ), indent+1); break; } case Scope: { printf("SCOPE\n"); printiln(get(obj, Scope,names), indent+1); break; } case TypeName: { printf("TypeName\n"); printiln(get(obj, TypeName,name), indent+1); printiln(get(obj, TypeName,type), indent+1); break; } case Variable: { printf("Variable\n"); printiln(get(obj, Variable,name ), indent+1); printiln(get(obj, Variable,type ), indent+1); printiln(get(obj, Variable,value), indent+1); break; } case Constant: { printf("Constant\n"); printiln(get(obj, Constant,name ), indent+1); printiln(get(obj, Constant,type ), indent+1); printiln(get(obj, Constant,value), indent+1); break; }; case Function: { printf("Function %s\n", toString(get(obj, Function,name))); printiln(get(obj, Function,type ), indent+1); printiln(get(obj, Function,parameters), indent+1); printiln(get(obj, Function,body ), indent+1); break; } } } void println(oop obj) { printiln(obj, 0); } oop input = 0; oop pushInput(char *name, FILE *file) { oop obj = new(Input); obj->Input.name = strdup(name); obj->Input.file = file; obj->Input.next = input; return input = obj; } void popInput(void) { if (!input) return; oop obj = input; input = get(obj, Input,next); free(get(obj, Input,name)); fclose(get(obj, Input,file)); FREE(obj); } FILE *sysOpen(char *path) { FILE *fp = fopen(path, "r"); if (!fp) fatal("#include <%s>: %s", path, strerror(errno)); return fp; } FILE *usrOpen(char *path) { FILE *fp = fopen(path, "r"); if (!fp) fatal("#include \"%s\": %s", path, strerror(errno)); return fp; } int getChar(char *buf) { while (input) { int c = getc(get(input, Input,file)); if (c != EOF) { *buf = c; return 1; } popInput(); } return 0; } #define YY_INPUT(buf, result, max_size) { result = getChar(buf); } YYSTYPE yysval = 0; void expected(oop where, char *what) { fatal("%s expected near: %.*s", what, get(where, String,size), get(where, String,elements)); } oop eval(oop exp, oop env); %} start = - ( interp { yysval = 0 } | include { yysval = 0 } | x:tldecl { yysval = x } | !. { yysval = 0 } | e:error { expected(e, "declaration") } ) error = < (![\n\r] .)* > { $$ = newStringWith(yytext) } interp = HASH PLING (![\n\r] .)* include = HASH INCLUDE ( '<' < [a-zA-Z0-9_.]* > '>' @{ pushInput(yytext, sysOpen(yytext)) } | '"' < [a-zA-Z0-9_.]* > '"' @{ pushInput(yytext, usrOpen(yytext)) } ) tldecl = fundefn | vardecl vardecl = t:tname d:inidecl { d = newVarDecls(t, d) } ( COMMA e:inidecl { VarDecls_append(d, e) } )* SEMI { $$ = d } tname = INT { $$ = t_int } | CHAR { $$ = t_char } | VOID { $$ = t_void } | struct struct = STRUCT ( i:id m:members { $$ = newTstruct( i, m) } | i:id { $$ = newTstruct(nil, m) } | m:members { $$ = newTstruct( i, nil) } | e:error { expected(e, "structure/union definition") } ) members = LBRACE vardecl* ( RBRACE | e:error { expected(e, "struct/union member specification") } ) inidecl = d:decltor ( ASSIGN ( e:initor { $$ = newAssign(d, e) } | e:error { expected(e, "variable initialiser") } ) | { $$ = d } ) decltor = STAR d:decltor { $$ = newTpointer(d) } | ddector ddector = ( LPAREN d:decltor RPAREN | d:idopt ) ( LBRAK e:expropt RBRAK { d = newTarray(d, e) } | p:params { d = newTfunction(d, vars2types(p)) } )* { $$ = d } params = LPAREN a:mkArray ( p:pdecl { Array_append(a, p) } ( COMMA p:pdecl { Array_append(a, p) } )* )? ( RPAREN { $$ = a } | e:error { expected(e, "parameter declaration") } ) pdecl = t:tname d:decltor { $$ = newVariable(makeName(d), makeType(t, d), nil) } initor = agrinit | expr agrinit = LBRACE i:mkArray ( j:initor { Array_append(i, j) } ( COMMA j:initor { Array_append(i, j) } )* COMMA? )? RBRACE { $$ = i } fundefn = t:tname d:funid p:params b:block { $$ = newFunction(makeName(d), makeType(t, d), p, b) } funid = STAR d:funid { $$ = newUnary(DEREF, d) } | LPAREN d:funid RPAREN { $$ = d } | id block = LBRACE b:mkArray ( s:stmt { Array_append(b, s) } )* ( RBRACE { $$ = newBlock(b) } | e:error { expected(e, "statement") } ) stmt = WHILE c:cond s:stmt { $$ = newWhile(c, e) } | FOR LPAREN ( i:vardecl | i:expropt SEMI ) c:expropt SEMI u:expropt RPAREN b:stmt { $$ = newFor(i, c, u, b) } | IF c:cond s:stmt ( ELSE t:stmt { $$ = newIf(c, s, t) } | { $$ = newIf(c, s, nil) } ) | RETURN e:expropt SEMI { $$ = newReturn(e) } | CONTINU SEMI { $$ = newContinue() } | BREAK SEMI { $$ = newBreak() } | block | e:expr SEMI { $$ = e } | vardecl cond = LPAREN e:expr RPAREN { $$ = e } expropt = expr | { $$ = nil } expr = assign assign = l:unary ASSIGN x:expr { $$ = newAssign(l, x) } | logor logor = l:logand ( BARBAR r:logand { l = newBinary(LOR, l, r) } )* { $$ = l } logand = l:bitor ( ANDAND r:bitor { l = newBinary(LAND, l, r) } )* { $$ = l } bitor = l:bitxor ( BAR r:bitxor { l = newBinary(BOR, l, r) } )* { $$ = l } bitxor = l:bitand ( HAT r:bitand { l = newBinary(BXOR, l, r) } )* { $$ = l } bitand = l:equal ( AND r:equal { l = newBinary(BAND, l, r) } )* { $$ = l } equal = l:inequal ( EQUAL r:inequal { l = newBinary(EQ, l, r) } | NEQUAL r:inequal { l = newBinary(NE, l, r) } )* { $$ = l } inequal = l:shift ( LESS r:shift { l = newBinary(LT, l, r) } | LESSEQ r:shift { l = newBinary(LE, l, r) } | GRTREQ r:shift { l = newBinary(GE, l, r) } | GRTR r:shift { l = newBinary(GT, l, r) } )* { $$ = l } shift = l:sum ( LSHIFT r:sum { l = newBinary(SHL, l, r) } | RSHIFT r:sum { l = newBinary(SHR, l, r) } )* { $$ = l } sum = l:prod ( PLUS r:prod { l = newBinary(ADD, l, r) } | MINUS r:prod { l = newBinary(SUB, l, r) } )* { $$ = l } prod = l:unary ( STAR r:unary { l = newBinary(MUL, l, r) } | SLASH r:unary { l = newBinary(DIV, l, r) } | PCENT r:unary { l = newBinary(MOD, l, r) } )* { $$ = l } unary = MINUS r:unary { $$ = newUnary(NEG, r) } | PLING r:unary { $$ = newUnary(NOT, r) } | TILDE r:unary { $$ = newUnary(COM, r) } | STAR r:unary { $$ = newUnary(DEREF, r) } | AND r:unary { $$ = newUnary(REF, r) } | PPLUS r:unary { $$ = newUnary(PREINC, r) } | MMINUS r:unary { $$ = newUnary(PREDEC, r) } | cast | postfix cast = LPAREN t:tname d:decltor RPAREN r:unary { $$ = newCast(t, d, r) } postfix = v:value ( a:args { v = newCall(v, a) } | i:index { v = newBinary(INDEX, v, i) } | PPLUS { v = newUnary(POSTINC, a) } | MMINUS { v = newUnary(POSTDEC, a) } )* { $$ = v } args = LPAREN a:mkArray ( e:expr { Array_append(a, e) } ( COMMA e:expr { Array_append(a, e) } )* )? RPAREN { $$ = a } index = LBRAK e:expr RBRAK { $$ = e } value = LPAREN e:expr RPAREN { $$ = e } | float | integer | string | id mkArray = { $$ = newArray() } float = < [-+]? [0-9]* '.' [0-9]+ ( [eE] [-+]? [0-9]+ )? > - { $$ = newFloat(atof(yytext)) } | < [-+]? [0-9]+ '.' [0-9]* ( [eE] [-+]? [0-9]+ )? > - { $$ = newFloat(atof(yytext)) } | < [-+]? [0-9]+ '.'? [0-9]* ( [eE] [-+]? [0-9]+ ) > - { $$ = newFloat(atof(yytext)) } integer = "0x" < [0-9a-fA-F]+ > - { $$ = newInteger(strtol(yytext, 0, 16)) } | "0b" < [0-1]+ > - { $$ = newInteger(strtol(yytext, 0, 2)) } | < [0-9]+ > - { $$ = newInteger(strtol(yytext, 0, 10)) } | "'" !"'" c:char "'" - { $$ = c } mkStr = { $$ = newString() } string = '"' s:mkStr ( !'"' c:char { String_append(s, _integerValue(c)) } )* '"' - { $$ = s } char = '\\' e:escaped { $$ = e } | < . > { $$ = newInteger(yytext[0]) } escaped = 'a' { $$ = newInteger('\a') } | 'b' { $$ = newInteger('\b') } | 'f' { $$ = newInteger('\f') } | 'n' { $$ = newInteger('\n') } | 'r' { $$ = newInteger('\r') } | 't' { $$ = newInteger('\t') } | 'v' { $$ = newInteger('\v') } | "'" { $$ = newInteger('\'') } | '"' { $$ = newInteger('\"') } | '\\' { $$ = newInteger('\\') } | < OCT OCT? OCT? > { $$ = newInteger(strtol(yytext, 0, 8)) } | 'x' < HEX? HEX? > { $$ = newInteger(strtol(yytext, 0, 16)) } | 'u' < HEX? HEX? HEX? HEX? > { $$ = newInteger(strtol(yytext, 0, 16)) } OCT = [0-7] HEX = [0-9a-fA-F] idopt = id | { $$ = nil } id = !keyword < alpha alnum* > - { $$ = intern(yytext) } keyword = VOID | CHAR | INT | STRUCT | IF | ELSE | WHILE | FOR | RETURN | CONTINU | BREAK alpha = [a-zA-Z_] alnum = [a-zA-Z_0-9] - = blank* blank = [ \t\n\r] | comment comment = "//" < (![\n\r] .)* > | "/*" (!"*/" .)* "*/" HASH = "#" - INCLUDE = "include" ![_a-zA-Z0-9] - VOID = "void" ![_a-zA-Z0-9] - CHAR = "char" ![_a-zA-Z0-9] - INT = "int" ![_a-zA-Z0-9] - STRUCT = "struct" ![_a-zA-Z0-9] - # UNION = "union" ![_a-zA-Z0-9] - # ENUM = "enum" ![_a-zA-Z0-9] - IF = "if" ![_a-zA-Z0-9] - ELSE = "else" ![_a-zA-Z0-9] - WHILE = "while" ![_a-zA-Z0-9] - FOR = "for" ![_a-zA-Z0-9] - RETURN = "return" ![_a-zA-Z0-9] - CONTINU = "continue" ![_a-zA-Z0-9] - BREAK = "break" ![_a-zA-Z0-9] - ASSIGN = "=" !"=" - PLUS = "+" !"+" - PPLUS = "++" - MINUS = "-" !"-" - MMINUS = "--" - STAR = "*" - BAR = "|" !"|" - BARBAR = "||" - AND = "&" !"&" - ANDAND = "&&" - HAT = "^" - EQUAL = "==" - NEQUAL = "!=" - LESS = "<" ![=<] - LESSEQ = "<=" - GRTREQ = ">=" - GRTR = ">" ![=>] - LSHIFT = "<<" - RSHIFT = ">>" - SLASH = "/" - PCENT = "%" - PLING = "!" !"=" - TILDE = "~" - LPAREN = "(" - RPAREN = ")" - LBRAK = "[" - RBRAK = "]" - LBRACE = "{" - RBRACE = "}" - COMMA = "," - SEMI = ";" - %% ; #include enum { NLR_INIT = 0, NLR_RETURN, NLR_CONTINUE, NLR_BREAK }; Object *nlrValue = 0; jmp_buf *nlrStack = 0; int nlrCount = 0; int nlrMax = 0; void _nlrPush(void) { if (nlrCount >= nlrMax) nlrStack = realloc(nlrStack, sizeof(*nlrStack) * (nlrMax += 8)); } #define nlrPush() setjmp((_nlrPush(), nlrStack[nlrCount++])) oop nlrPop(void) { assert(nlrCount > 0); --nlrCount; return nlrValue; } #define nlrReturn(TYPE, VAL) ((nlrValue = (VAL), longjmp(nlrStack[nlrCount-1], TYPE))) #define IBINOP(L, OP, R) newInteger(integerValue(L) OP integerValue(R)) #define IRELOP(L, OP, R) (integerValue(L) OP integerValue(R) ? true : false) #define FBINOP(L, OP, R) newFloat(floatValue(L) OP floatValue(R)) #define FRELOP(L, OP, R) (floatValue(L) OP floatValue(R) ? true : false) #define isNil(O) ((O) == nil) #define isFalse(O) ((O) == nil) #define isTrue(O) ((O) != nil) void defineVariable(oop name, oop type, oop value); oop apply(oop function, oop arguments, oop env) { if (opt_v > 2) { printf("APPLY "); println(function); } switch (getType(function)) { default: { fatal("type %s is not callable", getTypeName(function)); } case Primitive: { oop argv = newArray(); Array_do(arguments, arg) Array_append(argv, eval(arg, nil)); return get(function, Primitive,function) ( get(argv, Array,size), get(argv, Array,elements), env ); } case Function: { oop parameters = get(function, Function,parameters); oop body = get(function, Function,body); int parc = get(parameters, Array,size); int argc = get(arguments, Array,size); if (parc != argc) fatal("wrong number of arguments, expected %d got %d", parc, argc); oop *parv = get(parameters, Array,elements); oop *argv = get(arguments, Array,elements); Scope_begin(); for (int i = 0; i < argc; ++i) { oop var = parv[i]; oop arg = argv[i]; defineVariable(get(var, Variable,name), get(var, Variable,type), eval(arg, nil)); } switch (nlrPush()) { // longjmp occurred case NLR_INIT: break; case NLR_RETURN: Scope_end(); return nlrPop(); case NLR_CONTINUE: fatal("continue outside loop"); case NLR_BREAK: fatal("break outside loop"); } oop result = eval(body, nil); Scope_end(); nlrPop(); return result; } } } void define(oop name, oop value) { oop scope = Array_last(scopes); int index = Scope_find(scope, name); // searches active scope only if (index >= 0) { oop old = Scope_lookup(name); assert(old); switch (getType(old)) { case Variable: { oop oldtype = get(old, Variable,type); if (is(Tfunction, oldtype)) { switch (getType(value)) { case Variable: { oop valtype = get(value, Variable,type); if (oldtype == valtype) return; // function declaration printf("FUNCTION FORWARD TYPE MISMATCH 1\n"); break; } case Function: { // replace forard declaration with actual function Scope_redefine(name, value); return; } default: break; } } break; } case Function: { if (is(Variable, value)) { oop oldtype = get(old, Function,type); oop valtype = get(old, Variable,type); if (oldtype == valtype) return; // compatible redeclaration printf("FUNCTION FORWARD TYPE MISMATCH 2\n"); } break; } default: break; } fatal("name '%s' redefined\n", get(name, Symbol,name)); } Array_append(get(scope, Scope,names ), name ); Array_append(get(scope, Scope,values), value); } void defineTypeName(oop name, oop type) { define(name, newTypeName(name, type)); } void defineVariable(oop name, oop type, oop value) { define(name, newVariable(name, type, value)); } void defineConstant(oop name, oop type, oop value) { define(name, newConstant(name, type, value)); } void defineFunction(oop name, oop type, oop parameters, oop body) { define(name, newFunction(name, type, parameters, body)); } void definePrimitive(oop name, oop type, prim_t function) { define(name, newPrimitive(name, type, function)); } int VarDecls_finalise(oop vds) { oop vars = get(vds, VarDecls,variables); if (nil == vars) { assert(nil == vars); oop base = get(vds, VarDecls,type ); oop decls = get(vds, VarDecls,declarations); vars = newArray(); Array_do(decls, decl) { oop name = makeName(decl); oop type = makeType(base, decl); Array_append(vars, newVariable(name, type, nil)); } set(vds, VarDecls,variables, vars); } return get(vars, Array,size); } oop eval(oop exp, oop env) { if (opt_v > 2) { printf("EVAL "); println(exp); } switch (getType(exp)) { case Undefined: assert(!"this cannot happen"); case Input: assert(!"this cannot happen"); case Integer: return exp; case Float: return exp; case Symbol: { oop value = Scope_lookup(exp); if (!value) fatal("'%s' is undefined\n", get(exp, Symbol,name)); if (isNil(value)) fatal("'%s' is uninitialised\n", get(exp, Symbol,name)); return value; } case Pair: assert(!"this cannot happen"); case String: return exp; case Array: assert(!"this cannot happen"); case Primitive: return exp; case Closure: return exp; case Call: { oop fun = eval(get(exp, Call,function), env); oop args = get(exp, Call,arguments); return apply(fun, args, nil); } case Block: { Object *stmts = get(exp, Block,statements); int size = get(stmts, Array,size); oop *elts = get(stmts, Array,elements); Object *result = nil; Scope_begin(); for (int i = 0; i < size; ++i) { result = eval(elts[i], env); } Scope_end(); return result; } case Unary: { oop rhs = eval(get(exp, Unary,rhs), env); switch (get(exp, Unary,operator)) { case NEG: return ( is(Float, rhs) ? newFloat (-floatValue (rhs)) : newInteger(-integerValue(rhs)) ); case NOT: return isFalse(rhs) ? true : false; case COM: return newInteger(~integerValue(rhs)); case DEREF: assert(!"unimplemented"); case REF: assert(!"unimplemented"); case PREINC: assert(!"unimplemented"); case PREDEC: assert(!"unimplemented"); case POSTINC: assert(!"unimplemented"); case POSTDEC: assert(!"unimplemented"); } break; } case Binary: { oop lhs = get(exp, Binary,lhs); oop rhs = get(exp, Binary,rhs); switch (get(exp, Binary,operator)) { case LAND: return isFalse(eval(lhs, env)) ? false : eval(rhs, env); case LOR: return isTrue (eval(lhs, env)) ? true : eval(rhs, env); default: { lhs = eval(lhs, env); rhs = eval(rhs, env); if (Float == getType(lhs) || Float == getType(rhs)) { // floating point result switch (get(exp, Binary,operator)) { case INDEX: assert(!"unimplemented"); case MUL: return FBINOP(lhs, * , rhs); case DIV: return FBINOP(lhs, / , rhs); case MOD: return newFloat(fmod(floatValue(lhs), floatValue(rhs))); case ADD: return FBINOP(lhs, + , rhs); case SUB: return FBINOP(lhs, - , rhs); case SHL: return IBINOP(lhs, <<, rhs); case SHR: return IBINOP(lhs, >>, rhs); case LT: return FRELOP(lhs, < , rhs); case LE: return FRELOP(lhs, <=, rhs); case GE: return FRELOP(lhs, >=, rhs); case GT: return FRELOP(lhs, > , rhs); case EQ: return FRELOP(lhs, == , rhs); case NE: return FRELOP(lhs, !=, rhs); case BAND: return IBINOP(lhs, & , rhs); case BXOR: return IBINOP(lhs, ^ , rhs); case BOR: return IBINOP(lhs, | , rhs); case LAND: case LOR: break; } } else { // integer result switch (get(exp, Binary,operator)) { case INDEX: assert("!unimplemented"); case MUL: return IBINOP(lhs, * , rhs); case DIV: return IBINOP(lhs, / , rhs); case MOD: return IBINOP(lhs, % , rhs); case ADD: return IBINOP(lhs, + , rhs); case SUB: return IBINOP(lhs, - , rhs); case SHL: return IBINOP(lhs, <<, rhs); case SHR: return IBINOP(lhs, >>, rhs); case LT: return IRELOP(lhs, < , rhs); case LE: return IRELOP(lhs, <=, rhs); case GE: return IRELOP(lhs, >=, rhs); case GT: return IRELOP(lhs, > , rhs); case EQ: return IRELOP(lhs, == , rhs); case NE: return IRELOP(lhs, !=, rhs); case BAND: return IBINOP(lhs, & , rhs); case BXOR: return IBINOP(lhs, ^ , rhs); case BOR: return IBINOP(lhs, | , rhs); case LAND: case LOR: break; } } } } assert(!"this cannot happen"); break; } case Assign: { assert(!"unimplemented"); break; } case Cast: { assert(!"unimplemented"); break; } case While: { oop cond = get(exp, While,condition); oop expr = get(exp, While,expression); oop result = nil; switch (nlrPush()) { case NLR_INIT: break; case NLR_RETURN: nlrReturn(NLR_RETURN, nlrPop()); // propagate upwards case NLR_CONTINUE: break; case NLR_BREAK: return nlrPop(); } while (isTrue(eval(cond, env))) { result = eval(expr, env); } nlrPop(); return result; } case For: { assert(!"unimplemented"); return nil; } case If: { oop cond = get(exp, If,condition); oop conseq = get(exp, If,consequent); oop altern = get(exp, If,alternate); return isTrue(eval(cond, env)) ? eval(conseq, env) : eval(altern, env); } case Return: { nlrReturn(NLR_RETURN, eval(get(exp, Return,value), env)); break; } case Continue: { nlrReturn(NLR_CONTINUE, nil); break; } case Break: { nlrReturn(NLR_BREAK, nil); break; } case Tbase: assert(!"unimplemented"); break; case Tpointer: assert(!"unimplemented"); break; case Tarray: assert(!"unimplemented"); break; case Tstruct: assert(!"unimplemented"); break; case Tfunction: assert(!"unimplemented"); break; case VarDecls: { oop vars = get(exp, VarDecls,variables); Array_do(vars, var) { oop name = get(var, Variable,name); oop type = get(var, Variable,type); oop init = get(var, Variable,value); oop valu = nil; if (is(Tfunction, type)) continue; // function declaration if (!isNil(init)) valu = eval(init, nil); defineVariable(name, type, valu); } return nil; break; } case Scope: break; case TypeName: break; case Variable: break; case Constant: break; case Function: return exp; } assert(!"this cannot happen"); return 0; } // primitive functions oop prim_printf(int argc, oop *argv, oop env) // array { oop result = nil; if (argc < 1) fatal("printf: no format string"); oop format = argv[0]; if (!is(String, format)) fatal("printf: format is not a string"); char *fmt = get(format, String,elements); int size = get(format, String,size); int n = 0; int argn = 1; for (int i = 0; i < size;) { int c = fmt[i++]; if (c == '%' && fmt[i]) { c = fmt[i++]; if (c == '%') goto echo; oop arg = nil; switch (c) { case 'd': { if (argn >= argc) fatal("not enough argments for printf format string"); arg = argv[argn++]; if (!is(Integer, arg)) fatal("%%d conversion argument is %s", getTypeName(arg)); n += printf("%ld", _integerValue(arg)); continue; } } } echo: putchar(c); ++n; } return newInteger(n); } enum opcode_t { iHALT = 0, iPUSH, iPOP, iNOT, iCOM, iNEG, iDEREF, iINDEX, iMUL, iDIV, iMOD, iADD, iSUB, iSHL, iSHR, iLT, iLE, iGE, iGT, iEQ, iNE, iAND, iXOR, iOR, iGETGVAR, iSETGVAR, iCLOSE, iCALL, iRETURN, iJMP, iJMPF, }; oop stackError(char *reason) { printf("stack %s\n", reason); exit(1); return nil; } void disassemble(oop program) { oop *code = get(program, Array,elements); int size = get(program, Array,size); int pc = 0; while (pc < size) { printf("%04d", pc); int opcode = _integerValue(code[pc++]); printf(" %02d\t", opcode); switch (opcode) { case iHALT: printf("HALT\n"); break; case iPUSH: printf("PUSH\t"); println(code[pc++]); break; case iPOP: printf("POP\n"); break; case iNOT: printf("NOT\n"); break; case iCOM: printf("COM\n"); break; case iNEG: printf("NEG\n"); break; case iDEREF: printf("DEREF\n"); break; case iINDEX: printf("INDEX\n"); break; case iMUL: printf("MUL\n"); break; case iDIV: printf("DIV\n"); break; case iMOD: printf("MOD\n"); break; case iADD: printf("ADD\n"); break; case iSUB: printf("SUB\n"); break; case iSHL: printf("SHL\n"); break; case iSHR: printf("SHR\n"); break; case iLT: printf("LT\n"); break; case iLE: printf("LE\n"); break; case iGE: printf("GE\n"); break; case iGT: printf("GT\n"); break; case iEQ: printf("EQ\n"); break; case iNE: printf("NE\n"); break; case iAND: printf("AND\n"); break; case iXOR: printf("XOR\n"); break; case iOR: printf("OR\n"); break; case iGETGVAR: printf("GETGVAR\t"); println(code[pc++]); break; case iSETGVAR: printf("SETGVAR\t"); println(code[pc++]); break; case iCLOSE: printf("CLOSE\t"); println(code[pc++]); break; case iCALL: printf("CALL\t"); println(code[pc++]); break; case iRETURN: printf("RETURN\n"); break; case iJMP: printf("JMP\t"); println(code[pc++]); break; case iJMPF: printf("JMPF\t"); println(code[pc++]); break; } } } oop execute(oop program) { oop *code = get(program, Array,elements); int pc = 0; oop stack[32]; int sp = 32; // clear the stack oop env = nil; struct Frame { Object *env; oop *code; int pc; } frames[32]; int fp = 32; # define push(O) (sp > 0 ? stack[--sp] = (O) : stackError("overflow")) # define pop() (sp < 32 ? stack[sp++] : stackError("underflow")) # define top (stack[sp]) for (;;) { oop insn = code[pc++]; switch ((enum opcode_t)_integerValue(insn)) { case iHALT: { if (sp < 31) fatal("%d items on stack at end of execution", 32-sp); if (sp < 32) return stack[sp]; fatal("stack empty at end of execution"); return nil; } case iPUSH: { oop operand = code[pc++]; push(operand); continue; } case iPOP: { pop(); continue; } case iNOT: { top = (isFalse(top) ? true : false); continue; } case iCOM: { top = newInteger(~integerValue(top)); continue; } case iNEG: { top = is(Float, top) ? newFloat(-floatValue(top)) : newInteger(-integerValue(top)); continue; } case iDEREF: { assert(!"unimplemented"); continue; } case iINDEX: { assert(!"unimplemented"); continue; } # define BINOP(OP) { \ oop rhs = pop(), lhs = pop(); \ if (is(Float, lhs) || is(Float, rhs)) push(FBINOP(lhs, OP, rhs)); \ else push(IBINOP(lhs, OP, rhs)); \ continue; \ } case iMUL: BINOP(*); case iDIV: BINOP(/); case iMOD: { oop rhs = pop(), lhs = pop(); if (is(Float, lhs) || is(Float, rhs)) push(newFloat(fmod(floatValue(lhs), floatValue(rhs)))); else push(IBINOP(lhs, %, rhs)); continue; } case iADD: BINOP(+); case iSUB: BINOP(-); # undef BINOP # define BINOP(OP) { \ oop rhs = pop(), lhs = pop(); \ push(IBINOP(lhs, OP, rhs)); \ continue; \ } case iSHL: BINOP(<<); case iSHR: BINOP(>>); case iAND: BINOP(&); case iXOR: BINOP(^); case iOR: BINOP(|); # undef BINOP # define BINOP(OP) { \ oop rhs = pop(), lhs = pop(); \ if (is(Float, lhs) || is(Float, rhs)) \ push(floatValue(lhs) OP floatValue(rhs) ? true : false); \ else \ push(integerValue(lhs) OP integerValue(rhs) ? true : false); \ continue; \ } case iLT: BINOP(< ); case iLE: BINOP(<=); case iGE: BINOP(>=); case iGT: BINOP(> ); case iEQ: BINOP(==); case iNE: BINOP(!=); # undef BINOP case iGETGVAR: { oop operand = code[pc++]; oop keyval = assoc(env, operand); if (nil != keyval) { push(get(keyval, Pair,tail)); continue; } push(get(operand, Symbol,value)); continue; } case iSETGVAR: { oop operand = code[pc++]; oop keyval = assoc(env, operand); if (nil != keyval) { set(keyval, Pair,tail, top); continue; } set(operand, Symbol,value, top); continue; } case iCLOSE: { oop func = code[pc++]; push(newClosure(func, env)); continue; } case iCALL: { int argc = _integerValue(code[pc++]); oop func = pop(); switch (getType(func)) { case Primitive: { oop result = get(func, Primitive,function)(argc, stack + sp, nil); sp += argc; // pop all arguments push(result); continue; // next instruction } case Closure: { Object *function = get(func, Closure,function); Object *environment = get(func, Closure,environment); Object *parameters = get(function, Function,parameters); int parc = get(parameters, Array,size); oop *parv = get(parameters, Array,elements); int parn = 0; while (parn < parc && argc > 0) { environment = newPair(newPair(parv[parn++], pop()), environment); --argc; } while (parn < parc) environment = newPair(newPair(parv[parn++], nil), environment); sp += argc; if (fp < 1) fatal("too many function calls"); --fp; frames[fp].env = env; env = environment; frames[fp].code = code; code = get(function, Function,code); frames[fp].pc = pc; pc = 0; assert(code != 0); continue; } default: fatal("cannot call value of type %d", getType(func)); } continue; } case iRETURN: { assert(fp < 32); env = frames[fp].env; code = frames[fp].code; pc = frames[fp].pc; ++fp; continue; } case iJMP: { int dest = _integerValue(code[pc++]); pc = dest; continue; } case iJMPF: { int dest = _integerValue(code[pc++]); oop cond = pop(); if (nil == cond) pc = dest; continue; } } } assert(!"this cannot happen"); return 0; } #define EMITo(O) Array_append(program, (O)) #define EMITi(I) EMITo(newInteger(I)) #define EMIToo(O, P) (( EMITo(O), EMITo(P) )) #define EMITio(I, P) EMIToo(newInteger(I), P) #define EMITii(I, J) EMIToo(newInteger(I), newInteger(J)) oop compileFunction(oop exp); void compileOn(oop exp, oop program, oop cs, oop bs) { switch (getType(exp)) { case Undefined: EMITio(iPUSH, exp); return; case Input: EMITio(iPUSH, exp); return; case Integer: EMITio(iPUSH, exp); return; case Float: EMITio(iPUSH, exp); return; case Symbol: EMITio(iGETGVAR, exp); return; case Pair: EMITio(iPUSH, exp); return; case String: EMITio(iPUSH, exp); return; case Array: assert(!"unimplemented"); case Primitive: EMITio(iPUSH, exp); return; case Closure: EMITio(iPUSH, exp); return; case Call: { Object *args = get(exp, Call,arguments); int argc = get(args, Array,size); oop *argv = get(args, Array,elements); for (int n = argc; n--;) compileOn(argv[n], program, cs, bs); compileOn(get(exp, Call,function), program, cs, bs); // GETVAR print EMITii(iCALL, argc); return; } case Block: { oop statements = get(exp, Block,statements); int size = get(statements, Array,size); if (0 == size) { EMITio(iPUSH, nil); return; } oop *exps = get(statements, Array,elements); for (int i = 0; i < size - 1; ++i) { compileOn(exps[i], program, cs, bs); EMITi(iPOP); } compileOn(exps[size - 1], program, cs, bs); return; } case Unary: { compileOn(get(exp, Unary,rhs), program, cs, bs); switch (get(exp, Unary,operator)) { case NEG: EMITi(iNEG); return; case NOT: EMITi(iNOT); return; case COM: EMITi(iCOM); return; case DEREF: EMITi(iDEREF); return; case REF: assert(!"unimplemented"); case PREINC: assert(!"unimplemented"); case PREDEC: assert(!"unimplemented"); case POSTINC: assert(!"unimplemented"); case POSTDEC: assert(!"unimplemented"); } break; } case Binary: { // MUL{op, lhs, rhs} switch (get(exp, Binary,operator)) { case LAND: assert(!"unimplemented"); case LOR: assert(!"unimplemented"); default: break; } compileOn(get(exp, Binary,lhs), program, cs, bs); compileOn(get(exp, Binary,rhs), program, cs, bs); switch (get(exp, Binary,operator)) { case INDEX: assert(!"unimplemented"); case MUL: EMITi(iMUL); return; case DIV: EMITi(iDIV); return; case MOD: EMITi(iMOD); return; case ADD: EMITi(iADD); return; case SUB: EMITi(iSUB); return; case SHL: EMITi(iSHL); return; case SHR: EMITi(iSHR); return; case LT: EMITi(iLT); return; case LE: EMITi(iLE); return; case GE: EMITi(iGE); return; case GT: EMITi(iGT); return; case EQ: EMITi(iEQ); return; case NE: EMITi(iNE); return; case BAND: EMITi(iAND); return; case BXOR: EMITi(iXOR); return; case BOR: EMITi(iOR); return; case LAND: case LOR: assert(!"unimplemented"); } } case Assign: { oop symbol = get(exp, Assign,lhs); oop expr = get(exp, Assign,rhs); compileOn(expr, program, cs, bs); EMITio(iSETGVAR, symbol); return; } case Cast: { assert(!"unimplemented"); return; } # define LABEL(NAME) int NAME = get(program, Array,size) # define PATCH(J, L) Array_set(program, J+1, newInteger(L)) case While: { oop continues = newArray(); oop breaks = newArray(); oop cond = get(exp, While,condition); oop body = get(exp, While,expression); EMITio(iPUSH, nil); LABEL(L1); compileOn(cond, program, cs, bs); // break/continue apply to enclosing loop LABEL(J1); EMITio(iJMPF, nil); EMITi(iPOP); compileOn(body, program, continues, breaks); EMITii(iJMP, L1); LABEL(L2); PATCH(J1, L2); for (int i = get(continues, Array,size); i--;) PATCH(_integerValue(get(continues, Array,elements)[i]), L1); for (int i = get(breaks, Array,size); i--;) PATCH(_integerValue(get(breaks, Array,elements)[i]), L2); return; } case For: { assert(!"unimplemented"); return; } case If: { oop cond = get(exp, If,condition); oop conseq = get(exp, If,consequent); oop altern = get(exp, If,alternate); compileOn(cond, program, cs, bs); LABEL(J1); EMITio(iJMPF, nil); // L1 compileOn(conseq, program, cs, bs); LABEL(J2); EMITio(iJMP, nil); // L2 LABEL(L1); compileOn(altern, program, cs, bs); LABEL(L2); PATCH(J1, L1); PATCH(J2, L2); return; } case Return: assert(!"unimplemented"); case Continue: { if (nil == cs) fatal("continue outside loop"); EMITio(iPUSH, nil); LABEL(L1); EMITio(iJMP, nil); Array_append(cs, newInteger(L1)); return; } case Break: { if (nil == bs) fatal("continue outside loop"); EMITio(iPUSH, nil); LABEL(L1); EMITio(iJMP, nil); Array_append(bs, newInteger(L1)); return; } case Tbase: assert(!"unimplemented"); return; case Tpointer: assert(!"unimplemented"); return; case Tarray: assert(!"unimplemented"); return; case Tstruct: assert(!"unimplemented"); return; case Tfunction: assert(!"unimplemented"); return; case VarDecls: assert(!"unimplemented"); return; case Scope: assert(!"this cannot happen"); return; case TypeName: assert(!"unimplemented"); return; case Variable: assert(!"unimplemented"); return; case Constant: assert(!"unimplemented"); return; case Function: { assert(0 == get(exp, Function,code)); oop prog2 = compileFunction(get(exp, Function,body)); set(exp, Function,code, get(prog2, Array,elements)); EMITio(iCLOSE, exp); return; } } } oop compileFunction(oop exp) { oop program = newArray(); compileOn(exp, program, nil, nil); EMITi(iRETURN); if (opt_v > 2) disassemble(program); return program; } oop compile(oop exp) // 6*7 { oop program = newArray(); compileOn(exp, program, nil, nil); EMITi(iHALT); if (opt_v > 2) disassemble(program); return program; } oop typeCheck(oop exp, oop fntype) { switch (getType(exp)) { case Integer: return t_int; case Float: return t_float; case String: return t_string; case Symbol: { oop value = Scope_lookup(exp); if (!value) fatal("undefined variable '%s'", symbolName(exp)); if (nil == value) fatal("uninitialised variable '%s'", symbolName(exp)); switch (getType(value)) { case Primitive: return get(value, Primitive,type); case Function: return get(value, Function,type); case Variable: return get(value, Variable,type); default: fatal("cannot typecheck value of type %s", getTypeName(value)); } return nil; } case Primitive: { return get(exp, Primitive,type); } case Function: { oop result = get(exp, Function,type ); oop name = get(exp, Function,name ); oop parameters = get(exp, Function,parameters); oop body = get(exp, Function,body ); oop ptypes = newArray(); Array_do(parameters, var) { oop type = get(var, Variable,type); if (t_void == type && (do_index || do_size > 1)) fatal("illegal void parameter"); Array_append(ptypes, type); } if (1 == Array_size(ptypes) && Array_last(ptypes) == t_void) { Array_popLast(ptypes); Array_popLast(parameters); } assert(isNil(fntype)); fntype = newTfunction(result, ptypes); set(exp, Function,type, fntype); define(name, exp); // add function to global scope so recursive calls will work Scope_begin(); // parameters Array_do(parameters, param) define(get(param, Variable,name), param); typeCheck(body, fntype); // block Scope_end(); return nil; } case Block: { Scope_begin(); oop statements = get(exp, Block,statements); Array_do(statements, statement) typeCheck(statement, fntype); Scope_end(); return nil; } case Call: { oop function = get(exp, Call,function ); oop arguments = get(exp, Call,arguments); oop tfunc = typeCheck(function, fntype); if (!is(Tfunction, tfunc)) fatal("cannot call %s", getTypeName(tfunc)); oop params = get(tfunc, Tfunction,parameters); int argc = get(arguments, Array,size); oop *argv = get(arguments, Array,elements); int parc = get(params, Array,size); oop *parv = get(params, Array,elements); if (argc != parc) fatal("wrong number (%d) of arguments, expected %d", argc, parc); for (int i = 0; i < argc; ++i) { oop part = parv[i]; oop arg = argv[i]; oop argt = typeCheck(arg, fntype); if (argt != part) fatal("cannot pass argument of type '%s' to parameter of type '%s' ", toString(argt), toString(part)); } return get(tfunc, Tfunction,result); } case Return: { assert(nil != fntype); oop result = get(fntype, Tfunction,result); oop value = get(exp, Return,value); oop vtype = isNil(value) ? t_void : typeCheck(value, fntype); if (vtype != result) fatal("incompatible return of %s from function returning %s", toString(vtype), toString(result)); return result; } case VarDecls: { oop vars = get(exp, VarDecls,variables); Array_do(vars, var) { oop varname = get(var, Variable,name); oop vartype = get(var, Variable,type); oop varval = get(var, Variable,value); if (is(Tfunction, vartype)) { oop ptypes = get(vartype, Tfunction,parameters); if (1 == Array_size(ptypes) && t_void == Array_last(ptypes)) { Array_popLast(ptypes); // make unique vartype = newTfunction(get(vartype, Tfunction,result), ptypes); } } oop old = Scope_lookup(varname); if (old) { // declared oop oldtype = nil; switch (getType(old)) { case Variable: { oldtype = get(old, Variable,type); if (oldtype == vartype) { // identical declarations oop oldval = get(old, Variable,value); if (isNil(fntype)) // global declarations if (isNil(varval) || isNil(oldval)) // at most one initialiser continue; // redeclaration is permitted fatal("multiple definiton of variable '%s'", toString(varname)); } break; } case Function: oldtype = get(old, Function,type); break; case Primitive: oldtype = get(old, Primitive,type); break; default: fatal("cannot find type of declaration: %s", toString(old)); } if (vartype == oldtype) continue; fatal("identifier '%s' redefined as different type: %s -> %s", toString(varname), declareString(oldtype, varname), declareString(vartype, varname)); } if (!isNil(varval)) { oop initype = typeCheck(varval, fntype); if (initype != vartype) fatal("initialising %s (%s) with incompatible expression (%s)", toString(varname), toString(vartype), toString(initype)); } define(varname, var); } return nil; } default: break; } fatal("cannot typeCheck: %s", toString(exp)); return 0; } void replFile(char *name, FILE *file) { input = pushInput(name, file); while (input) { if (yyparse() && yysval) { if (opt_v > 1) println(yysval); if (!opt_x) { oop result = nil; if (opt_O) { oop program = compile(yysval); result = execute(program); } else { switch (nlrPush()) { case NLR_INIT: break; case NLR_RETURN: fatal("return outside function"); case NLR_CONTINUE: fatal("continue outside loop"); case NLR_BREAK: fatal("break outside loop"); } typeCheck(yysval, nil); result = eval(yysval, nil); nlrPop(); } if (opt_v > 0) { printf("=> %s\n", toString(result)); } } } } } void replPath(char *path) { FILE *file = fopen(path, "r"); if (!file) fatal("%s: %s", path, strerror(errno)); replFile(path, file); } int main(int argc, char **argv) { true = newSymbol("true"); t_void = newTbase("void", 1); t_char = newTbase("char", 1); t_int = newTbase("int", 4); t_float = newTbase("float", 4); t_string = newTpointer(t_char); scopes = newArray(); Scope_begin(); // the global scope definePrimitive(intern("printf"), newTfunction(t_int, newArray2(t_string, t_int)), prim_printf); int repls = 0; for (int argn = 1; argn < argc;) { char *arg = argv[argn++]; if (*arg != '-') { replPath(arg); ++repls; } else { while (*++arg) { switch (*arg) { case 'O': ++opt_O; continue; case 'v': ++opt_v; continue; case 'x': ++opt_x; continue; default: fatal("uknown option '%c'", *arg); } } } } if (!repls) replFile("stdin", stdin); oop args = newArray(); Array_append(args, newInteger(1)); Array_append(args, newStringWith("main")); oop result = eval(newCall(intern("main"), args), nil); if (!is(Integer, result)) { printf("\n=> "); println(result); fatal("main did not return an integer"); } return _integerValue(result); }