From 4b29519383c83dbfa902d91ff8e0776cba25b0ff Mon Sep 17 00:00:00 2001 From: Ian Piumarta Date: Wed, 22 Jan 2025 21:10:02 +0900 Subject: [PATCH] handle enough pointers and address-of to make dangling-pointer.c work; add %p conversion to printf(); add assert() --- main.leg | 418 +++++++++++++++++++++++++++++++++++++++---------------- test.txt | 5 + 2 files changed, 304 insertions(+), 119 deletions(-) diff --git a/main.leg b/main.leg index a137a6b..a941e56 100644 --- a/main.leg +++ b/main.leg @@ -1,6 +1,6 @@ # main.leg -- C parser + interpreter # -# Last edited: 2025-01-22 15:46:10 by piumarta on zora +# Last edited: 2025-01-22 21:08:24 by piumarta on zora %{ ; @@ -58,14 +58,15 @@ typedef union Object Object, *oop; #define _do_types(_) \ _(Undefined) _(Input) _(Integer) _(Float) _(Symbol) _(Pair) _(String) _(Array) \ - _(Closure) _(Call) _(Block) _(Unary) _(Binary) _(Assign) _(Cast) \ + _(Reference) _(Closure) _(Call) _(Block) \ + _(Address) _(Dereference) _(Unary) _(Binary) _(Assign) _(Cast) \ _(While) _(For) _(If) _(Return) _(Continue) _(Break) \ _(Tbase) _(Tpointer) _(Tarray) _(Tstruct) _(Tfunction) \ _(Scope) _(TypeName) _(Variable) _(Constant) _(Function) _(Primitive) \ _(VarDecls) #define _do_unaries(_) \ - _(NEG) _(NOT) _(COM) _(DEREF) _(REF) _(PREINC) _(PREDEC) _(POSTINC) _(POSTDEC) + _(NEG) _(NOT) _(COM) _(PREINC) _(PREDEC) _(POSTINC) _(POSTDEC) #define _do_binaries(_) \ _(INDEX) \ @@ -99,77 +100,83 @@ char *binaryName(int op) { typedef oop (* prim_t)(int nargs, oop *arguments, oop environment); -struct Undefined { type_t _type; }; -struct Input { type_t _type; char *name; FILE *file; oop next; }; -struct Integer { type_t _type; long value; }; -struct Float { type_t _type; double value; }; -struct Symbol { type_t _type; char *name; oop value; }; -struct Pair { type_t _type; oop head, tail; }; -struct String { type_t _type; int size; char *elements; }; -struct Array { type_t _type; int size; oop *elements; }; -struct Closure { type_t _type; oop function, environment; }; -struct Call { type_t _type; oop function, arguments; }; -struct Block { type_t _type; oop statements; }; -struct Unary { type_t _type; unary_t operator; oop rhs; }; -struct Binary { type_t _type; binary_t operator; oop lhs, rhs; }; -struct Assign { type_t _type; oop lhs, rhs; }; -struct Cast { type_t _type; oop type, declarator, rhs; }; -struct While { type_t _type; oop condition, expression; }; -struct For { type_t _type; oop initialiser, condition, update, body; }; -struct If { type_t _type; oop condition, consequent, alternate; }; -struct Return { type_t _type; oop value; }; -struct Continue { type_t _type; }; -struct Break { type_t _type; }; - -struct Tbase { type_t _type; char *name; int size; }; -struct Tpointer { type_t _type; oop target; }; -struct Tarray { type_t _type; oop target; oop size; }; -struct Tstruct { type_t _type; oop tag, members; }; -struct Tfunction { type_t _type; oop result, parameters; }; - -struct Scope { type_t _type; oop names, types, values; }; -struct TypeName { type_t _type; oop name, type; }; -struct Variable { type_t _type; oop name, type, value; }; -struct Constant { type_t _type; oop name, type, value; }; -struct Function { type_t _type; oop name, type, parameters, body, *code; int variadic; }; -struct Primitive { type_t _type; oop name, type; prim_t function; }; -struct VarDecls { type_t _type; oop type, declarations, variables; }; +struct Undefined { type_t _type; }; +struct Input { type_t _type; char *name; FILE *file; oop next; }; +struct Integer { type_t _type; long value; }; +struct Float { type_t _type; double value; }; +struct Symbol { type_t _type; char *name; oop value; }; +struct Pair { type_t _type; oop head, tail; }; +struct String { type_t _type; int size; char *elements; }; +struct Array { type_t _type; int size; oop *elements; }; +struct Reference { type_t _type; oop target; }; +struct Closure { type_t _type; oop function, environment; }; +struct Call { type_t _type; oop function, arguments; }; +struct Block { type_t _type; oop statements; }; +struct Address { type_t _type; oop rhs; }; +struct Dereference { type_t _type; oop rhs; }; +struct Unary { type_t _type; unary_t operator; oop rhs; }; +struct Binary { type_t _type; binary_t operator; oop lhs, rhs; }; +struct Assign { type_t _type; oop lhs, rhs; }; +struct Cast { type_t _type; oop type, declarator, rhs; }; +struct While { type_t _type; oop condition, expression; }; +struct For { type_t _type; oop initialiser, condition, update, body; }; +struct If { type_t _type; oop condition, consequent, alternate; }; +struct Return { type_t _type; oop value; }; +struct Continue { type_t _type; }; +struct Break { type_t _type; }; + +struct Tbase { type_t _type; char *name; int size; }; +struct Tpointer { type_t _type; oop target; }; +struct Tarray { type_t _type; oop target; oop size; }; +struct Tstruct { type_t _type; oop tag, members; }; +struct Tfunction { type_t _type; oop result, parameters; }; + +struct Scope { type_t _type; oop names, types, values; }; +struct TypeName { type_t _type; oop name, type; }; +struct Variable { type_t _type; oop name, type, value; }; +struct Constant { type_t _type; oop name, type, value; }; +struct Function { type_t _type; oop name, type, parameters, body, *code; int variadic; }; +struct Primitive { type_t _type; oop name, type; prim_t function; }; +struct VarDecls { type_t _type; oop type, declarations, variables; }; union Object { - type_t _type; - struct Input Input; - struct Integer Integer; - struct Float Float; - struct Symbol Symbol; - struct Pair Pair; - struct String String; - struct Array Array; - struct Primitive Primitive; - struct Closure Closure; - struct Call Call; - struct Block Block; - struct Unary Unary; - struct Binary Binary; - struct Assign Assign; - struct Cast Cast; - struct For For; - struct While While; - struct If If; - struct Return Return; - struct Continue Continue; - struct Break Break; - struct Tbase Tbase; - struct Tpointer Tpointer; - struct Tarray Tarray; - struct Tstruct Tstruct; - struct Tfunction Tfunction; - struct Scope Scope; - struct TypeName TypeName; - struct Variable Variable; - struct Constant Constant; - struct Function Function; - struct VarDecls VarDecls; + type_t _type; + struct Input Input; + struct Integer Integer; + struct Float Float; + struct Symbol Symbol; + struct Pair Pair; + struct String String; + struct Array Array; + struct Primitive Primitive; + struct Reference Reference; + struct Closure Closure; + struct Call Call; + struct Block Block; + struct Address Address; + struct Dereference Dereference; + struct Unary Unary; + struct Binary Binary; + struct Assign Assign; + struct Cast Cast; + struct For For; + struct While While; + struct If If; + struct Return Return; + struct Continue Continue; + struct Break Break; + struct Tbase Tbase; + struct Tpointer Tpointer; + struct Tarray Tarray; + struct Tstruct Tstruct; + struct Tfunction Tfunction; + struct Scope Scope; + struct TypeName TypeName; + struct Variable Variable; + struct Constant Constant; + struct Function Function; + struct VarDecls VarDecls; }; int opt_O = 0; // optimise (use VM) @@ -583,9 +590,12 @@ oop Map_get(oop map, oop key) return kvs[index].val; } -CTOR2(Closure, function, environment); -CTOR2(Call, function, arguments); -CTOR1(Block, statements); +CTOR1(Reference, target); +CTOR2(Closure, function, environment); +CTOR2(Call, function, arguments); +CTOR1(Block, statements); +CTOR1(Address, rhs); +CTOR1(Dereference, rhs); oop newUnary(unary_t operator, oop operand) { @@ -736,6 +746,14 @@ oop Scope_lookup(oop name) return 0; // NOTE: 0 means undefined (rather than nil, which means uninitialised) } +oop Scope_local(oop name) +{ + oop scope = Array_last(scopes); + int i = Scope_find(scope, name); + if (i >= 0) return get(get(scope, Scope,values), Array,elements)[i]; + return 0; // NOTE: 0 means undefined (rather than nil, which means uninitialised) +} + oop Scope_redeclare(oop name, oop value) { int n = get(scopes, Array,size); @@ -911,6 +929,9 @@ oop toStringOn(oop obj, oop str) case Undefined: String_appendAll(str, "", 5); break; + case Integer: + String_format(str, "%d", _integerValue(obj)); + break; case Symbol: String_format(str, "%s", get(obj, Symbol,name)); break; @@ -926,13 +947,31 @@ oop toStringOn(oop obj, oop str) break; } case Unary: { - String_format(str, "%s", unaryName(get(obj, Unary,operator))); + char *name = 0; + oop rhs = get(obj, Unary,rhs); + switch (get(obj, Unary,operator)) { + case NEG: name = "-"; break; + case NOT: name = "!"; break; + case COM: name = "~"; break; + case PREINC: String_format(str, "++"); toStringOn(rhs, str); return str; + case PREDEC: String_format(str, "--"); toStringOn(rhs, str); return str; + case POSTINC: toStringOn(rhs, str); String_format(str, "++"); return str; + case POSTDEC: toStringOn(rhs, str); String_format(str, "--"); return str; + } + String_format(str, "%s", name); + toStringOn(rhs, str); break; } case Binary: { String_format(str, "%s", binaryName(get(obj, Binary,operator))); break; } + case Assign: { + toStringOn(get(obj, Assign,lhs), str); + String_format(str, " = "); + toStringOn(get(obj, Assign,rhs), str); + break; + } case Tbase: String_format(str, "%s", get(obj, Tbase,name)); break; @@ -1049,6 +1088,11 @@ void printiln(oop obj, int indent) printf("PRIMITIVE<%s>\n", symbolName(get(obj, Primitive,name))); break; } + case Reference: { + printf("REFERENCE\n"); + printiln(get(obj, Reference,target), indent+1); + break; + } case Closure: { printf("CLOSURE\n"); printiln(get(obj, Closure,function), indent+1); @@ -1065,13 +1109,21 @@ void printiln(oop obj, int indent) printiln(get(obj, Block,statements), indent+1); break; } + case Address: { + printf("ADDRESS\n"); + printiln(get(obj, Address,rhs), indent+1); + break; + } + case Dereference: { + printf("DEREFERENCE\n"); + printiln(get(obj, Dereference,rhs), indent+1); + break; + } case Unary: { switch (get(obj, Unary,operator)) { case NEG: printf("NEG\n"); break; case NOT: printf("NOT\n"); break; case COM: printf("COM\n"); break; - case DEREF: printf("DEREF\n"); break; - case REF: printf("REF\n"); break; case PREINC: printf("PREINC\n"); break; case PREDEC: printf("PREDEC\n"); break; case POSTINC: printf("POSTINC\n"); break; @@ -1436,8 +1488,8 @@ prod = l:unary ( STAR r:unary { l = newBinary(MUL, l, r) } unary = MINUS r:unary { $$ = newUnary(NEG, r) } | PLING r:unary { $$ = newUnary(NOT, r) } | TILDE r:unary { $$ = newUnary(COM, r) } - | STAR r:unary { $$ = newUnary(DEREF, r) } - | AND r:unary { $$ = newUnary(REF, r) } + | STAR r:unary { $$ = newDereference(r) } + | AND r:unary { $$ = newAddress(r) } | PPLUS r:unary { $$ = newUnary(PREINC, r) } | MMINUS r:unary { $$ = newUnary(PREDEC, r) } | cast @@ -1627,8 +1679,7 @@ oop apply(oop function, oop arguments, oop env) oop body = get(function, Function,body); int parc = get(parameters, Array,size); int argc = get(arguments, Array,size); - if (parc != argc) - fatal("wrong number of arguments, expected %d got %d", parc, argc); + assert(get(function, Function,variadic) || (parc == argc)); oop *parv = get(parameters, Array,elements); oop *argv = get(arguments, Array,elements); Scope_begin(); @@ -1738,6 +1789,7 @@ oop eval(oop exp, oop env) case String: return exp; case Array: assert(!"this cannot happen"); case Primitive: return exp; + case Reference: return exp; case Closure: return exp; case Call: { oop fun = eval(get(exp, Call,function), env); @@ -1756,6 +1808,39 @@ oop eval(oop exp, oop env) Scope_end(); return result; } + case Address: { + oop rhs = get(exp, Address,rhs); + switch (getType(rhs)) { + case Symbol: { + rhs = Scope_lookup(rhs); + if (!rhs) assert(!"this cannot happen"); + return newReference(rhs); + } + default: + break; + } + fatal("cannot take address of: %s", toString(rhs)); + break; + } + case Dereference: { + oop rhs = get(exp, Dereference,rhs); + rhs = eval(rhs, nil); + switch (getType(rhs)) { + case Reference: rhs = get(rhs, Reference,target); break; + default: + printf("cannot dereference\n"); + println(rhs); + exit(1); + } + switch (getType(rhs)) { + case Variable: return get(rhs, Variable,value); + default: + printf("cannot complete dereference\n"); + println(rhs); + exit(1); + } + break; + } case Unary: { oop rhs = eval(get(exp, Unary,rhs), env); switch (get(exp, Unary,operator)) { @@ -1764,8 +1849,6 @@ oop eval(oop exp, oop env) : newInteger(-integerValue(rhs)) ); case NOT: return isFalse(rhs) ? true : false; case COM: return newInteger(~integerValue(rhs)); - case DEREF: assert(!"unimplemented"); - case REF: assert(!"unimplemented"); case PREINC: assert(!"unimplemented"); case PREDEC: assert(!"unimplemented"); case POSTINC: assert(!"unimplemented"); @@ -1836,7 +1919,26 @@ oop eval(oop exp, oop env) break; } case Assign: { - assert(!"unimplemented"); + oop lhs = get(exp, Assign,lhs); + oop rhs = eval(get(exp, Assign,rhs), nil); + switch (getType(lhs)) { + case Dereference: { + lhs = eval(get(lhs, Dereference,rhs), nil); + switch (getType(lhs)) { + case Reference: { + lhs = get(lhs, Reference,target); + switch (getType(lhs)) { + case Variable: return set(lhs, Variable,value, rhs); + default: break; + } + } + default: break; + } + break; + } + default: break; + } + fatal("cannot assign to: %s", toString(lhs)); break; } case Cast: { @@ -1897,6 +1999,7 @@ oop eval(oop exp, oop env) if (!isNil(init)) valu = eval(init, nil); declareVariable(name, type, valu); } + return nil; } case Scope: break; case TypeName: break; @@ -1914,33 +2017,36 @@ oop preval(oop exp) { if (opt_v > 2) { printf("PREVAL "); println(exp); } switch (getType(exp)) { - case Undefined: break; - case Input: break; - case Integer: return exp; - case Float: return exp; - case Symbol: break; - case Pair: break; - case String: break; - case Array: break; - case Primitive: break; - case Closure: break; - case Call: break; - case Block: break; - case Unary: break; - case Binary: break; - case Assign: break; - case Cast: break; - case While: break; - case For: break; - case If: break; - case Return: break; - case Continue: break; - case Break: break; - case Tbase: break; - case Tpointer: break; - case Tarray: break; - case Tstruct: break; - case Tfunction: break; + case Undefined: break; + case Input: break; + case Integer: return exp; + case Float: return exp; + case Symbol: break; + case Pair: break; + case String: break; + case Array: break; + case Primitive: break; + case Reference: break; + case Closure: break; + case Call: break; + case Block: break; + case Address: break; + case Dereference: break; + case Unary: break; + case Binary: break; + case Assign: break; + case Cast: break; + case While: break; + case For: break; + case If: break; + case Return: break; + case Continue: break; + case Break: break; + case Tbase: break; + case Tpointer: break; + case Tarray: break; + case Tstruct: break; + case Tfunction: break; case VarDecls: { oop vars = get(exp, VarDecls,variables); Array_do(vars, var) { @@ -1989,6 +2095,17 @@ oop prim_printf(int argc, oop *argv, oop env) // array n += printf("%ld", _integerValue(arg)); continue; } + case 'p': { + switch (getType(arg)) { + case Reference: + n += printf("%p", get(arg, Reference,target)); + continue; + default: + break; + } + fatal("%%p conversion argument is %s", getTypeName(arg)); + continue; + } case 's': { if (!is(String, arg)) fatal("%%d conversion argument is %s", getTypeName(arg)); @@ -2007,6 +2124,25 @@ oop prim_printf(int argc, oop *argv, oop env) // array return newInteger(n); } +int toBoolean(oop arg) +{ + switch (getType(arg)) { + case Integer: return _integerValue(arg); + case Float: return integerValue(arg); + case Reference: return 1; + default: fatal("cannot convert %s to boolean", getTypeName(arg)); + } + return 0; +} + +oop prim_assert(int argc, oop *argv, oop env) // array +{ + if (argc != 1) fatal("assert: wrong number of arguments"); + int value = toBoolean(argv[0]); + if (!value) fatal("assertion failed\n"); + return nil; +} + enum opcode_t { iHALT = 0, iPUSH, iPOP, iNOT, iCOM, iNEG, iDEREF, iINDEX, iMUL, iDIV, iMOD, iADD, iSUB, iSHL, iSHR, @@ -2283,6 +2419,7 @@ void compileOn(oop exp, oop program, oop cs, oop bs) case String: EMITio(iPUSH, exp); return; case Array: assert(!"unimplemented"); case Primitive: EMITio(iPUSH, exp); return; + case Reference: assert(!"unimplemented"); case Closure: EMITio(iPUSH, exp); return; case Call: { Object *args = get(exp, Call,arguments); @@ -2308,14 +2445,14 @@ void compileOn(oop exp, oop program, oop cs, oop bs) compileOn(exps[size - 1], program, cs, bs); return; } + case Address: assert(0); + case Dereference: assert(0); case Unary: { compileOn(get(exp, Unary,rhs), program, cs, bs); switch (get(exp, Unary,operator)) { case NEG: EMITi(iNEG); return; case NOT: EMITi(iNOT); return; case COM: EMITi(iCOM); return; - case DEREF: EMITi(iDEREF); return; - case REF: assert(!"unimplemented"); case PREINC: assert(!"unimplemented"); case PREDEC: assert(!"unimplemented"); case POSTINC: assert(!"unimplemented"); @@ -2487,14 +2624,22 @@ oop typeCheck(oop exp, oop fntype) } return nil; } + case Address: { + return newTpointer(typeCheck(get(exp, Address,rhs), fntype)); + } + case Dereference: { + oop rhs = typeCheck(get(exp, Dereference,rhs), fntype); + if (!is(Tpointer, rhs)) { + fatal("cannot dereference '%s'", toString(rhs)); + } + return get(rhs, Tpointer,target); + } case Unary: { oop rhs = typeCheck(get(exp, Unary,rhs), fntype); switch (get(exp, Unary,operator)) { case NEG: assert(!"unimplemented"); case NOT: assert(!"unimplemented"); case COM: assert(!"unimplemented"); - case DEREF: assert(!"unimplemented"); - case REF: return newTpointer(rhs); case PREINC: assert(!"unimplemented"); case PREDEC: assert(!"unimplemented"); case POSTINC: assert(!"unimplemented"); @@ -2534,6 +2679,13 @@ oop typeCheck(oop exp, oop fntype) } return nil; } + case Assign: { + oop lhs = typeCheck(get(exp, Assign,lhs), fntype); + oop rhs = typeCheck(get(exp, Assign,rhs), fntype); + if (lhs != rhs) + fatal("incompatible types assigning '%s' to '%s'", toString(rhs), toString(lhs)); + return lhs; + } case Primitive: { return get(exp, Primitive,type); } @@ -2586,8 +2738,8 @@ oop typeCheck(oop exp, oop fntype) int parc = get(params, Array,size); oop *parv = get(params, Array,elements); int vararg = parc && t_etc == parv[parc - 1]; - if ((vararg && (argc < parc)) || (!vararg && (argc != parc))) - fatal("wrong number (%d %d) of arguments, expected %d", vararg, argc, parc); + if ((vararg && (argc < parc) && parc > 1) || (!vararg && (argc != parc))) + fatal("wrong number (%d) of arguments, expected %d", argc, parc); for (int i = 0; i < argc; ++i) { oop part = parv[i]; if (part == t_etc) break; @@ -2623,7 +2775,7 @@ oop typeCheck(oop exp, oop fntype) vartype = newTfunction(get(vartype, Tfunction,result), ptypes); } } - oop old = Scope_lookup(varname); + oop old = Scope_local(varname); if (old) { // declared oop oldtype = nil; switch (getType(old)) { @@ -2727,6 +2879,10 @@ int main(int argc, char **argv) newTfunction(t_int, newArray2(t_string, t_etc)), prim_printf); + declarePrimitive(intern("assert"), + newTfunction(t_void, newArray1(t_etc)), + prim_assert); + int repls = 0; for (int argn = 1; argn < argc;) { @@ -2753,7 +2909,31 @@ int main(int argc, char **argv) Array_append(args, newInteger(1)); Array_append(args, newStringWith("main")); - oop result = eval(newCall(intern("main"), args), nil); + oop entry = Scope_lookup(intern("main")); + if (!entry || isNil(entry)) fatal("main is not defined"); + if (!is(Function, entry)) fatal("main is not a function"); + oop params = get(get(entry, Function,type), Tfunction, parameters); + oop t_sptr = newTpointer(newTpointer(t_char)); + switch (Array_size(params)) { + default: + fatal("main has too many parameters"); + case 3: + if (Array_get(params, 2) != t_sptr) + fatal("third parameter of main should be 'char **'"); + case 2: + if (Array_get(params, 1) != t_sptr) + fatal("second parameter of main should be 'char **'"); + case 1: + if (Array_get(params, 0) != t_int) + fatal("first parameter of main should be 'int'"); + case 0: + break; + } + + set(entry, Function,variadic, 1); + + if (opt_v > 1) printf("---------------- execute\n"); + oop result = apply(entry, args, nil); if (!is(Integer, result)) { printf("\n=> "); diff --git a/test.txt b/test.txt index 9863345..bf1c7d7 100755 --- a/test.txt +++ b/test.txt @@ -10,6 +10,11 @@ char *bar(void) { return "bye bye"; } int main(int argc, char **argv) { printf("hello, world %d %s\n", foo(), bar()); + int x = 42; + int *p = &x; + printf("x is %d p is %p\n", *p, p); + *p = 666; + printf("x is %d %d\n", x, *p); return 0; }