%{

/* compile: leg -o parse.c parse.leg
 *          cc -o parse parse.c
 *
 * run:     echo "3+4" | ./parse
 *
 * A small interpreter: the grammar below builds an AST out of maps (one
 * prototype map per node kind) and eval() walks that AST.
 */

/* X-macro listing every AST node kind.  It is expanded several times with
 * different definitions of _DO to generate the proto_t enum, the NAME_symbol
 * globals and the NAME_proto globals, so the three always stay in sync.
 */
#define DO_PROTOS()                                                                                     \
    _DO(if) _DO(while) _DO(call) _DO(func) _DO(compoundStatement) _DO(declaration) _DO(assign)          \
    _DO(map) _DO(symbol) _DO(integer) _DO(string)                                                       \
    _DO(logor) _DO(logand) _DO(bitor) _DO(bitxor) _DO(bitand)                                           \
    _DO(equal) _DO(noteq) _DO(less) _DO(lesseq) _DO(greater) _DO(greatereq) _DO(shleft) _DO(shright)    \
    _DO(add) _DO(sub) _DO(mul) _DO(div) _DO(mod) _DO(not) _DO(neg) _DO(com)                             \
    _DO(getMember) _DO(setMember) _DO(getIndex) _DO(setIndex)

typedef enum {
    t_UNDEFINED=0,
#define _DO(NAME) t_##NAME,
    DO_PROTOS()
#undef _DO
} proto_t;

// extra field stored in every Symbol (consumed by object.c): the node kind named by that symbol
#define SYMBOL_PAYLOAD proto_t prototype;

#include "object.c"

// this is the global scope
oop globals= 0;

/* Every symbol the interpreter needs as a global: all node kind names plus
 * the names of the fields stored inside AST nodes.
 */
#define DO_SYMBOLS()                                                                                    \
    DO_PROTOS() _DO(__proto__) _DO(__name__)                                                            \
    _DO(name) _DO(body) _DO(param) _DO(key) _DO(value) _DO(condition) _DO(consequent) _DO(alternate)    \
    _DO(lhs) _DO(rhs) _DO(scope) _DO(args) _DO(statements)

#define _DO(NAME) oop NAME##_symbol;
DO_SYMBOLS()
#undef _DO

#define _DO(NAME) oop NAME##_proto;
DO_PROTOS()
#undef _DO

// verbosity level, incremented by each -v on the command line
int opt_v = 0;

// create a fresh map whose __proto__ entry is "proto"
oop newObject(oop proto)
{
    oop map = makeMap();
    map_set(map, __proto___symbol, proto);
    return map;
}

// print the __name__ of object, or of the nearest map in its __proto__ chain that has one;
// if no map in the chain is named, a message is written to stderr instead
void printObjectName(oop object)
{
    assert(is(Map, object));
    oop name = map_get(object, __name___symbol);
    if (name != null) {
        println(name);
        return;
    }
    oop proto = map_get(object, __proto___symbol);
    if (proto != null) {
        printObjectName(proto);
    } else {
        fprintf(stderr, "\nThis map has no name\n");
    }
}

// AST node for a map literal
oop newMap()
{
    oop map = newObject(map_proto);
    return map;
}

// AST node for "var name = exp"; exp is null when there is no initialiser
oop newDeclaration(oop name, oop exp)
{
    oop declaration = newObject(declaration_proto);
    map_set(declaration, lhs_symbol, name);
    map_set(declaration, rhs_symbol, exp);
    return declaration;
}

// AST node for if/else and for the ?: operator; alt is null when there is no else branch
oop newIf(oop cond, oop cons, oop alt)
{
    oop obj = newObject(if_proto);
    map_set(obj, condition_symbol,  cond);
    map_set(obj, consequent_symbol, cons);
    map_set(obj, alternate_symbol,  alt);
    return obj;
}

// AST node for "while (cond) body"
oop newWhile(oop cond, oop body)
{
    oop obj = newObject(while_proto);
    map_set(obj, condition_symbol, cond);
    map_set(obj, body_symbol,      body);
    return obj;
}

// AST node for "lhs = rhs" where lhs is an identifier
oop newAssign(oop lhs, oop rhs)
{
    oop assign = newObject(assign_proto);
    map_set(assign, lhs_symbol, lhs);
    map_set(assign, rhs_symbol, rhs);
    return assign;
}

// take char *name or oop already interned?
// AST node for a symbol literal; evaluating it yields "name"
oop newSymbol(oop name)
{
    oop symbol = newObject(symbol_proto);
    // what is the less confusing, name or value? maybe another word like identifier?
    map_set(symbol, value_symbol, name);
    return symbol;
}

// AST node for an integer literal
oop newInteger(int value)
{
    oop integer = newObject(integer_proto);
    map_set(integer, value_symbol, makeInteger(value));
    return integer;
}

// numeric value of the digit character c (0-9, then a-z or A-Z for 10-35),
// or -1 when c is not a digit in any radix up to 36
static int radixDigitValue(int c)
{
    if ('0' <= c && c <= '9') return c - '0';
    if ('a' <= c && c <= 'z') return c - 'a' + 10;
    if ('A' <= c && c <= 'Z') return c - 'A' + 10;
    return -1;
}

// answer non-zero if the character c is a valid digit in radix r
int isradix(int r, int c)
{
    int digit = radixDigitValue(c);
    return 0 <= digit && digit < r;
}

/* Answer a newly allocated copy of s in which backslash escape sequences have
 * been replaced by the characters they denote:
 *   \a \b \e \f \n \r \t \v   the usual control characters
 *   \N \NN \NNN               one to three octal digits
 *   \xH \xHH                  up to two hexadecimal digits
 *   \uH .. \uHHHH             up to four hexadecimal digits
 * Any other escaped character stands for itself (so \\ is a backslash and \'
 * is a quote).  A lone backslash at the very end of s is kept literally.
 *
 * NOTE(review): the result is stored one char at a time, so a \u value wider
 * than a char is truncated, and an escape that yields 0 ends the C string
 * early -- confirm whether either matters to callers.
 * NOTE(review): the original comment said the copy "is garbage collected";
 * that depends on how object.c sets up allocation -- confirm.
 */
char *unescape(char *s)
{
    char *t= strdup(s);
    if (!t) {
        fprintf(stderr, "\nout of memory\n");
        exit(1);
    }
    int in= 0, out= 0, c= 0;
    while (0 != (c= t[in++])) {
        // "in" now indexes the character after c; only treat c as an escape if one exists
        if ('\\' == c && 0 != t[in]) {
            c= t[in++];
            switch (c) {
                case 'a': c= '\a'; break;
                case 'b': c= '\b'; break;
                case 'e': c= '\e'; break;
                case 'f': c= '\f'; break;
                case 'n': c= '\n'; break;
                case 'r': c= '\r'; break;
                case 't': c= '\t'; break;
                case 'v': c= '\v'; break;
                case '0'...'7': {
                    // c is the first octal digit; up to two more may follow
                    c -= '0';
                    if (isradix(8, t[in])) c= c * 8 + radixDigitValue(t[in++]);
                    if (isradix(8, t[in])) c= c * 8 + radixDigitValue(t[in++]);
                    break;
                }
                case 'x': {
                    c= 0;
                    if (isradix(16, t[in])) c= c * 16 + radixDigitValue(t[in++]);
                    if (isradix(16, t[in])) c= c * 16 + radixDigitValue(t[in++]);
                    break;
                }
                case 'u': {
                    c= 0;
                    if (isradix(16, t[in])) c= c * 16 + radixDigitValue(t[in++]);
                    if (isradix(16, t[in])) c= c * 16 + radixDigitValue(t[in++]);
                    if (isradix(16, t[in])) c= c * 16 + radixDigitValue(t[in++]);
                    if (isradix(16, t[in])) c= c * 16 + radixDigitValue(t[in++]);
                    break;
                }
                default:
                    // unknown escape: the escaped character stands for itself
                    break;
            }
        }
        // out never overtakes in, so the copy can be rewritten in place
        t[out++]= c;
    }
    t[out]= 0;
    return t;
}

// AST node for a string literal
oop newString(char *value)
{
    oop string = newObject(string_proto);
    oop primitive_string = makeString(value);
    map_set(string, value_symbol, primitive_string);
    return string;
}

// AST node for a unary operator; proto selects the operator
oop newUnary(oop proto, oop rhs)
{
    oop obj = newObject(proto);
    map_set(obj, rhs_symbol, rhs);
    return obj;
}

// AST node for a binary operator; proto selects the operator
oop newBinary(oop proto, oop lhs, oop rhs)
{
    oop obj = newObject(proto);
    map_set(obj, lhs_symbol, lhs);
    map_set(obj, rhs_symbol, rhs);
    return obj;
}

// factorize a bit or not?
// AST node for "map.key = value"
oop newSetMember(oop map, oop key, oop value)
{
    oop obj = newObject(setMember_proto);
    map_set(obj, map_symbol,   map);
    map_set(obj, key_symbol,   key);
    map_set(obj, value_symbol, value);
    return obj;
}

// AST node for "map.key"
oop newGetMember(oop map, oop key)
{
    oop obj = newObject(getMember_proto);
    map_set(obj, map_symbol, map);
    map_set(obj, key_symbol, key);
    return obj;
}

// AST node for "map[key] = value"
oop newSetIndex(oop map, oop key, oop value)
{
    oop obj = newObject(setIndex_proto);
    map_set(obj, map_symbol,   map);
    map_set(obj, key_symbol,   key);
    map_set(obj, value_symbol, value);
    return obj;
}

// AST node for "map[key]"
oop newGetIndex(oop map, oop key)
{
    oop obj = newObject(getIndex_proto);
    map_set(obj, map_symbol, map);
    map_set(obj, key_symbol, key);
    return obj;
}

// AST node for a function definition; name is null for an anonymous function
oop newFunc(oop name, oop param, oop body)
{
    oop func = newObject(func_proto);
    map_set(func, name_symbol,  name);
    map_set(func, param_symbol, param);
    map_set(func, body_symbol,  body);
    return func;
}

// AST node for a call of "func" with the argument list "args"
oop newCall(oop func, oop args)
{
    oop call = newObject(call_proto);
    map_set(call, func_symbol, func);
    map_set(call, args_symbol, args);
    return call;
}

// AST node for a { ... } block; statements maps 0, 1, 2... to the statements in order
oop newCompoundStatement(oop statements)
{
    oop obj = newObject(compoundStatement_proto);
    map_set(obj, statements_symbol, statements);
    return obj;
}

// this always creates the key in "object"
oop newVariable(oop object, oop key, oop value)
{
    map_set(object, key, value);
    return value;
}

// this looks in object and everything in the __proto__ chain until it finds the key;
// answers null if no map in the chain has the key
oop getVariable(oop object, oop key)
{
    while (!map_hasKey(object, key)) {
        object = map_get(object, __proto___symbol);
        if (object == null) {
            return null;
        }
    }
    return map_get(object, key);
}

// this follows the __proto__ chain until it finds the key and updates it there;
// if no map in the chain has the key it reports the undefined name on stderr and exits
oop setVariable(oop object, oop key, oop value)
{
    while (!map_hasKey(object, key)) {
        object = map_get(object, __proto___symbol);
        if (object == null) {
            fprintf(stderr, "\nUndefined, %s\n", get(key, Symbol, name));
            exit(1);
        }
    }
    return map_set(object, key, value);
}

#define YYSTYPE oop

// the AST of the most recently parsed statement, set by the "start" rule
YYSTYPE yylval;

%}

start   = - e:stmt                                      { yylval = e }

stmt    = e:exp SEMICOLON*                              { $$= e }

exp     = VAR l:IDENT ASSIGN e:exp                      { $$ = newDeclaration(l, e) }
        | VAR l:IDENT                                   { $$ = newDeclaration(l, null) }
        | FUN l:IDENT p:paramList e:stmt                { $$ = newFunc(l, p, e) }
        | FUN p:paramList e:stmt                        { $$ = newFunc(null, p, e) }
        | IF LPAREN c:exp RPAREN t:stmt ELSE f:stmt     { $$ = newIf(c, t, f   ) }
        | IF LPAREN c:exp RPAREN t:stmt                 { $$ = newIf(c, t, null) }
        | WHILE LPAREN c:exp RPAREN e:stmt              { $$ = newWhile(c, e) }
        | s:compoundStatement                           { $$ = newCompoundStatement(s) }
        | l:IDENT ASSIGN e:exp                          { $$ = newAssign(l, e) }
        | l:postfix DOT i:IDENT ASSIGN e:exp            { $$ = newSetMember(l, i, e) }
        | l:postfix LBRAC i:exp RBRAC ASSIGN e:exp      { $$ = newSetIndex(l, i, e) }
        | c:cond                                        { $$ = c }

compoundStatement = LCB m:makeMap
                    ( e:exp                             { map_append(m, e) }
                    ) *
                    RCB                                 { $$ = m }

cond    = c:logor QUERY t:exp COLON f:cond              { $$ = newIf(c, t, f) }
        | logor

# binary operators, from lowest to highest precedence; each level is left-associative

logor   = l:logand
        ( LOGOR r:logand                                { l = newBinary(logor_proto, l, r) }
        )*                                              { $$ = l }

logand  = l:bitor
        ( LOGAND r:bitor                                { l = newBinary(logand_proto, l, r) }
        )*                                              { $$ = l }

bitor   = l:bitxor
        ( BITOR r:bitxor                                { l = newBinary(bitor_proto, l, r) }
        )*                                              { $$ = l }

bitxor  = l:bitand
        ( BITXOR r:bitand                               { l = newBinary(bitxor_proto, l, r) }
        )*                                              { $$ = l }

bitand  = l:eq
        ( BITAND r:eq                                   { l = newBinary(bitand_proto, l, r) }
        )*                                              { $$ = l }

eq      = l:ineq
        ( EQUAL r:ineq                                  { l = newBinary(equal_proto, l, r) }
        | NOTEQ r:ineq                                  { l = newBinary(noteq_proto, l, r) }
        )*                                              { $$ = l }

ineq    = l:shift
        ( LESS r:shift                                  { l = newBinary(less_proto, l, r) }
        | LESSEQ r:shift                                { l = newBinary(lesseq_proto, l, r) }
        | GREATEREQ r:shift                             { l = newBinary(greatereq_proto, l, r) }
        | GREATER r:shift                               { l = newBinary(greater_proto, l, r) }
        )*                                              { $$ = l }

shift   = l:sum
        ( SHLEFT r:sum                                  { l = newBinary(shleft_proto, l, r) }
        | SHRIGHT r:sum                                 { l = newBinary(shright_proto, l, r) }
        )*                                              { $$ = l }

sum     = l:prod
        ( PLUS r:prod                                   { l = newBinary(add_proto, l, r) }
        | MINUS r:prod                                  { l = newBinary(sub_proto, l, r) }
        )*                                              { $$ = l }

prod    = l:prefix
        ( MULTI r:prefix                                { l = newBinary(mul_proto, l, r) }
        | DIVIDE r:prefix                               { l = newBinary(div_proto, l, r) }
        | MODULO r:prefix                               { l = newBinary(mod_proto, l, r) }
        )*                                              { $$ = l }

prefix  = PLUS  n:prefix                                { $$= n }
        | MINUS n:prefix                                { $$= newUnary(neg_proto, n) }
        | TILDE n:prefix                                { $$= newUnary(com_proto, n) }
        | PLING n:prefix                                { $$= newUnary(not_proto, n) }
        | n:postfix                                     { $$= n }

# NOTE(review): the first alternative below captures the member name s but never uses it
# (the call target is i itself) -- confirm whether a member lookup was intended.
postfix = i:value
        ( DOT s:IDENT a:argumentList                    { map_set(a, intern("this"), i); i = newCall(i, a) }
        | DOT s:IDENT !ASSIGN                           { i = newGetMember(i, s) }
        | LBRAC p:exp RBRAC !ASSIGN                     { i = newGetIndex(i, p) }
        | a:argumentList                                { i = newCall(i, a) }
        ) *                                             { $$ = i }

paramList = LPAREN m:makeMap
            ( i:IDENT                                   { map_append(m, i) }
              ( COMMA i:IDENT                           { map_append(m, i) }
              ) *
            ) ?
            RPAREN                                      { $$ = m }

argumentList = LPAREN m:makeMap
               ( e:exp                                  { map_append(m, e) }
                 ( COMMA e:exp                          { map_append(m, e) }
                 ) *
               ) ?
               RPAREN                                   { $$ = m }

value   = n:NUMBER                                      { $$ = n }
        | s:string                                      { $$ = s }
        | s:symbol                                      { $$ = s }
        | m:map                                         { $$ = m }
        | NULL                                          { $$ = null }
        | i:IDENT                                       { $$ = i }
        | LPAREN i:exp RPAREN                           { $$ = i }

string  = SQUOTE < (!SQUOTE char)* > SQUOTE             { $$ = newString(unescape(yytext)) }
        | DQUOTE < (!DQUOTE char)* > DQUOTE             { $$ = newString(unescape(yytext)) }

char    = '\\' . | .

# NOTE(review): in the second alternative i is the node built by newString(), yet it is
# read with get(i, String, value) -- confirm this is the intended accessor.
symbol  = HASH ( i:IDENT                                { $$ = newSymbol(i) }
               | i:string                               { $$ = newSymbol(intern(get(i, String, value))) }
               )

map     = LCB m:newMap
          ( k:IDENT COLON v:exp                         { map_set(m, k, v) }
            ( COMMA k:IDENT COLON v:exp                 { map_set(m, k, v) }
            ) *
          ) ?
          RCB                                           { $$ = m }

makeMap = { $$ = makeMap() }
newMap  = { $$ = newMap() }

-       = (blank | comment)*

blank   = [ \t\n\r]
comment = "//" ( ![\n\r] . )*
        | "/*" ( !"*/" . )* "*/"

IDENT   = < [a-zA-Z][a-zA-Z0-9_]* > -                   { $$ = intern(yytext) }

NUMBER  = '0b' < [01]+ > -                              { $$ = newInteger(strtol(yytext, 0,  2)) }
        | '0x' < [0-9a-fA-F]+ > -                       { $$ = newInteger(strtol(yytext, 0, 16)) }
        | '0'  < [0-7]+ > -                             { $$ = newInteger(strtol(yytext, 0,  8)) }
        |      < [0-9]+ > -                             { $$ = newInteger(strtol(yytext, 0, 10)) }

# keywords must not be followed by an identifier character

FUN       = 'fun'   ![a-zA-Z0-9_] -
VAR       = 'var'   ![a-zA-Z0-9_] -
WHILE     = 'while' ![a-zA-Z0-9_] -
IF        = 'if'    ![a-zA-Z0-9_] -
ELSE      = 'else'  ![a-zA-Z0-9_] -
NULL      = 'null'  ![a-zA-Z0-9_] -

# punctuation; the negative lookaheads keep a short operator from matching the prefix of a longer one

HASH      = '#'         -
LOGOR     = '||'        -
LOGAND    = '&&'        -
BITOR     = '|' ![|=]   -
BITXOR    = '^' ![=]    -
BITAND    = '&' ![&=]   -
EQUAL     = '=='        -
NOTEQ     = '!='        -
LESS      = '<' ![<=]   -
LESSEQ    = '<='        -
GREATEREQ = '>='        -
GREATER   = '>' ![>=]   -
SHLEFT    = '<<' ![=]   -
SHRIGHT   = '>>' ![=]   -
PLUS      = '+' ![+=]   -
MINUS     = '-' ![-=]   -
TILDE     = '~'         -
PLING     = '!' ![=]    -
MULTI     = '*' ![=]    -
DIVIDE    = '/' ![/=]   -
MODULO    = '%' ![=]    -
ASSIGN    = '=' ![=]    -
QUERY     = '?'         -
COLON     = ':'         -
SEMICOLON = ';'         -
COMMA     = ','         -
DOT       = '.'         -
LCB       = '{'         -
RCB       = '}'         -
LBRAC     = '['         -
RBRAC     = ']'         -
LPAREN    = '('         -
RPAREN    = ')'         -
DQUOTE    = '"'         -
SQUOTE    = "'"         -

%%
;

// answer the C int stored in the Integer obj
int getInteger(oop obj)
{
    return get(obj, Integer, value);
}

// null and the integer 0 are false; everything else is true
int isFalse(oop obj)
{
    return obj == null || (is(Integer, obj) && (0 == get(obj, Integer, value)));
}

int isTrue(oop obj)
{
    return !isFalse(obj);
}

// answer a new map that associates keys[i] with values[i] for i = 0, 1, 2...
// stopping at the first i that is not a key of "keys"
oop map_fromArrays(oop keys, oop values)
{
    assert(is(Map, keys));
    assert(is(Map, values));
    int i = 0;
    oop map = makeMap();
    oop key, value;
    oop index;
    while ((index = makeInteger(i)), map_hasKey(keys, index)) {
        key   = map_get(keys,   index);
        value = map_get(values, index);
        map_set(map, key, value);
        i++;
    }
    return map;
}

oop evalArgs(oop scope, oop args);

/* Evaluate ast in scope and answer the result.
 *
 * Primitive values evaluate to themselves, a Symbol is looked up as a variable
 * in scope, and a Map is treated as an AST node whose kind is given by the
 * __name__ of its __proto__.  A map without a __proto__ evaluates to itself.
 */
oop eval(oop scope, oop ast)
{
    switch(ast->type) {
        case Undefined:
        case Integer:
        case String:
        case Function:
            return ast;
        case Symbol:
            return getVariable(scope, ast);
        case Map:
            break;
    }
    assert(is(Map, ast));
    oop proto = map_get(ast, __proto___symbol);
    if (proto == null) {
        return ast;
    }
    // proto_number is the enum version of the proto symbol
    proto_t proto_number = get(map_get(proto, __name___symbol), Symbol, prototype);
    switch (proto_number) {
        case t_UNDEFINED: {
            assert(0);
            return 0;
        }
        case t_map: {
            return ast;
        }
        case t_declaration: {
            oop lhs = map_get(ast, lhs_symbol);
            oop rhs = eval(scope, map_get(ast, rhs_symbol));
            return newVariable(scope, lhs, rhs);
        }
        case t_if: {
            oop condition  = map_get(ast, condition_symbol );
            oop consequent = map_get(ast, consequent_symbol);
            oop alternate  = map_get(ast, alternate_symbol );
            return eval(scope, isTrue(eval(scope, condition)) ? consequent : alternate);
        }
        case t_while: {
            oop condition = map_get(ast, condition_symbol );
            oop body      = map_get(ast, body_symbol);
            // the value of the loop is the value of the last iteration, or null if there was none
            oop result    = null;
            while (isTrue(eval(scope, condition))) result= eval(scope, body);
            return result;
        }
        case t_assign: {
            oop lhs = map_get(ast, lhs_symbol);
            oop rhs = eval(scope, map_get(ast, rhs_symbol));
            return setVariable(scope, lhs, rhs);
        }
        case t_func: {
            oop name  = map_get(ast, name_symbol);
            oop param = map_get(ast, param_symbol);
            oop body  = map_get(ast, body_symbol);
            // the defining scope is remembered so that calls can resolve free variables in it
            oop func  = makeFunction(NULL, param, body, scope);
            if (opt_v) {
                printf("funcscope\n");
                println(scope);
            }
            if (name != null) newVariable(scope, name, func);
            if (opt_v) println(scope);
            return func;
        }
        case t_call: {
            oop func = eval(scope, map_get(ast, func_symbol));
            if (!is(Function, func)) {
                printf("cannot call ");
                println(func);
                exit(1);
            }
            oop args = evalArgs(scope, map_get(ast, args_symbol));
            if (get(func, Function, primitive) == NULL) {
                // user-defined function: bind parameters to arguments in a new scope
                // whose parent is the scope the function was defined in
                oop param      = get(func, Function, param);
                oop localScope = map_fromArrays(param, args);
                map_set(localScope, __proto___symbol, get(func, Function, parentScope));
                if (opt_v) {
                    printf("localscope\n");
                    println(get(func, Function, parentScope));
                    println(localScope);
                }
                return eval(localScope, get(func, Function, body));
            }
            return get(func, Function, primitive)(args);
        }
        case t_compoundStatement: {
            oop statements = map_get(ast, statements_symbol);
            int i = 0;
            oop index;
            oop statement;
            // an empty block evaluates to null
            oop res = null;
            // the block gets its own scope, chained to the enclosing one
            oop localScope = newObject(scope);
            while ((index = makeInteger(i)), map_hasKey(statements, index)) {
                statement = map_get(statements, index);
                res = eval(localScope, statement);
                i++;
            }
            return res;
        }
        case t_getMember: {
            oop map = eval(scope, map_get(ast, map_symbol));
            // the member name is used as-is, it is not evaluated
            oop key = map_get(ast, key_symbol);
            return map_get(map, key);
        }
        case t_setMember: {
            oop map   = eval(scope, map_get(ast, map_symbol));
            oop key   = map_get(ast, key_symbol);
            oop value = eval(scope, map_get(ast, value_symbol));
            return map_set(map, key, value);
        }
        case t_getIndex: {
            oop map = eval(scope, map_get(ast, map_symbol));
            oop key = eval(scope, map_get(ast, key_symbol));
            return map_get(map, key);
        }
        case t_setIndex: {
            oop map   = eval(scope, map_get(ast, map_symbol));
            oop key   = eval(scope, map_get(ast, key_symbol));
            oop value = eval(scope, map_get(ast, value_symbol));
            return map_set(map, key, value);
        }
        case t_symbol:
        case t_integer:
        case t_string: {
            return map_get(ast, value_symbol);
        }
        case t_logor: {
            oop lhs = map_get(ast, lhs_symbol);
            oop rhs = map_get(ast, rhs_symbol);
            // rhs is only evaluated when lhs is false
            if (isTrue(eval(scope, lhs))) return makeInteger(1);
            if (isTrue(eval(scope, rhs))) return makeInteger(1);
            return makeInteger(0);
        }
        case t_logand: {
            oop lhs = map_get(ast, lhs_symbol);
            oop rhs = map_get(ast, rhs_symbol);
            // rhs is only evaluated when lhs is true
            if (isFalse(eval(scope, lhs))) return makeInteger(0);
            if (isFalse(eval(scope, rhs))) return makeInteger(0);
            return makeInteger(1);
        }
        // integer operator applied to both evaluated operands
#       define BINARY(NAME, OPERATOR)                                           \
        case t_##NAME: {                                                        \
            oop lhs = eval(scope, map_get(ast, lhs_symbol));                    \
            oop rhs = eval(scope ,map_get(ast, rhs_symbol));                    \
            return makeInteger(getInteger(lhs) OPERATOR getInteger(rhs));       \
        }
        // same, but a zero right operand is reported instead of invoking undefined behaviour
#       define BINARY_NONZERO(NAME, OPERATOR)                                   \
        case t_##NAME: {                                                        \
            oop lhs = eval(scope, map_get(ast, lhs_symbol));                    \
            oop rhs = eval(scope ,map_get(ast, rhs_symbol));                    \
            int divisor = getInteger(rhs);                                      \
            if (0 == divisor) {                                                 \
                fprintf(stderr, "\ndivision by zero\n");                        \
                exit(1);                                                        \
            }                                                                   \
            return makeInteger(getInteger(lhs) OPERATOR divisor);               \
        }
        BINARY(bitor,     | );
        BINARY(bitxor,    ^ );
        BINARY(bitand,    & );
        BINARY(equal,     ==);
        BINARY(noteq,     !=);
        BINARY(less,      < );
        BINARY(lesseq,    <=);
        BINARY(greatereq, >=);
        BINARY(greater,   > );
        BINARY(shleft,    <<);
        BINARY(shright,   >>);
        BINARY(add,       + );
        BINARY(sub,       - );
        BINARY(mul,       * );
        BINARY_NONZERO(div, / );
        BINARY_NONZERO(mod, % );
#       undef BINARY_NONZERO
#       undef BINARY
        // integer operator applied to the evaluated operand
#       define UNARY(NAME, OPERATOR)                                            \
        case t_##NAME: {                                                        \
            oop rhs = eval(scope ,map_get(ast, rhs_symbol));                    \
            return makeInteger(OPERATOR getInteger(rhs));                       \
        }
        UNARY(not, !);
        UNARY(neg, -);
        UNARY(com, ~);
#       undef UNARY
    }
    // only reached for a node kind that none of the cases above handled
    printf("EVAL ");
    println(ast);
    assert(0);
    return null;
}

// primitive exit(status): terminates the program; status is 0 unless the first argument is an Integer
oop prim_exit(oop params)
{
    int status= 0;
    if (map_hasIntegerKey(params, 0)) {
        oop arg= get(params, Map, elements)[0].value;
        if (is(Integer, arg)) status= get(arg, Integer, value);
    }
    exit(status);
}

// primitive print(...): prints the positional arguments followed by a newline, answers params
oop prim_print(oop params)
{
    assert(is(Map, params));
    for (int i= 0;  i < get(params, Map, size);  ++i) {
        oop key= get(params, Map, elements)[i].key;
        // stop at the first element that is not the positional argument number i
        if (!is(Integer, key) || (i != get(key, Integer, value))) break;
        print(get(params, Map, elements)[i].value);
    }
    printf("\n");
    return params;
}

// answer a new map holding the value of args[i] evaluated in scope, for i = 0, 1, 2...
oop evalArgs(oop scope, oop args)
{
    int i = 0;
    oop params = makeMap();
    oop index;
    while ((index = makeInteger(i)), map_hasKey(args, index)) {
        map_set(params, index, eval(scope, map_get(args, index)));
        i++;
    }
    return params;
}

int main(int argc, char **argv)
{
#   if (USE_GC)
    GC_INIT();
#   endif

    while (argc-- > 1) {
        ++argv;
        if (!strcmp(*argv, "-v")) ++opt_v;
        else {
            fprintf(stderr, "unknown option: %s\n", *argv);
        }
    }

    symbol_table = makeMap();
    globals      = makeMap();

    map_set(globals, intern("exit") , makeFunction(prim_exit,  null, null, globals));
    map_set(globals, intern("print"), makeFunction(prim_print, null, null, globals));

    // intern the name of every well-known symbol
#define _DO(NAME) NAME##_symbol=intern(#NAME);
    DO_SYMBOLS()
#undef _DO

    // tag each node kind symbol with its enum value so that eval() can switch on it
#define _DO(NAME) set(NAME##_symbol, Symbol, prototype, t_##NAME);
    DO_PROTOS()
#undef _DO

    // create one named prototype map per node kind
#define _DO(NAME) NAME##_proto=makeMap(); map_set(NAME##_proto, __name___symbol, NAME##_symbol);
    DO_PROTOS()
#undef _DO

    // read, evaluate and print one statement at a time until the parser fails (or input ends)
    while (yyparse()) {
        if (opt_v) println(yylval);
        println(eval(globals, yylval));
    }

    return 0;

    // never executed; presumably here only to reference yyAccept so it is not reported as unused
    (void)yyAccept;
}