From c4ded90303c4316948795cf10d8f698f0a244c27 Mon Sep 17 00:00:00 2001 From: Ian Piumarta Date: Thu, 30 Jan 2025 10:25:36 +0900 Subject: [PATCH] better printf(), handle initialisation of pointers and arrays from string literals --- Makefile | 2 +- demofiles/use-after-free.c | 2 + main.leg | 299 ++++++++++++++++++++++--------------- test.txt | 14 ++ 4 files changed, 193 insertions(+), 124 deletions(-) diff --git a/Makefile b/Makefile index b5c569c..a6f6ee2 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ test : main ./main -vv test.txt demo : main - for i in demofiles/*.c; do echo $$i; ./main $$i; done + for i in demofiles/*.c; do echo "--------- $$i"; ./main $$i; done demov : main for i in demofiles/*.c; do echo $$i; ./main -vv $$i; done diff --git a/demofiles/use-after-free.c b/demofiles/use-after-free.c index 2058ffb..3dbb372 100644 --- a/demofiles/use-after-free.c +++ b/demofiles/use-after-free.c @@ -8,8 +8,10 @@ int main() { int *ptr = malloc(sizeof(*ptr)); assert(ptr != 0); *ptr = 42; + printf("%d\n", *ptr); free(ptr); printf("%d\n", *ptr); // use after free *ptr = 43; // use after free + printf("%d\n", *ptr); // use after free return 0; } diff --git a/main.leg b/main.leg index aa6a0f4..80752e8 100644 --- a/main.leg +++ b/main.leg @@ -1,6 +1,6 @@ # main.leg -- C parser + interpreter # -# Last edited: 2025-01-29 18:13:39 by piumarta on zora +# Last edited: 2025-01-30 08:36:36 by piumarta on m1mbp %{ ; @@ -438,6 +438,13 @@ int String_append(oop string, int element) return elements[size] = element; } +char *String_cString(oop string) +{ + String_append(string, 0); + get(string, String,size) -= 1; + return get(string, String,elements); +} + char *String_appendAll(oop string, char *chars, int len) { char *elements = get(string, String,elements); @@ -450,6 +457,12 @@ char *String_appendAll(oop string, char *chars, int len) return chars; } +oop String_appendString(oop string, oop s) +{ + String_appendAll(string, get(s, String,elements), get(string, String,size)); + return s; +} + char *String_format(oop string, char *format, ...) { static char *buf = 0; @@ -1063,12 +1076,7 @@ oop toStringOn(oop obj, oop str) break; case String: { String_append(str, '"'); - char *chars = get(obj, String,elements); - for (int i = 0, n = get(obj, String,size); i < n; ++i) { - int c = chars[i]; - if ((' ' <= c) && (c <= 126)) String_append(str, c); - else String_format(str, "\\x%02x", c); - } + String_appendString(str, obj); String_append(str, '"'); break; } @@ -2157,6 +2165,38 @@ int toBoolean(oop arg) return 0; } +oop pointerType(oop arg) +{ + switch (getType(arg)) { + case Pointer: return get(arg, Pointer,type); + case Array: return get(arg, Array,type); + default: break; + } + return nil; +} + +oop elementType(oop arg) +{ + switch (getType(arg)) { + case Pointer: return get(get(arg, Pointer,type), Tpointer,target); + case Array: return get(get(arg, Array,type), Tarray,target); + default: break; + } + return nil; +} + +oop pointerMemory(oop arg) +{ + oop base = nil; + switch (getType(arg)) { + case Pointer: base = get(arg, Pointer,base); break; + case Array: base = get(arg, Array,base); break; + default: break; + } + if (!is(Memory, base)) return nil; + return base; +} + oop prim_printf(int argc, oop *argv, oop env) // array { oop result = nil; @@ -2168,104 +2208,97 @@ oop prim_printf(int argc, oop *argv, oop env) // array int n = 0; int argn = 1; for (int i = 0; i < size;) { - int c = fmt[i++]; - if (c == '%' && fmt[i]) { - c = fmt[i++]; - if (c == '%') goto echo; - if (argn >= argc) fatal("too few arguments for printf format string"); - oop arg = argv[argn++]; - switch (c) { - case 'd': { - if (!is(Integer, arg)) - fatal("%%d conversion argument is %s", getTypeName(arg)); - n += printf("%ld", _integerValue(arg)); - continue; - } - case 'p': { - switch (getType(arg)) { - case Pointer: { - oop base = get(arg, Pointer,base); - switch (getType(base)) { - case Integer: - n += printf("<%p", (void *)(intptr_t)_integerValue(base)); - break; - case Variable: - n += printf("<&%s", symbolName(get(base, Variable,name))); - break; - case Memory: - n += printf("<%p[%zd]", get(base, Memory,base), get(base, Memory,size)); - break; - default: - fatal("%%p conversion base is %s", getTypeName(base)); - break; - } - printf("%+d>", get(arg, Pointer,offset)); - continue; - } - case Array: { - oop base = get(arg, Array,base); - switch (getType(base)) { - case Integer: - n += printf("[%p", (void *)(intptr_t)_integerValue(base)); - break; - case Variable: - n += printf("[&%s", symbolName(get(base, Variable,name))); - break; - case Memory: - n += printf("[%p[%zd]", get(base, Memory,base), get(base, Memory,size)); - break; - default: - fatal("%%p conversion base is %s", getTypeName(base)); - break; + if (fmt[i] != '%') { + echo: + putchar(fmt[i++]); + ++n; + continue; + } + int j = i, c = 0; + int mod_z = 0, mod_l = 0; + for (;;) { + c = fmt[++j]; + if (!c) goto echo; + if (!strchr(" 0123456789#-+'.zl", c)) break; + if ('z' == c) ++mod_z; + if ('l' == c) ++mod_l; + } + if (!strchr("cdiouxXceEfFgGsp%", c)) + fatal("printf: illegal conversion specifier '%c'", c); + char buf[32]; + if (j - i >= sizeof(buf) - 1) fatal("printf: format too complex"); + int k = 0; + while (i <= j) buf[k++] = fmt[i++]; + assert(k < sizeof(buf)); + buf[k] = 0; + if ('%' == c) { + n += printf(buf, 0); // junk argument defeats gcc's -Wformat-security warning + continue; + } + if (argn >= argc) fatal("printf: too few arguments for format string"); + oop arg = argv[argn++]; + switch (c) { + case 'c': case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': { + if (!is(Integer, arg)) fatal("printf: argument of '%%%c' is not 'int'", c); + long x = _integerValue(arg); + if (mod_z ) n += printf(buf, (size_t)x); + else if (mod_l == 1) n += printf(buf, (long)x); + else if (mod_l == 2) n += printf(buf, (long long)x); + else n += printf(buf, (int)x); + continue; + } + case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': { + if (!is(Float, arg)) fatal("printf: argument of '%%%c' is not 'float'", c); + double x = _floatValue(arg); + n += printf(buf, x); + continue; + } + case 's': { + switch (getType(arg)) { + case String: { + n += printf(buf, String_cString(arg)); + continue; + } + case Pointer: + case Array: { + oop type = elementType(arg); + if (t_char == type) { + oop mem = pointerMemory(arg); + if (nil != mem) { + char *addr = get(mem, Memory,base); + int size = get(mem, Memory,size); + char *term = memchr(addr, '\0', size); + if (!term) + fatal("printf: %%s with unterminated string: %s", toString(arg)); + n += printf(buf, addr); + continue; } - printf("[%d]]", get(arg, Array,size)); - continue; } - default: - break; + break; } - fatal("%%p conversion argument is %s", getTypeName(arg)); - continue; + default: + break; } - case 's': { - switch (getType(arg)) { - case String: { - n += printf("%.*s", get(arg, String,size), get(arg, String,elements)); - continue; - } - case Pointer: { - oop type = get(arg, Pointer,type); - if (t_pchar != type) - fatal("%%s conversion of non-string pointer: %s %s", - toString(type), toString(arg)); - oop base = get(arg, Pointer,base); - switch (getType(base)) { - case Integer: { - if (!_integerValue(base)) - fatal("%%s conversion of null pointer"); - fatal("%%s conversion of arbitrary pointer: %s", toString(arg)); - } - case Variable: fatal("%%s conversion of variable: %s", toString(arg)); - case Memory: fatal("%%s conversion of memory: %s", toString(arg)); - default: assert(!"this cannot happen"); - } - break; - } - default: - break; + fatal("printf: %%s conversion of non-string: %s", toString(arg)); + break; + } + case 'p': { + char tmp[32]; + switch (getType(arg)) { + case Pointer: + case Array: { + buf[k-1] = 's'; + n += printf(buf, toString(arg)); + break; } - fatal("%%s conversion argument is: %s", toString(arg)); - continue; + default: + fatal("printf: %%p conversion of non-pointer: %s", getTypeName(arg)); } - default: - fatal("illegal printf conversion: %%%c", c); + continue; } } - echo: - putchar(c); - ++n; } - if (argn != argc) fatal("too many arguments for printf format string"); + if (argn < argc) fatal("printf: too many arguments for format string"); return newInteger(n); } @@ -2669,19 +2702,33 @@ oop typeCheck(oop exp, oop fntype) List_append(vars, var); if (!isNil(init)) { if (is(Tarray, vartype)) { - if (!is(List, init)) fatal("aggregate initialiser expected for array"); - int isize = List_size(init); - oop osize = get(vartype, Tarray,size); - int asize = isize; - if (!isNil(osize)) asize = _integerValue(osize); - if (isize < asize) fatal("too few initialisers for array"); - if (isize > asize) fatal("too many initialisers for array"); oop etype = get(vartype, Tarray,target); - List_do(init, ini) { - oop itype = typeCheck(ini, fntype); - if (itype != etype) - fatal("cannot initialise array element type '%s' with '%s'", - toString(etype), toString(itype)); + oop asize = get(vartype, Tarray,size); + int isize = 0; + if (t_char == etype && is(String, init)) { + isize = get(init, String,size); + if (isNil(asize)) ++isize; // nul terminator + } + else if (is(List, init)) { + isize = List_size(init); + } + if (isNil(asize)) { + asize = newInteger(isize); + vartype = newTarray(etype, asize); + set(var, Variable,type, vartype); // implicitly sized array + } + else { + int na = _integerValue(asize); + if (isize < na) /*fatal("too few initialisers for array")*/; + if (isize > na) fatal("too many initialisers for array"); + } + if (is(List, init)) { + List_do(init, ini) { + oop itype = typeCheck(ini, fntype); + if (itype != etype) + fatal("cannot initialise array element type '%s' with '%s'", + toString(etype), toString(itype)); + } } } else { @@ -2840,8 +2887,17 @@ oop assign(oop lhs, oop rhs) rhs = newPointer(ltype, get(rhs, Array,base), 0); break; } + case String: { + if (t_pchar == ltype) { + char *chars = String_cString(rhs); + oop memory = newMemory(chars, strlen(chars) + 1); + rhs = newPointer(ltype, memory, 0); + break; + } + } // FALL THROUGH default: { - fatal("cannot assign '%s' = '%s'", toString(lhs), toString(rhs)); + fatal("cannot assign: %s = %s'", + toString(lhs), toString(rhs)); } } } @@ -3325,8 +3381,17 @@ oop eval(oop exp, oop env) } valu = newArray(type, newMemory(mem, typeSize(target) * size), size); if (!isNil(init)) { // size and types checked during typeCheck - List_do(init, ini) { - setArray(valu, do_index, eval(ini, nil)); + if (is(String, init)) { + int isize = get(init, String,size); + assert(isize <= size); + char *chars = get(init, String,elements); + for (int i = 0; i < isize; ++i) + setArray(valu, i, newInteger(chars[i])); + } + else { + List_do(init, ini) { + setArray(valu, do_index, eval(ini, nil)); + } } } declareVariable(name, type, valu); @@ -3977,18 +4042,6 @@ int main(int argc, char **argv) Scope_begin(); // the global scope -#if 0 - declarePrimitive(intern("printf"), - newTfunction(t_int, newList2(t_pchar, t_etc)), - prim_printf); -#endif - -#if 0 - declarePrimitive(intern("assert"), - newTfunction(t_void, newList1(t_etc)), - prim_assert); -#endif - int repls = 0; for (int argn = 1; argn < argc;) { diff --git a/test.txt b/test.txt index 7cd1c13..106f2d3 100755 --- a/test.txt +++ b/test.txt @@ -59,6 +59,20 @@ int main(int argc, char **argv) assert(&array == array); assert(&array[0] == array); + char *sliteral = "string literal"; + char sarray[] = "string array"; + char tarray[13] = "string array"; + char uarray[20] = "string array"; + char varray[12] = "string array"; + + printf("%zd %s\n", sizeof(sliteral), sliteral); + printf("%zd %s\n", sizeof(sarray), sarray); + printf("%zd %s\n", sizeof(tarray), tarray); + printf("%zd %s\n", sizeof(uarray), uarray); + // printf("%zd %s\n", sizeof(varray), varray); // unterminated string + + printf("passed\n"); + return 0; }