From 31b1fa6cdbefc8a41ae3c9ef5bf3777530b29f19 Mon Sep 17 00:00:00 2001 From: mtardy Date: Mon, 6 Jul 2020 19:04:04 +0200 Subject: [PATCH] Add the object structure --- .gitignore | 3 + Makefile | 8 +- calc.leg | 180 +++++++++++++++++++++++++----------- draft.c => drafts/draft.c | 0 draft2.c => drafts/draft2.c | 0 drafts/draft_object.c | 180 ++++++++++++++++++++++++++++++++++++ drafts/object.c | 103 +++++++++++++++++++++ test.sh | 10 +- 8 files changed, 421 insertions(+), 63 deletions(-) rename draft.c => drafts/draft.c (100%) rename draft2.c => drafts/draft2.c (100%) create mode 100644 drafts/draft_object.c create mode 100644 drafts/object.c diff --git a/.gitignore b/.gitignore index 31fcf21..b3741ba 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ calc.c calc a.out + +# macOS artifacts +.DS_Store diff --git a/Makefile b/Makefile index a6db598..936d837 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,15 @@ CC=cc PG=leg +LDFLAGS=-L/usr/local/lib -lgc cparser: calc.c - $(CC) -o calc calc.c + $(CC) -o calc calc.c $(LDFLAGS) calc.c: calc.leg $(PG) -o calc.c calc.leg +obj: drafts/draft_object.c + $(CC) -o draft drafts/draft_object.c $(LDFLAGS) + clean: - rm calc.c calc + rm -f calc.c calc draft diff --git a/calc.leg b/calc.leg index 840aafb..7bb599d 100644 --- a/calc.leg +++ b/calc.leg @@ -6,49 +6,121 @@ * run: echo "2+3" | ./calc */ + #include #include +#include +#include // NEVER, EVER HAVE TO CALL FREE (EVER) AGAIN (YES, REALLY) +#define malloc(n) GC_MALLOC(n) +#define realloc(o, n) GC_REALLOC(o, n) -union u_t { - int ival; - char *sval; -}; -#define YYSTYPE union u_t +typedef enum { Undefined, Integer, Symbol } type_t; + +union object; +typedef union object *oop; + +#define YYSTYPE oop YYSTYPE yylval; -#define SYMBOL_TABLE_CHUNK 1024 +struct Undefined { + type_t type; +}; -typedef struct symbol_t { - char *ident; - bool defined; - int value; -} symbol_t; +struct Integer { + type_t type; + int value; +}; -typedef struct table_t { - symbol_t **array; - size_t 
size; // size_t allows your table to grow past the 1G element limit of a 32-bit number on 64-bit machines - size_t capacity; -} table_t; +struct Symbol { + type_t type; + char *name; + int defined; + oop value; // NULL +}; -#define TABLE_INITIALISER { NULL, 0, 0 } // first call to table_insert() will initialise storage +union object { + type_t type; + struct Undefined Undefined; + struct Integer Integer; + struct Symbol Symbol; +}; -table_t table = TABLE_INITIALISER; // safe but not strictly needed on Unix because BSS segment is initialised to all zeroes +union object _null = { .Undefined = { Undefined } }; +oop null = &_null; + +oop _checkType(oop ptr, type_t type) { + assert(ptr->type == type); + return ptr; +} + +#define get(PTR, TYPE, FIELD) _checkType(PTR, TYPE)->TYPE.FIELD +#define set(PTR, TYPE, FIELD, VALUE) _checkType(PTR, TYPE)->TYPE.FIELD = VALUE void *memcheck(void *ptr) { if (NULL == ptr) { - fprintf(stderr, "Error: out of memory\n"); - exit(EX_OSERR); // this is as close as we have for 'resource unavailable' + fprintf(stderr, "Error: out of memory\n"); + exit(EX_OSERR); // this is as close as we have for 'resource unavailable' } return ptr; } +oop makeInteger(int value) { + oop newInt = memcheck(malloc(sizeof(union object))); + newInt->type = Integer; + newInt->Integer.value = value; + return newInt; +} + +oop makeSymbol(char *name) { + oop newSymb = memcheck(malloc(sizeof(union object))); + newSymb->type = Symbol; + newSymb->Symbol.name = name; + newSymb->Symbol.defined = 0; + newSymb->Symbol.value = null; + return newSymb; +} + +void print(oop ast) { + switch (ast->type) { + case Undefined: + printf("null"); + return; + case Integer: + printf("%i", get(ast, Integer, value)); + return; + case Symbol: + printf("%s=", get(ast, Symbol, name)); + print(get(ast, Symbol, value)); + return; + } + assert(0); +} + +void println(oop ast) +{ + print(ast); + printf("\n"); +} + +#define SYMBOL_TABLE_CHUNK 1024 + +typedef struct table_t { + oop *array; + size_t 
size; + size_t capacity; +} table_t; + +#define TABLE_INITIALISER { NULL, 0, 0 } // first call to table_insert() will initialise storage + +table_t table = TABLE_INITIALISER; // safe but not strictly needed on Unix because BSS segment is initialised to all zeroes + ssize_t table_search(table_t *table, char *ident) { - ssize_t l = 0, r = table->size - 1; // no longer needed as parameters if we pass a table as the first parameter - while (l <= r) { // swapped the order of l and r because I always visualise data as laid out from left-to-right ;-) + ssize_t l = 0, r = table->size - 1; + while (l <= r) { ssize_t mid = (l + r) / 2; - int cmpres= strcmp(table->array[mid]->ident, ident); + int cmpres = strcmp(get(table->array[mid], Symbol, name), ident); if (cmpres > 0) r = mid - 1; else if (cmpres < 0) l = mid + 1; else return mid; // non-negative result => element found at this index @@ -56,45 +128,44 @@ ssize_t table_search(table_t *table, char *ident) return -1 - l; // negative result => 'not found', reflected around -1 instead of 0 to allow 'not found' at index 0 } -ssize_t table_insert(table_t *table, symbol_t *element, size_t pos) +// ssize_t result because -1 means 'error' +ssize_t table_insert(table_t *table, oop object, size_t pos) { + // Should I use in my code a function starting with _ or is it a convention to prevent its usage ? 
+ _checkType(object, Symbol); if (pos > table->size) { // don't need to check for pos < 0 because size_t is unsigned return -1; } if (table->size >= table->capacity) { // on the first call table->array will be NULL and realloc() will behave like malloc() - table->array = memcheck(realloc(table->array, sizeof(symbol_t *) * (table->capacity + SYMBOL_TABLE_CHUNK))); + table->array = memcheck(realloc(table->array, sizeof(oop) * (table->capacity + SYMBOL_TABLE_CHUNK))); table->capacity += SYMBOL_TABLE_CHUNK; } memmove(table->array + pos + 1, table->array + pos, sizeof(*table->array) * (table->size - pos)); - table->array[pos] = element; + table->array[pos] = object; return ++(table->size); } -symbol_t *intern(char *ident, bool create) +oop intern(char *ident) { - ssize_t res = table_search(&table, ident); // < 0 => not found + ssize_t res= table_search(&table, ident); // < 0 => not found if (res >= 0) return table.array[res]; - if (!create) return NULL; - res= -1 - res; // 'un-negate' the resulr by reflecting it around X=-1 - symbol_t *new_symbol = memcheck(calloc(1, sizeof(symbol_t))); // calloc() will init all content to 0 (including .value member) - new_symbol->ident = memcheck(strdup(ident)); // check for out-of-memory - new_symbol->defined = false; // implicit in calloc(), but safer to do it explicitly anyway + res= -1 - res; // 'un-negate' the result by reflecting it around X=-1 + oop new_symbol = makeSymbol(memcheck(strdup(ident))); table_insert(&table, new_symbol, res); return new_symbol; } -symbol_t *update_value(symbol_t * s, int value) +oop update_symbol_value(oop symbol, oop integer) { - s->value = value; - s->defined = true; - return s; + _checkType(symbol, Symbol); + _checkType(integer, Integer); + symbol->Symbol.value = integer; + return symbol; } -char *ident_buf; - %} start = e:exp { yylval = e } @@ -103,42 +174,41 @@ exp = - (a:assign { $$ = a } | s:sum { $$ = s } ) -assign = l:IDENT { ident_buf = strdup(l.sval) } - EQUAL n:sum { symbol_t *nsymb = 
intern(ident_buf, true); $$.ival = update_value(nsymb, n.ival)->value; free(ident_buf) } +assign = l:IDENT EQUAL n:sum { $$ = update_symbol_value(l, n) } sum = PLUS* l:prod - ( PLUS+ r:prod { l.ival += r.ival } - | MINUS r:prod { l.ival -= r.ival } - )* { $$.ival = l.ival } + ( PLUS+ r:prod { l = makeInteger(get(l, Integer, value) + get(r, Integer, value)) /* fresh Integer: l may be the object stored in a symbol, so never mutate it */ } + | MINUS r:prod { l = makeInteger(get(l, Integer, value) - get(r, Integer, value)) } + )* { $$ = l } prod = l:neg - ( MULTI r:neg { l.ival *= r.ival } - | DIVIDE r:neg { l.ival /= r.ival } - | MODULO r:neg { l.ival %= r.ival } - )* { $$.ival = l.ival } + ( MULTI r:neg { l = makeInteger(get(l, Integer, value) * get(r, Integer, value)) /* same rule as sum: results go into a new object */ } + | DIVIDE r:neg { l = makeInteger(get(l, Integer, value) / get(r, Integer, value)) } + | MODULO r:neg { l = makeInteger(get(l, Integer, value) % get(r, Integer, value)) } + )* { $$ = l } -neg = MINUS n:neg { $$.ival = -n.ival } - | n:value { $$.ival = n.ival } +neg = MINUS n:neg { $$ = makeInteger(-get(n, Integer, value)) /* negate into a new object, n stays intact */ } + | n:value { $$ = n } -value = n:NUMBER { $$.ival = n.ival } - | l:IDENT { symbol_t *fsymb = intern(l.sval, false); $$.ival = (fsymb != NULL) ? (fsymb->defined == true) ? 
fsymb->value : 0 : 0 } +value = n:NUMBER { $$ = n } + | l:IDENT { $$ = get(l, Symbol, value); // Will result in an assertion failed if ident is undefined } - = [ \t]* -NUMBER = < [0-9]+ > - { $$.ival = atoi(yytext) } +NUMBER = < [0-9]+ > - { $$ = makeInteger(atoi(yytext)) } PLUS = '+' - MINUS = '-' - MULTI = '*' - DIVIDE = '/' - MODULO = '%' - EQUAL = '=' - -IDENT = < [a-zA-Z]+ > - { $$.sval = yytext } +IDENT = < [a-zA-Z][a-zA-Z0-9_]* > - { $$ = intern(yytext) } %% int main(int argc, char **argv) { while (yyparse()) { - printf("%d\n", yylval.ival); + println(yylval); } return 0; diff --git a/draft.c b/drafts/draft.c similarity index 100% rename from draft.c rename to drafts/draft.c diff --git a/draft2.c b/drafts/draft2.c similarity index 100% rename from draft2.c rename to drafts/draft2.c diff --git a/drafts/draft_object.c b/drafts/draft_object.c new file mode 100644 index 0000000..063f433 --- /dev/null +++ b/drafts/draft_object.c @@ -0,0 +1,180 @@ +#include +#include +#include +#include +#include +#include +#include // NEVER, EVER HAVE TO CALL FREE (EVER) AGAIN (YES, REALLY) +#define malloc(n) GC_MALLOC(n) +#define realloc(o, n) GC_REALLOC(o, n) + +typedef enum { Undefined, Integer, Symbol } type_t; + +union object; +typedef union object *oop; + +struct Undefined { + type_t type; +}; + +struct Integer { + type_t type; + int value; +}; + +struct Symbol { + type_t type; + char *name; + int defined; + oop value; // NULL +}; + +union object { + type_t type; + struct Undefined Undefined; + struct Integer Integer; + struct Symbol Symbol; +}; + +union object _null = { .Undefined = { Undefined } }; +oop null = &_null; + +oop _checkType(oop ptr, type_t type) { + assert(ptr->type == type); + return ptr; +} + +#define get(PTR, TYPE, FIELD) _checkType(PTR, TYPE)->TYPE.FIELD +#define set(PTR, TYPE, FIELD, VALUE) _checkType(PTR, TYPE)->TYPE.FIELD = VALUE + +void *memcheck(void *ptr) +{ + if (NULL == ptr) { + fprintf(stderr, "Error: out of memory\n"); + exit(EX_OSERR); // this 
is as close as we have for 'resource unavailable' + } + return ptr; +} + +oop makeInteger(int value) { + oop newInt = memcheck(malloc(sizeof(union object))); + newInt->type = Integer; + newInt->Integer.value = value; + return newInt; +} + +oop makeSymbol(char *name) { + oop newSymb = memcheck(malloc(sizeof(union object))); + newSymb->type = Symbol; + newSymb->Symbol.name = name; + newSymb->Symbol.defined = 0; + newSymb->Symbol.value = null; + return newSymb; +} + +void print(oop ast) { + switch (ast->type) { + case Undefined: + printf("null"); + return; + case Integer: + printf("%i", get(ast, Integer, value)); + return; + case Symbol: + printf("%s=", get(ast, Symbol, name)); + print(get(ast, Symbol, value)); + return; + } + assert(0); +} + +void println(oop ast) +{ + print(ast); + printf("\n"); +} + +#define SYMBOL_TABLE_CHUNK 4 + +typedef struct table_t { + oop *array; + size_t size; + size_t capacity; +} table_t; + +#define TABLE_INITIALISER { NULL, 0, 0 } // first call to table_insert() will initialise storage + +table_t table = TABLE_INITIALISER; // safe but not strictly needed on Unix because BSS segment is initialised to all zeroes + +ssize_t table_search(table_t *table, char *ident) +{ + ssize_t l = 0, r = table->size - 1; + while (l <= r) { + ssize_t mid = (l + r) / 2; + int cmpres = strcmp(get(table->array[mid], Symbol, name), ident); + if (cmpres > 0) r = mid - 1; + else if (cmpres < 0) l = mid + 1; + else return mid; // non-negative result => element found at this index + } + return -1 - l; // negative result => 'not found', reflected around -1 instead of 0 to allow 'not found' at index 0 +} + +// ssize_t result because -1 means 'error' +ssize_t table_insert(table_t *table, oop object, size_t pos) +{ + // Should I use in my code a function starting with _ or is it a convention to prevent its usage ? 
+ _checkType(object, Symbol); + if (pos > table->size) { // don't need to check for pos < 0 because size_t is unsigned + return -1; + } + + if (table->size >= table->capacity) { + // on the first call table->array will be NULL and realloc() will behave like malloc() + table->array = memcheck(realloc(table->array, sizeof(oop) * (table->capacity + SYMBOL_TABLE_CHUNK))); + table->capacity += SYMBOL_TABLE_CHUNK; + } + + memmove(table->array + pos + 1, table->array + pos, sizeof(*table->array) * (table->size - pos)); + table->array[pos] = object; + return ++(table->size); +} + +oop intern(char *ident) +{ + ssize_t res= table_search(&table, ident); // < 0 => not found + if (res >= 0) return table.array[res]; + res= -1 - res; // 'un-negate' the result by reflecting it around X=-1 + oop new_symbol = makeSymbol(memcheck(strdup(ident))); + table_insert(&table, new_symbol, res); + return new_symbol; +} + +oop update_symbol_value(oop symbol, oop integer) +{ + _checkType(symbol, Symbol); + _checkType(integer, Integer); + symbol->Symbol.value = integer; + return symbol; +} + +int main() +{ + char *line= 0; // this and + size_t linecap= 0; // this are needed for getline() + intern("chaussure"); // identifiers will have no trailing newline so let's test with no trailing newline + printf("Enter identifier names!\n"); + for (;;) { // using an infinite loop simplifies the break/continue logic in the body + ssize_t len= getline(&line, &linecap, stdin); // use getline() to auto-grow the buffer when necessary + if (len < 0) break; // stop at EOF + while ((len > 0) && ('\n' == line[len-1])) line[--len]= 0; // trim newlines from the end + if (len < 1) continue; // ignore empty lines + printf("intern : %p\n", (void *)intern(line)); // %p requires a void * argument + printf("after size : %zu\n", table.size); // table.size is a size_t, so %zu rather than %zi + printf("after capacity : %zu\n", table.capacity); + printf("\n"); + for (size_t i = 0; i < table.size; i++) { // size_t index: no signed/unsigned comparison against table.size + printf("%zu %s\n", i, get(table.array[i], Symbol, name)); + } + printf("\n"); + } +} diff --git a/drafts/object.c 
b/drafts/object.c new file mode 100644 index 0000000..acdaeb5 --- /dev/null +++ b/drafts/object.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include // NEVER, EVER HAVE TO CALL FREE (EVER) AGAIN (YES, REALLY) +#define malloc(n) GC_MALLOC(n) + +typedef enum { Undefined, Integer, Symbol } type_t; + +union object; +typedef union object *oop; + +struct Undefined { + type_t type; +}; + +struct Integer { + type_t type; + int value; +}; + +struct Symbol { + type_t type; + char *name; + int defined; + oop value; // NULL +}; + +union object { + type_t type; + struct Undefined Undefined; + struct Integer Integer; + struct Symbol Symbol; +}; + +union object _null = { .Undefined = { Undefined } }; +oop null = &_null; + +oop _checkType(oop ptr, type_t type) { + assert(ptr->type == type); + return ptr; +} + +#define get(PTR, TYPE, FIELD) _checkType(PTR, TYPE)->TYPE.FIELD +#define set(PTR, TYPE, FIELD, VALUE) _checkType(PTR, TYPE)->TYPE.FIELD = VALUE + +void *memcheck(void *ptr) +{ + if (NULL == ptr) { + fprintf(stderr, "Error: out of memory\n"); + exit(EX_OSERR); + } + return ptr; +} + +oop makeInteger(int value) { + oop newInt = memcheck(malloc(sizeof(union object))); + newInt->type = Integer; + newInt->Integer.value = value; + return newInt; +} + +oop makeSymbol(char *name) { + oop newSymb = memcheck(malloc(sizeof(union object))); // GC-managed like makeInteger; every field is set explicitly below + newSymb->type = Symbol; + newSymb->Symbol.name = name; + newSymb->Symbol.defined = 0; + newSymb->Symbol.value = null; + return newSymb; +} + +void print(oop ast) { + switch (ast->type) { + case Undefined: + printf("null"); + return; + case Integer: + printf("%i", get(ast, Integer, value)); + return; + case Symbol: + printf("%s=", get(ast, Symbol, name)); + print(get(ast, Symbol, value)); + return; + } + assert(0); +} + +void println(oop ast) +{ + print(ast); + printf("\n"); +} + +int main(int argc, char **argv) +{ + oop i = makeInteger(42); + oop s = makeSymbol("symbolic"); + + println(i); + println(s); + + return 0; +} diff 
--git a/test.sh b/test.sh index 48ba5a7..7196d13 100755 --- a/test.sh +++ b/test.sh @@ -15,9 +15,7 @@ echo "5%2" | ./${PROG} | cmp <(echo "1") && echo "test#9 passed!" || true echo "3*2+10*2" | ./${PROG} | cmp <(echo "26") && echo "test#10 passed!" || true echo "-3*2+10*2" | ./${PROG} | cmp <(echo "14") && echo "test#11 passed!" || true echo "- 5% 2 +2-6 / 2" | ./${PROG} | cmp <(echo "-2") && echo "test#12 passed!" || true -echo "a=3" | ./${PROG} | cmp <(echo "3") && echo "test#13 passed!" || true -echo "a=3 a*2" | ./${PROG} | cmp <(printf "3\n6\n") && echo "test#14 passed!" || true -echo "a=10 z=13 a+z" | ./${PROG} | cmp <(printf "10\n13\n23\n") && echo "test#15 passed!" || true -echo "tamales=10 tomato=2 tamales*tomato" | ./${PROG} | cmp <(printf "10\n2\n20\n") && echo "test#16 passed!" || true - -make clean \ No newline at end of file +echo "a=3" | ./${PROG} | cmp <(echo "a=3") && echo "test#13 passed!" || true +echo "a=3 a*2" | ./${PROG} | cmp <(printf "a=3\n6\n") && echo "test#14 passed!" || true +echo "a=10 z=13 a+z" | ./${PROG} | cmp <(printf "a=10\nz=13\n23\n") && echo "test#15 passed!" || true +echo "tamales=10 tomato=2 tamales*tomato" | ./${PROG} | cmp <(printf "tamales=10\ntomato=2\n20\n") && echo "test#16 passed!" || true \ No newline at end of file