diff --git a/.gitignore b/.gitignore
index b3741ba..78b6983 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,10 +3,9 @@
 
 # ignore generated parser
 calc.c
-
-# ignore compiled parser
 calc
-
+calc.dSYM/
+NOTES.txt
 a.out
 
 # macOS artifcats
diff --git a/Makefile b/Makefile
index 936d837..31d5035 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,18 @@
-CC=cc
-PG=leg
-LDFLAGS=-L/usr/local/lib -lgc
+LEG = leg
+CC = cc
+CFLAGS = -Wall -g
+LDLIBS = -L/usr/local/lib -lgc
 
-cparser: calc.c
-	$(CC) $(LDFLAGS) -o calc calc.c
+# moved LDLIBS to end because ld scans files from left to right and collects only required symbols
+
+calc: calc.c object.c
+	$(CC) $(CFLAGS) -o calc calc.c $(LDLIBS)
 
 calc.c: calc.leg
-	$(PG) -o calc.c calc.leg
+	$(LEG) -o calc.c calc.leg
 
 obj: drafts/draft_object.c
-	$(CC) $(LDFLAGS) -o draft drafts/draft_object.c
+	$(CC) -o draft drafts/draft_object.c $(LDLIBS)
 
 clean:
 	rm calc.c calc draft
diff --git a/calc.leg b/calc.leg
index 2d0817a..2ef50c5 100644
--- a/calc.leg
+++ b/calc.leg
@@ -1,204 +1,96 @@
 %{
-/* compile: leg -o calc.c calc.leg
- * cc -o calc calc.c
+/* compile: leg -o calc.c calc.leg
+ *          cc -o calc calc.c
  *
- * run: echo "2+3" | ./calc
+ * run:     ( echo a=2*3; echo b=3+4; echo c=a*b ) | ./calc
  */
 
+#define SYMBOL_PAYLOAD \
+    oop value; \
+    int defined
 
-#include
-#include
-#include
-#include // NEVER, EVER HAVE TO CALL FREE (EVER) AGAIN (YES, REALLY)
-#define malloc(n) GC_MALLOC(n)
-#define realloc(o, n) GC_REALLOC(o, n)
+#define SYMBOL_INITIALISE(S) \
+    S.value = null; \
+    S.defined = false
 
-typedef enum { Undefined, Integer, Symbol } type_t;
+#include "object.c"
 
-union object;
-typedef union object *oop;
-
-#define YYSTYPE oop
-YYSTYPE yylval;
-
-struct Undefined {
-    type_t type;
-};
-
-struct Integer {
-    type_t type;
-    int value;
-};
-
-struct Symbol {
-    type_t type;
-    char *name;
-    int defined;
-    oop value; // NULL
-};
-
-union object {
-    type_t type;
-    struct Undefined Undefined;
-    struct Integer Integer;
-    struct Symbol Symbol;
-};
-
-union object _null = { .Undefined = { Undefined } };
-oop null = &_null;
-
-oop _checkType(oop ptr, type_t type) {
-    assert(ptr->type == type);
-    return ptr;
-}
-
-#define get(PTR, TYPE, FIELD) _checkType(PTR, TYPE)->TYPE.FIELD
-#define set(PTR, TYPE, FIELD, VALUE) _checkType(PTR, TYPE)->TYPE.FIELD = VALUE
-
-void *memcheck(void *ptr)
-{
-    if (NULL == ptr) {
-        fprintf(stderr, "Error: out of memory\n");
-        exit(EX_OSERR); // this is as close as we have for 'resource unavailable'
-    }
-    return ptr;
-}
-
-oop makeInteger(int value) {
-    oop newInt = memcheck(malloc(sizeof(union object)));
-    newInt->type = Integer;
-    newInt->Integer.value = value;
-    return newInt;
-}
-
-oop makeSymbol(char *name) {
-    oop newSymb = memcheck(malloc(sizeof(union object)));
-    newSymb->type = Symbol;
-    newSymb->Symbol.name = name;
-    newSymb->Symbol.defined = 0;
-    newSymb->Symbol.value = null;
-    return newSymb;
-}
-
-void print(oop ast) {
-    switch (ast->type) {
-    case Undefined:
-        printf("null");
-        return;
-    case Integer:
-        printf("%i", get(ast, Integer, value));
-        return;
-    case Symbol:
-        printf("%s=", get(ast, Symbol, name));
-        print(get(ast, Symbol, value));
-        return;
-    }
-    assert(0);
-}
-
-void println(oop ast)
-{
-    print(ast);
-    printf("\n");
-}
-
-#define SYMBOL_TABLE_CHUNK 1024
-
-typedef struct table_t {
-    oop *array;
-    size_t size;
-    size_t capacity;
-} table_t;
-
-#define TABLE_INITIALISER { NULL, 0, 0 } // first call to table_insert() will initialise storage
-
-table_t table = TABLE_INITIALISER; // safe but not strictly needed on Unix because BSS segment is initialised to all zeroes
-
-ssize_t table_search(table_t *table, char *ident)
-{
-    ssize_t l = 0, r = table->size - 1;
-    while (l <= r) {
-        ssize_t mid = (l + r) / 2;
-        int cmpres = strcmp(get(table->array[mid], Symbol, name), ident);
-        if (cmpres > 0) r = mid - 1;
-        else if (cmpres < 0) l = mid + 1;
-        else return mid; // non-negative result => element found at this index
-    }
-    return -1 - l; // negative result => 'not found', reflected around -1 instead of 0 to allow 'not found' at index 0
-}
-
-// ssize_t result because -1 means 'error'
-ssize_t table_insert(table_t *table, oop object, size_t pos)
-{
-    // Should I use in my code a function starting with _ or is it a convention to prevent its usage ?
-    _checkType(object, Symbol);
-    if (pos > table->size) { // don't need to check for pos < 0 because size_t is unsigned
-        return -1;
-    }
-
-    if (table->size >= table->capacity) {
-        // on the first call table->array will be NULL and realloc() will behave like malloc()
-        table->array = memcheck(realloc(table->array, sizeof(oop) * (table->capacity + SYMBOL_TABLE_CHUNK)));
-        table->capacity += SYMBOL_TABLE_CHUNK;
-    }
-
-    memmove(table->array + pos + 1, table->array + pos, sizeof(*table->array) * (table->size - pos));
-    table->array[pos] = object;
-    return ++(table->size);
-}
-
-oop intern(char *ident)
-{
-    ssize_t res= table_search(&table, ident); // < 0 => not found
-    if (res >= 0) return table.array[res];
-    res= -1 - res; // 'un-negate' the result by reflecting it around X=-1
-    oop new_symbol = makeSymbol(memcheck(strdup(ident)));
-    table_insert(&table, new_symbol, res);
-    return new_symbol;
-}
 
+// this should stay out of object.c because it manipulates Symbol members defined in this file
 oop update_symbol_value(oop symbol, oop integer) {
-    _checkType(symbol, Symbol);
+    //_checkType(symbol, Symbol);
     // For now it will fail with assigning to null
     // because for now we can write "a=2 b=a" because everything is passed by value
-    _checkType(integer, Integer);
-    symbol->Symbol.value = integer;
+    //_checkType(integer, Integer);
+    set(symbol, Symbol, value, integer); // checkType is implicit, and it's ok for symbols to store any object value?
     return symbol;
 }
 
+#define YYSTYPE oop
+
+YYSTYPE yylval;
+
 %}
 
 
 start = e:exp { yylval = e }
 
 exp = - (a:assign { $$ = a }
-        | s:sum { $$ = s }
+        | d:delete { $$ = d }
+        | p:prim { $$ = p }
         )
 
-assign = l:IDENT EQUAL n:sum { $$ = update_symbol_value(l, n) }
+assign = l:IDENT EQUAL p:prim { $$ = update_symbol_value(l, p) }
+
+# it is really unhappy that I have to repeat the code in value because
+# I need both information: map identifier and the key
+delete = DEL
+        ( i:IDENT DOT p:STRING
+        | i:IDENT LBRAC p:prim RBRAC
+        ) { $$ = map_del(get(i, Symbol, value), p) }
+
+prim = ( s:sum { $$ = s }
+        | s:string { $$ = s }
+        | m:map { $$ = m }
+        )
+
+# Map
+map = LCB RCB { $$ = makeMap() }
+        | LCB p:prop RCB { $$ = p; }
+
+prop = k:STRING COLON v:prim COMMA p:prop { $$ = map_set(p, k, v) }
+        | k:STRING COLON v:prim { $$ = map_set(makeMap(), k, v) }
+
+# String
+string = (SQUOTE | DQUOTE) s:STRING (SQUOTE | DQUOTE) { $$ = s }
 
-sum = PLUS* l:prod
+# Number
+sum = l:prod
         ( PLUS+ r:prod { get(l, Integer, value) += get(r, Integer, value) }
         | MINUS r:prod { get(l, Integer, value) -= get(r, Integer, value) }
         )* { $$ = l }
 
-prod = l:neg
-        ( MULTI r:neg { get(l, Integer, value) *= get(r, Integer, value) }
-        | DIVIDE r:neg { get(l, Integer, value) /= get(r, Integer, value) }
-        | MODULO r:neg { get(l, Integer, value) %= get(r, Integer, value) }
+prod = l:sign
+        ( MULTI r:sign { get(l, Integer, value) *= get(r, Integer, value) }
+        | DIVIDE r:sign { get(l, Integer, value) /= get(r, Integer, value) }
+        | MODULO r:sign { get(l, Integer, value) %= get(r, Integer, value) }
         )* { $$ = l }
 
-neg = MINUS n:neg { set(n, Integer, value, -get(n, Integer, value)); $$ = n }
-        | n:value { $$ = n }
+sign = MINUS n:sign { set(n, Integer, value, -get(n, Integer, value)); $$ = n }
+        | PLUS n:sign { $$ = n }
+        | n:value { $$ = n }
 
 value = n:NUMBER { $$ = n }
-        | NULL { $$ = null; // For now it doesn't work because of _checktype in update_symbol_value() }
-        | l:IDENT { $$ = get(l, Symbol, value); // Will result in an assertion failed if ident is undefined }
+        | NULL { $$ = null }
+        | i:IDENT DOT s:STRING { $$ = map_get(get(i, Symbol, value), s) }
+        | i:IDENT LBRAC p:prim RBRAC { $$ = map_get(get(i, Symbol, value), p) }
+        | i:IDENT { $$ = get(i, Symbol, value) }
 
 
-- = [ \t]*
-NUMBER = < [0-9]+ > - { $$ = makeInteger(atoi(yytext)) }
+- = [ \t\n\r]*
 IDENT = < [a-zA-Z][a-zA-Z0-9_]* > - { $$ = intern(yytext) }
+STRING = < [a-zA-Z][a-zA-Z0-9_]* > - { $$ = makeString(yytext) }
+NUMBER = < [0-9]+ > - { $$ = makeInteger(atoi(yytext)) }
 PLUS = '+' -
 MINUS = '-' -
 MULTI = '*' -
@@ -206,6 +98,16 @@ DIVIDE = '/' -
 MODULO = '%' -
 EQUAL = '=' -
 NULL = 'null' -
+DEL = 'del' -
+COLON = ':' -
+COMMA = ',' -
+DOT = '.' -
+LCB = '{' -
+RCB = '}' -
+LBRAC = '[' -
+RBRAC = ']' -
+DQUOTE = '"' -
+SQUOTE = "'" -
 
 %%
 
diff --git a/object.c b/object.c
new file mode 100644
index 0000000..f40a73d
--- /dev/null
+++ b/object.c
@@ -0,0 +1,299 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sysexits.h>
+#include <gc.h> // NEVER, EVER HAVE TO CALL FREE (EVER) AGAIN (YES, REALLY)
+
+#define malloc(n) GC_MALLOC(n)
+#define realloc(o, n) GC_REALLOC(o, n)
+
+typedef enum { Undefined, Integer, String, Symbol, Map } type_t;
+
+union object;
+typedef union object *oop;
+
+struct Undefined {
+    type_t type;
+};
+
+struct Integer {
+    type_t type;
+    int value;
+};
+
+struct String {
+    type_t type;
+    char *value;
+    size_t size;
+};
+
+struct Symbol {
+    type_t type;
+    char *name;
+# if defined(SYMBOL_PAYLOAD)
+    SYMBOL_PAYLOAD;
+# endif // defined(SYMBOL_PAYLOAD)
+};
+
+struct Pair {
+    oop key;
+    oop value;
+};
+
+struct Map {
+    type_t type;
+    struct Pair *elements; // array of key/value Pairs, kept sorted by key so map_search() can bisect it
+    size_t size;
+    size_t capacity;
+};
+
+union object {
+    type_t type;
+    struct Undefined Undefined;
+    struct Integer Integer;
+    struct String String;
+    struct Symbol Symbol;
+    struct Map Map;
+};
+
+union object _null = { .Undefined = { Undefined } };
+oop null = &_null;
+
+type_t getType(oop ptr) {
+    assert(ptr);
+    return ptr->type;
+}
+
+int is(type_t type, oop obj) {
+    return type == getType(obj);
+}
+
+oop _checkType(oop ptr, type_t type) {
+    assert(ptr);
+    assert(ptr->type == type);
+    return ptr;
+}
+
+// added parens around expansion to protect assignment
+
+#define get(PTR, TYPE, FIELD) (_checkType(PTR, TYPE)->TYPE.FIELD)
+#define set(PTR, TYPE, FIELD, VALUE) (_checkType(PTR, TYPE)->TYPE.FIELD = VALUE)
+
+void *memcheck(void *ptr)
+{
+    if (NULL == ptr) {
+        fprintf(stderr, "Error: out of memory\n");
+        exit(EX_OSERR); // this is as close as we have for 'resource unavailable'
+    }
+    return ptr;
+}
+
+oop makeInteger(int value) {
+    oop newInt = memcheck(malloc(sizeof(union object)));
+    newInt->type = Integer;
+    newInt->Integer.value = value;
+    return newInt;
+}
+
+oop makeString(char *value) {
+    oop newString = memcheck(malloc(sizeof(union object)));
+    newString->type = String;
+    newString->String.value = memcheck(strdup(value));
+    newString->String.size = strlen(value);
+    return newString;
+}
+
+oop makeSymbol(char *name) {
+    oop newSymb = memcheck(malloc(sizeof(union object)));
+    newSymb->type = Symbol;
+    newSymb->Symbol.name = name;
+# if defined(SYMBOL_INITIALISE)
+    SYMBOL_INITIALISE(newSymb->Symbol);
+# endif // defined(SYMBOL_INITIALISE)
+    return newSymb;
+}
+
+oop makeMap() {
+    oop newMap = memcheck(malloc(sizeof(union object)));
+    newMap->type = Map;
+    return newMap;
+}
+
+// Binary search for `key` (a String) among the keys of `map`, compared with strcmp().
+// Returns the index (>= 0) when found, otherwise -1 - (index where the key would be inserted).
+ssize_t map_search(oop map, oop key)
+{
+    assert(map); assert(key);
+    ssize_t l = 0, r = get(map, Map, size) - 1;
+    while (l <= r) {
+        ssize_t mid = (l + r) / 2;
+        int cmpres = strcmp(get(get(map, Map, elements)[mid].key, String, value), get(key, String, value));
+        if (cmpres > 0) r = mid - 1;
+        else if (cmpres < 0) l = mid + 1;
+        else return mid; // non-negative result => element found at this index
+    }
+    return -1 - l; // negative result => 'not found', reflected around -1 instead of 0 to allow 'not found' at index 0
+}
+
+// Look up `key` (a String) in `map`; returns the stored value, or `null` when the key is absent.
+oop map_get(oop map, oop key) {
+    assert(is(Map, map));
+    assert(is(String, key));
+    ssize_t pos = map_search(map, key);
+    if (pos < 0) return null;
+    return get(map, Map, elements)[pos].value;
+}
+
+#define MAP_CHUNK_SIZE 8
+
+// Store `value` under `key` (a String) in `map`, replacing any existing entry for that key.
+// New entries are inserted at the position reported by map_search(); returns `map`.
+oop map_set(oop map, oop key, oop value) {
+    assert(is(Map, map));
+    assert(is(String, key));
+    assert(value);
+    ssize_t pos = map_search(map, key);
+    if (pos >= 0) {
+        get(map, Map, elements)[pos].value = value;
+        // In your opinion, which is better in C
+        // - Writing "return map" here and then write the rest of the function's code flat
+        // - Or use this if / else statement (like here) because of the symmetry of the problem
+        // and the fact that we return the same stuff anyway
+    } else {
+        pos = -1 - pos;
+        // check capacity and expand if needed
+        if (get(map, Map, size) >= get(map, Map, capacity)) {
+            size_t newCapacity = get(map, Map, capacity) + MAP_CHUNK_SIZE;
+            set(map, Map, elements, memcheck(realloc(
+                get(map, Map, elements),
+                sizeof(struct Pair) * newCapacity))
+            );
+            set(map, Map, capacity, newCapacity);
+        }
+        // insert: shift the (size - pos) pairs at and after pos up by one slot (count is in pairs, hence the parentheses)
+        memmove(get(map, Map, elements) + pos + 1, get(map, Map, elements) + pos, sizeof(struct Pair) * (get(map, Map, size) - pos));
+        // Maybe this syntax is not very nice and I should access the Pair stuff differently?
+        // I mean modifying something on a line that begin with "get"... :/
+        get(map, Map, elements)[pos].value = value;
+        get(map, Map, elements)[pos].key = key;
+        set(map, Map, size, get(map, Map, size) + 1);
+    }
+    return map;
+}
+
+// Remove the entry for `key` (a String) from `map`; a key that is not present is a no-op.
+// Returns `map`.
+oop map_del(oop map, oop key) {
+    assert(is(Map, map));
+    assert(is(String, key));
+    ssize_t pos = map_search(map, key);
+    if (pos < 0) return map;
+    size_t tail = get(map, Map, size) - (size_t)pos - 1; // number of pairs after the removed one
+    if (tail > 0) {
+        memmove(get(map, Map, elements) + pos, get(map, Map, elements) + pos + 1, sizeof(struct Pair) * tail);
+    }
+    set(map, Map, size, get(map, Map, size) - 1);
+    return map;
+}
+
+
+void print(oop ast) {
+    assert(ast);
+    switch (ast->type) {
+    case Undefined:
+        printf("null");
+        return;
+    case Integer:
+        printf("%i", get(ast, Integer, value));
+        return;
+    case String:
+        printf("'%s'", get(ast, String, value));
+        return;
+    case Symbol:
+        printf("%s=", get(ast, Symbol, name));
+        print(get(ast, Symbol, value));
+        return;
+    case Map:
+        printf("{");
+        for (size_t i = 0; i < get(ast, Map, size); i++) {
+            printf(" ");
+            // I could write this instead but I want a special print for my string key name
+            // print(get(ast, map, elements)[i].key);
+            printf("%s", get(get(ast, Map, elements)[i].key, String, value));
+            printf(": ");
+            print(get(ast, Map, elements)[i].value);
+            if (i < get(ast, Map, size) - 1) printf(",");
+            else printf(" ");
+        }
+        printf("}");
+        return;
+    }
+    assert(0);
+}
+
+void println(oop ast)
+{
+    print(ast);
+    printf("\n");
+}
+
+#define SYMBOL_TABLE_CHUNK 1024
+
+typedef struct table_t {
+    oop *array;
+    size_t size;
+    size_t capacity;
+} table_t;
+
+#define TABLE_INITIALISER { NULL, 0, 0 } // first call to table_insert() will initialise storage
+
+table_t table = TABLE_INITIALISER; // safe but not strictly needed on Unix because BSS segment is initialised to all zeroes
+
+ssize_t table_search(table_t *table, char *ident)
+{
+    assert(table); assert(ident);
+    ssize_t l = 0, r = table->size - 1;
+    while (l <= r) {
+        ssize_t mid = (l + r) / 2;
+        int cmpres = strcmp(get(table->array[mid], Symbol, name), ident);
+        if (cmpres > 0) r = mid - 1;
+        else if (cmpres < 0) l = mid + 1;
+        else return mid; // non-negative result => element found at this index
+    }
+    return -1 - l; // negative result => 'not found', reflected around -1 instead of 0 to allow 'not found' at index 0
+}
+
+// ssize_t result because -1 means 'error'
+ssize_t table_insert(table_t *table, oop object, size_t pos)
+{
+    // Should I use in my code a function starting with _ or is it a convention to prevent its usage ?
+    /// You should never really have to use that function except implicitly via get/set.
+    /// If you need to insist on a particular type, check it explicitly and produce a real error message or assertion failure.
+    assert(is(Symbol, object));
+    if (pos > table->size) { // don't need to check for pos < 0 because size_t is unsigned
+        return -1;
+    }
+
+    if (table->size >= table->capacity) {
+        // on the first call table->array will be NULL and realloc() will behave like malloc()
+        table->array = memcheck(realloc(table->array, sizeof(oop) * (table->capacity + SYMBOL_TABLE_CHUNK)));
+        table->capacity += SYMBOL_TABLE_CHUNK;
+    }
+
+    memmove(table->array + pos + 1, table->array + pos, sizeof(*table->array) * (table->size - pos));
+    table->array[pos] = object;
+    return ++(table->size);
+}
+
+oop intern(char *ident)
+{
+    ssize_t res= table_search(&table, ident); // < 0 => not found
+    if (res >= 0) return table.array[res];
+    res= -1 - res; // 'un-negate' the result by reflecting it around X=-1
+    oop new_symbol = makeSymbol(memcheck(strdup(ident)));
+    table_insert(&table, new_symbol, res);
+    return new_symbol;
+}
diff --git a/test.txt b/test.txt
new file mode 100644
index 0000000..9a99856
--- /dev/null
+++ b/test.txt
@@ -0,0 +1,6 @@
+myObj = { prop1: 12+2, prop2: { subProp1: "hey", subProp2: --12 } }
+myStr = "prop1"
+myObj[myStr]
+myObj.prop2
+myObj.undefinedProp
+del myObj.prop1
\ No newline at end of file