From 9c2a4e0c13da79a1d4c380d655ab7f657b959d3d Mon Sep 17 00:00:00 2001 From: mtardy Date: Tue, 23 Jun 2020 16:25:34 +0200 Subject: [PATCH 1/4] Add arbitrary var identifiers --- .gitignore | 3 ++ calc.leg | 106 ++++++++++++++++++++++++++++++++++++++++------------- test.sh | 3 +- 3 files changed, 86 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index b1b7b45..d332a97 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # vscode settings .vscode/ +# dicho +dicho.c + # ignore generated parser calc.c diff --git a/calc.leg b/calc.leg index 8a4d8c4..fa90bc5 100644 --- a/calc.leg +++ b/calc.leg @@ -6,9 +6,71 @@ * run: echo "2+3" | ./calc */ -#define YYSTYPE int -YYSTYPE yylval = 0; -int var[26]; +#define NSYMBOLS 256 + +union u_t { + int ival; + char *sval; +}; + +typedef struct symbol_t { + char *ident; + int value; +} symbol_t; + +typedef struct table_t { + symbol_t *array[NSYMBOLS]; + int size; +} table_t; + +#define YYSTYPE union u_t +YYSTYPE yylval; + +table_t table; + +// This buffer is for storing yytext identifier during assignement +char *ident_buf; + +symbol_t* new_symbol(char *ident, int value) +{ + symbol_t* s = malloc(sizeof(symbol_t)); + + char * ident_copy = malloc(strlen(ident) + 1); + strcpy(ident_copy, ident); + + free(ident_buf); + + s->ident = ident_copy; + s->value = value; + return s; +} + +symbol_t * add(char *ident, int value) +{ + symbol_t *new_s = new_symbol(ident, value); + + if (table.size >= NSYMBOLS) { + printf("Error: too many symbols\n"); + exit(1); + } + int i = table.size; + table.array[i] = new_s; + table.size++; + return table.array[i]; +} + +// Returns int value of identifier, when identifier is undefined, returns 0 +int get_value(const char *ident) +{ + for(int i=0; i < table.size; i++) { + //printf("comparaison %s, %s\n", table.array[i].ident, ident); + if (strcmp(table.array[i]->ident, ident) == 0) { + return table.array[i]->value; + } + } + // printf("'%s' identifier not found\n", ident); + return 0; +} %} @@ -18,49 +80,43 @@ exp = - (a:assign { $$ = a } | s:sum { $$ = s } ) -assign = l:LETTER EQUAL n:sum { $$ = var[l - 'a'] = n } +assign = l:IDENT { ident_buf = strdup(l.sval) } + EQUAL n:sum { $$.ival = add(ident_buf, n.ival)->value;} sum = PLUS* l:prod - ( PLUS+ r:prod { l += r } - | MINUS r:prod { l -= r } - )* { $$ = l } + ( PLUS+ r:prod { l.ival += r.ival } + | MINUS r:prod { l.ival -= r.ival } + )* { $$.ival = l.ival } prod = l:neg - ( MULTI r:neg { l *= r } - | DIVIDE r:neg { l /= r } - | MODULO r:neg { l %= r } - )* { $$ = l } + ( MULTI r:neg { l.ival *= r.ival } + | DIVIDE r:neg { l.ival /= r.ival } + | MODULO r:neg { l.ival %= r.ival } + )* { $$.ival = l.ival } -neg = MINUS n:neg { $$ = -n } - | n:value { $$ = n } +neg = MINUS n:neg { $$.ival = -n.ival } + | n:value { $$.ival = n.ival } -value = n:NUMBER { $$ = n } - | l:LETTER { $$ = var[l - 'a'] } +value = n:NUMBER { $$.ival = n.ival } + | l:IDENT { $$.ival = get_value(l.sval) } - = [ \t]* -NUMBER = < [0-9]+ > - { $$ = atoi(yytext) } +NUMBER = < [0-9]+ > - { $$.ival = atoi(yytext) } PLUS = '+' - MINUS = '-' - MULTI = '*' - DIVIDE = '/' - MODULO = '%' - -LETTER = (< [a-z] > { $$ = yytext[0] } - |< [A-Z] > { $$ = yytext[0] + 'a' - 'A' } - ) - EQUAL = '=' - +IDENT = < [a-zA-Z]+ > - { $$.sval = yytext } %% int main(int argc, char **argv) { while (yyparse()) { - printf("%d\n", yylval); - } - /* - for (int i = 0; i < 26; i++) { - printf("var[%d]=%d ", i, var[i]); + printf("%d\n", yylval.ival); } - */ return 0; } diff --git a/test.sh b/test.sh index 0e36626..48ba5a7 100755 --- a/test.sh +++ b/test.sh @@ -17,6 +17,7 @@ echo "-3*2+10*2" | ./${PROG} | cmp <(echo "14") && echo "test#11 passed!" || tru echo "- 5% 2 +2-6 / 2" | ./${PROG} | cmp <(echo "-2") && echo "test#12 passed!" || true echo "a=3" | ./${PROG} | cmp <(echo "3") && echo "test#13 passed!" || true echo "a=3 a*2" | ./${PROG} | cmp <(printf "3\n6\n") && echo "test#14 passed!" || true -echo "a=10 z=13 A+z" | ./${PROG} | cmp <(printf "10\n13\n23\n") && echo "test#15 passed!" || true +echo "a=10 z=13 a+z" | ./${PROG} | cmp <(printf "10\n13\n23\n") && echo "test#15 passed!" || true +echo "tamales=10 tomato=2 tamales*tomato" | ./${PROG} | cmp <(printf "10\n2\n20\n") && echo "test#16 passed!" || true make clean \ No newline at end of file From cd9d103d12020ecfa838ee0d4533e08ea650a582 Mon Sep 17 00:00:00 2001 From: mtardy Date: Wed, 24 Jun 2020 18:06:05 +0200 Subject: [PATCH 2/4] Add draft for correct symbol table implementation --- draft.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 draft.c diff --git a/draft.c b/draft.c new file mode 100644 index 0000000..18bcee0 --- /dev/null +++ b/draft.c @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#define SYMBOL_TABLE_CHUNK 4 + +typedef struct symbol_t { + char *ident; + bool defined; + int value; +} symbol_t; + + +typedef struct table_t { + symbol_t **array; + int size; + int capacity; +} table_t; + +table_t table; + +typedef struct bsearch_t { + int pos; + bool found; +} bsearch_t; + +bsearch_t binary_search(symbol_t *arr[], int l, int r, char *ident) +{ + while (r >= l) { + int mid = l + (r - l) / 2; + int cmpres = strcmp(arr[mid]->ident, ident); + if (cmpres > 0) { + r = mid - 1; + } else if (cmpres < 0) { + l = mid + 1; + } else { + bsearch_t res = { mid, true }; + return res; + } + } + bsearch_t res = { l, false }; + return res; +} + +int insert(table_t *table, symbol_t *element, int pos) +{ + if (pos < 0 || pos > table->size) { + return -1; + } + + if (table->size >= table->capacity) { + table->array = realloc(table->array, sizeof(symbol_t *) * (table->capacity + SYMBOL_TABLE_CHUNK)); + if (table->array == NULL) { + printf("Error: running out of memory\n"); + exit(1); + } + table->capacity += SYMBOL_TABLE_CHUNK; + } + + for (int i = table->size; i > pos; i--) { + table->array[i] = table->array[i-1]; + } + table->array[pos] = element; + return ++(table->size); +} + +symbol_t *intern(char *ident, bool create) +{ + bsearch_t res = binary_search(table.array, 0, table.size - 1, ident); + printf("pos:%d\n", res.pos); + if (create) { + symbol_t *new_symbol = malloc(sizeof(symbol_t)); + new_symbol->ident = strdup(ident); + new_symbol->defined = false; + if (res.found) { + free(table.array[res.pos]); + table.array[res.pos] = new_symbol; + } else { + printf("insert:%d\n", insert(&table, new_symbol, res.pos)); + } + return new_symbol; + } else { + if (res.found) { + return table.array[res.pos]; + } else { + return NULL; + } + } +} + +void init_table() +{ + table.array = malloc(sizeof(symbol_t *) * SYMBOL_TABLE_CHUNK); + if (table.array == NULL) { + printf("Error: running out of memory\n"); + exit(1); + } + table.size = 0; + table.capacity = SYMBOL_TABLE_CHUNK; +} + +int main() +{ + init_table(); + char line[256]; + intern("chaussure\n", true); + while (fgets(line, sizeof(line), stdin)) { + printf("intern:%p\n", intern(line, true)); + printf("after size:%d\n", table.size); + printf("after capacity:%d\n", table.capacity); + printf("\n"); + for (int i = 0; i < table.size; i++) { + printf("%d.%s", i, table.array[i]->ident); + } + printf("\n"); + } +} \ No newline at end of file From 01a1ae39193aaf270dd5ad14bf5f8a15b61e4c95 Mon Sep 17 00:00:00 2001 From: mtardy Date: Wed, 24 Jun 2020 19:40:36 +0200 Subject: [PATCH 3/4] Update draft for interning same symbol --- .gitignore | 5 ++--- draft.c | 11 +++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index d332a97..31fcf21 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,10 @@ # vscode settings .vscode/ -# dicho -dicho.c - # ignore generated parser calc.c # ignore compiled parser calc + +a.out diff --git a/draft.c b/draft.c index 18bcee0..52f32b1 100644 --- a/draft.c +++ b/draft.c @@ -10,7 +10,6 @@ typedef struct symbol_t { int value; } symbol_t; - typedef struct table_t { symbol_t **array; int size; @@ -69,13 +68,13 @@ symbol_t *intern(char *ident, bool create) bsearch_t res = binary_search(table.array, 0, table.size - 1, ident); printf("pos:%d\n", res.pos); if (create) { - symbol_t *new_symbol = malloc(sizeof(symbol_t)); - new_symbol->ident = strdup(ident); - new_symbol->defined = false; + symbol_t *new_symbol; if (res.found) { - free(table.array[res.pos]); - table.array[res.pos] = new_symbol; + new_symbol = table.array[res.pos]; } else { + new_symbol = malloc(sizeof(symbol_t)); + new_symbol->ident = strdup(ident); + new_symbol->defined = false; printf("insert:%d\n", insert(&table, new_symbol, res.pos)); } return new_symbol; From a3628b99376eec2da39de8c68a897573d482f9ab Mon Sep 17 00:00:00 2001 From: mtardy Date: Wed, 24 Jun 2020 20:19:48 +0200 Subject: [PATCH 4/4] Add draft ideas to calc.leg and clarify intern() --- calc.leg | 113 ++++++++++++++++++++++++++++++++++++++----------------- draft.c | 30 ++++++++------- 2 files changed, 95 insertions(+), 48 deletions(-) diff --git a/calc.leg b/calc.leg index fa90bc5..6ae6df3 100644 --- a/calc.leg +++ b/calc.leg @@ -6,72 +6,116 @@ * run: echo "2+3" | ./calc */ -#define NSYMBOLS 256 +#include union u_t { int ival; char *sval; }; +#define YYSTYPE union u_t +YYSTYPE yylval; + +#define SYMBOL_TABLE_CHUNK 1024 typedef struct symbol_t { char *ident; + bool defined; int value; } symbol_t; typedef struct table_t { - symbol_t *array[NSYMBOLS]; + symbol_t **array; int size; + int capacity; } table_t; -#define YYSTYPE union u_t -YYSTYPE yylval; - table_t table; -// This buffer is for storing yytext identifier during assignement +typedef struct bsearch_t { + int pos; + bool found; +} bsearch_t; + char *ident_buf; -symbol_t* new_symbol(char *ident, int value) +bsearch_t binary_search(symbol_t *arr[], int l, int r, char *ident) { - symbol_t* s = malloc(sizeof(symbol_t)); + while (r >= l) { + int mid = l + (r - l) / 2; + int cmpres = strcmp(arr[mid]->ident, ident); + if (cmpres > 0) { + r = mid - 1; + } else if (cmpres < 0) { + l = mid + 1; + } else { + bsearch_t res = { mid, true }; + return res; + } + } + bsearch_t res = { l, false }; + return res; +} - char * ident_copy = malloc(strlen(ident) + 1); - strcpy(ident_copy, ident); +int insert(table_t *table, symbol_t *element, int pos) +{ + if (pos < 0 || pos > table->size) { + return -1; + } - free(ident_buf); + if (table->size >= table->capacity) { + table->array = realloc(table->array, sizeof(symbol_t *) * (table->capacity + SYMBOL_TABLE_CHUNK)); + if (table->array == NULL) { + printf("Error: running out of memory\n"); + exit(1); + } + table->capacity += SYMBOL_TABLE_CHUNK; + } - s->ident = ident_copy; - s->value = value; - return s; + for (int i = table->size; i > pos; i--) { + table->array[i] = table->array[i-1]; + } + table->array[pos] = element; + return ++(table->size); } -symbol_t * add(char *ident, int value) +symbol_t *intern(char *ident, bool create) { - symbol_t *new_s = new_symbol(ident, value); + bsearch_t res = binary_search(table.array, 0, table.size - 1, ident); + if (res.found) { + return table.array[res.pos]; + } - if (table.size >= NSYMBOLS) { - printf("Error: too many symbols\n"); - exit(1); + if (create) { + symbol_t *new_symbol = malloc(sizeof(symbol_t)); + new_symbol->ident = strdup(ident); + new_symbol->defined = false; + insert(&table, new_symbol, res.pos); + return new_symbol; + } else { + return NULL; } - int i = table.size; - table.array[i] = new_s; - table.size++; - return table.array[i]; } -// Returns int value of identifier, when identifier is undefined, returns 0 -int get_value(const char *ident) +symbol_t *update_value(symbol_t * s, int value) { - for(int i=0; i < table.size; i++) { - //printf("comparaison %s, %s\n", table.array[i].ident, ident); - if (strcmp(table.array[i]->ident, ident) == 0) { - return table.array[i]->value; - } + s->value = value; + s->defined = true; + return s; +} + + +void init_table() +{ + table.array = malloc(sizeof(symbol_t *) * SYMBOL_TABLE_CHUNK); + if (table.array == NULL) { + printf("Error: running out of memory\n"); + exit(1); } - // printf("'%s' identifier not found\n", ident); - return 0; + table.size = 0; + table.capacity = SYMBOL_TABLE_CHUNK; } + %} start = e:exp { yylval = e } @@ -81,7 +125,7 @@ exp = - (a:assign { $$ = a } ) assign = l:IDENT { ident_buf = strdup(l.sval) } - EQUAL n:sum { $$.ival = add(ident_buf, n.ival)->value;} + EQUAL n:sum { symbol_t *nsymb = intern(ident_buf, true); $$.ival = update_value(nsymb, n.ival)->value; free(ident_buf) } sum = PLUS* l:prod ( PLUS+ r:prod { l.ival += r.ival } @@ -98,7 +142,7 @@ neg = MINUS n:neg { $$.ival = -n.ival } | n:value { $$.ival = n.ival } value = n:NUMBER { $$.ival = n.ival } - | l:IDENT { $$.ival = get_value(l.sval) } + | l:IDENT { symbol_t *fsymb = intern(l.sval, false); $$.ival = (fsymb != NULL) ? (fsymb->defined == true) ? fsymb->value : 0 : 0 } - = [ \t]* NUMBER = < [0-9]+ > - { $$.ival = atoi(yytext) } @@ -114,6 +158,7 @@ IDENT = < [a-zA-Z]+ > - { $$.sval = yytext } int main(int argc, char **argv) { + init_table(); while (yyparse()) { printf("%d\n", yylval.ival); } diff --git a/draft.c b/draft.c index 52f32b1..78d884e 100644 --- a/draft.c +++ b/draft.c @@ -67,23 +67,18 @@ symbol_t *intern(char *ident, bool create) { bsearch_t res = binary_search(table.array, 0, table.size - 1, ident); printf("pos:%d\n", res.pos); + if (res.found) { + return table.array[res.pos]; + } + if (create) { - symbol_t *new_symbol; - if (res.found) { - new_symbol = table.array[res.pos]; - } else { - new_symbol = malloc(sizeof(symbol_t)); - new_symbol->ident = strdup(ident); - new_symbol->defined = false; - printf("insert:%d\n", insert(&table, new_symbol, res.pos)); - } + symbol_t *new_symbol = malloc(sizeof(symbol_t)); + new_symbol->ident = strdup(ident); + new_symbol->defined = false; + printf("insert:%d\n", insert(&table, new_symbol, res.pos)); return new_symbol; } else { - if (res.found) { - return table.array[res.pos]; - } else { - return NULL; - } + return NULL; } } @@ -98,6 +93,13 @@ void init_table() table.capacity = SYMBOL_TABLE_CHUNK; } +symbol_t *update_value(symbol_t * s, int value) +{ + s->value = value; + s->defined = true; + return s; +} + int main() { init_table();