Browse Source

Add Map and String data structure

pull/4/head
mtardy 4 years ago
parent
commit
2b1a210344
4 changed files with 188 additions and 26 deletions
  1. +1
    -0
      .gitignore
  2. +58
    -21
      calc.leg
  3. +123
    -2
      object.c
  4. +6
    -3
      test.txt

+ 1
- 0
.gitignore View File

@ -4,6 +4,7 @@
# ignore generated parser
calc.c
calc
calc.dSYM/
NOTES.txt
a.out

+ 58
- 21
calc.leg View File

@ -6,11 +6,11 @@
* run: ( echo a=2*3; echo b=3+4; echo c=a*b ) | ./calc
*/
#define SYMBOL_PAYLOAD \
#define SYMBOL_PAYLOAD \
oop value; \
int defined
#define SYMBOL_INITIALISE(S) \
#define SYMBOL_INITIALISE(S) \
S.value = null; \
S.defined = false
@ -24,7 +24,7 @@ oop update_symbol_value(oop symbol, oop integer)
// For now it will fail with assigning to null
// because for now we can write "a=2 b=a" because everything is passed by value
//_checkType(integer, Integer);
set(symbol, Symbol,value, integer); // checktyype is implicit, and it's ok for symbols to store any object value?
set(symbol, Symbol, value, integer); // checktyype is implicit, and it's ok for symbols to store any object value?
return symbol;
}
@ -36,34 +36,61 @@ YYSTYPE yylval;
start = e:exp { yylval = e }
exp = - (a:assign { $$ = a }
| s:sum { $$ = s }
)
exp = - (a:assign { $$ = a }
| d:delete { $$ = d }
| p:prim { $$ = p }
)
assign = l:IDENT EQUAL n:sum { $$ = update_symbol_value(l, n) }
assign = l:IDENT EQUAL p:prim { $$ = update_symbol_value(l, p) }
sum = l:prod ### removed PLUS* from beginning and made + a prefix operator just like - instead
# it is really unhappy that I have to repeate the code in value because
# I need both information: map identifier and the key
delete = DEL
( i:IDENT DOT p:STRING
| i:IDENT LBRAC p:prim RBRAC
) { $$ = map_del(get(i, Symbol, value), p) }
prim = ( s:sum { $$ = s }
| s:string { $$ = s }
| m:map { $$ = m }
)
# Map
map = LCB RCB { $$ = makeMap() }
| LCB p:prop RCB { $$ = p; }
prop = k:STRING COLON v:prim COMMA p:prop { $$ = map_set(p, k, v) }
| k:STRING COLON v:prim { $$ = map_set(makeMap(), k, v) }
# String
string = (SQUOTE | DQUOTE) s:STRING (SQUOTE | DQUOTE) { $$ = s }
# Number
sum = l:prod
( PLUS+ r:prod { get(l, Integer, value) += get(r, Integer, value) }
| MINUS r:prod { get(l, Integer, value) -= get(r, Integer, value) }
)* { $$ = l }
)* { $$ = l }
prod = l:neg
( MULTI r:neg { get(l, Integer, value) *= get(r, Integer, value) }
| DIVIDE r:neg { get(l, Integer, value) /= get(r, Integer, value) }
| MODULO r:neg { get(l, Integer, value) %= get(r, Integer, value) }
)* { $$ = l }
prod = l:sign
( MULTI r:sign { get(l, Integer, value) *= get(r, Integer, value) }
| DIVIDE r:sign { get(l, Integer, value) /= get(r, Integer, value) }
| MODULO r:sign { get(l, Integer, value) %= get(r, Integer, value) }
)* { $$ = l }
neg = MINUS n:neg { set(n, Integer, value, -get(n, Integer, value)); $$ = n }
| PLUS n:neg { $$ = n } ### moved from sum
| n:value { $$ = n }
sign = MINUS n:sign { set(n, Integer, value, -get(n, Integer, value)); $$ = n }
| PLUS n:sign { $$ = n }
| n:value { $$ = n }
value = n:NUMBER { $$ = n }
| NULL { $$ = null; /* For now it doesn't work because of _checktype in update_symbol_value() */ }
| l:IDENT { $$ = get(l, Symbol, value); // Will result in an assertion failed if ident is undefined }
| NULL { $$ = null }
| i:IDENT DOT s:STRING { $$ = map_get(get(i, Symbol, value), s) }
| i:IDENT LBRAC p:prim RBRAC { $$ = map_get(get(i, Symbol, value), p) }
| i:IDENT { $$ = get(i, Symbol, value) }
- = [ \t\n\r]* ### added newline and carriage return to allow multi-line `programs'
NUMBER = < [0-9]+ > - { $$ = makeInteger(atoi(yytext)) }
- = [ \t\n\r]*
IDENT = < [a-zA-Z][a-zA-Z0-9_]* > - { $$ = intern(yytext) }
STRING = < [a-zA-Z][a-zA-Z0-9_]* > - { $$ = makeString(yytext) }
NUMBER = < [0-9]+ > - { $$ = makeInteger(atoi(yytext)) }
PLUS = '+' -
MINUS = '-' -
MULTI = '*' -
@ -71,6 +98,16 @@ DIVIDE = '/' -
MODULO = '%' -
EQUAL = '=' -
NULL = 'null' -
DEL = 'del' -
COLON = ':' -
COMMA = ',' -
DOT = '.' -
LCB = '{' -
RCB = '}' -
LBRAC = '[' -
RBRAC = ']' -
DQUOTE = '"' -
SQUOTE = "'" -
%%

+ 123
- 2
object.c View File

@ -6,7 +6,7 @@
#define malloc(n) GC_MALLOC(n)
#define realloc(o, n) GC_REALLOC(o, n)
typedef enum { Undefined, Integer, Symbol } type_t;
typedef enum { Undefined, Integer, String, Symbol, Map } type_t;
union object;
typedef union object *oop;
@ -20,6 +20,12 @@ struct Integer {
int value;
};
struct String {
type_t type;
char *value;
size_t size;
};
struct Symbol {
type_t type;
char *name;
@ -28,11 +34,25 @@ struct Symbol {
# endif // defined(SYMBOL_PAYLOAD)
};
struct Pair {
oop key;
oop value;
};
struct Map {
type_t type;
struct Pair *elements; // even are keys, odd are values [ key val key val key val ]
size_t size;
size_t capacity;
};
union object {
type_t type;
struct Undefined Undefined;
struct Integer Integer;
struct String String;
struct Symbol Symbol;
struct Map Map;
};
union object _null = { .Undefined = { Undefined } };
@ -55,7 +75,7 @@ oop _checkType(oop ptr, type_t type) {
// added parens around expansion to protect assignment
#define get(PTR, TYPE, FIELD) (_checkType(PTR, TYPE)->TYPE.FIELD)
#define get(PTR, TYPE, FIELD) (_checkType(PTR, TYPE)->TYPE.FIELD)
#define set(PTR, TYPE, FIELD, VALUE) (_checkType(PTR, TYPE)->TYPE.FIELD = VALUE)
void *memcheck(void *ptr)
@ -74,6 +94,14 @@ oop makeInteger(int value) {
return newInt;
}
oop makeString(char *value) {
oop newString = memcheck(malloc(sizeof(union object)));
newString->type = String;
newString->String.value = memcheck(strdup(value));
newString->String.size = strlen(value);
return newString;
}
oop makeSymbol(char *name) {
oop newSymb = memcheck(malloc(sizeof(union object)));
newSymb->type = Symbol;
@ -84,6 +112,82 @@ oop makeSymbol(char *name) {
return newSymb;
}
oop makeMap() {
oop newMap = memcheck(malloc(sizeof(union object)));
newMap->type = Map;
return newMap;
}
ssize_t map_search(oop map, oop key)
{
assert(map); assert(key);
ssize_t l = 0, r = get(map, Map, size) - 1;
while (l <= r) {
ssize_t mid = (l + r) / 2;
int cmpres = strcmp(get(get(map, Map, elements)[mid].key, String, value), get(key, String, value));
if (cmpres > 0) r = mid - 1;
else if (cmpres < 0) l = mid + 1;
else return mid; // non-negative result => element found at this index
}
return -1 - l; // negative result => 'not found', reflected around -1 instead of 0 to allow 'not found' at index 0
}
oop map_get(oop map, oop key) {
assert(is(Map, map));
assert(is(String, key));
ssize_t pos = map_search(map, key);
if (pos < 0) return null;
return get(map, Map, elements)[pos].value;
}
#define MAP_CHUNK_SIZE 8
oop map_set(oop map, oop key, oop value) {
assert(is(Map, map));
assert(is(String, key));
assert(value);
ssize_t pos = map_search(map, key);
if (pos >= 0) {
get(map, Map, elements)[pos].value = value;
// In your opinion, which is better in C
// - Writing "return map" here and then write the rest of the function's code flat
// - Or use this if / else statement (like here) because of the symmetry of the pb
// and the fact that we return the same stuff anyway
} else {
pos = -1 - pos;
// check capacity and expand if needed
if (get(map, Map, size) >= get(map, Map, capacity)) {
size_t newCapacity = get(map, Map, capacity) + MAP_CHUNK_SIZE;
set(map, Map, elements, memcheck(realloc(
get(map, Map, elements),
sizeof(struct Pair) * newCapacity))
);
set(map, Map, capacity, newCapacity);
}
// insert
memmove(get(map, Map, elements) + pos + 1, get(map, Map, elements) + pos, sizeof(struct Pair) * get(map, Map, size) - pos);
// Maybe this syntax is not very nice and I should access the Pair stuff differently?
// I mean modifying something on a line that begin with "get"... :/
get(map, Map, elements)[pos].value = value;
get(map, Map, elements)[pos].key = key;
set(map, Map, size, ++get(map, Map, size));
}
return map;
}
oop map_del(oop map, oop key) {
assert(is(Map, map));
assert(is(String, key));
ssize_t pos = map_search(map, key);
if (pos < 0) return map;
if (pos < get(map, Map, size) - 1) {
memmove(get(map, Map, elements) + pos, get(map, Map, elements) + pos + 1, sizeof(struct Pair) * get(map, Map, size) - pos);
}
set(map, Map, size, --get(map, Map, size));
return map;
}
void print(oop ast) {
assert(ast);
switch (ast->type) {
@ -93,10 +197,27 @@ void print(oop ast) {
case Integer:
printf("%i", get(ast, Integer, value));
return;
case String:
printf("'%s'", get(ast, String, value));
return;
case Symbol:
printf("%s=", get(ast, Symbol, name));
print(get(ast, Symbol, value));
return;
case Map:
printf("{");
for (size_t i = 0; i < get(ast, Map, size); i++) {
printf(" ");
// I could write this instead but I want a special print for my string key name
// print(get(ast, map, elements)[i].key);
printf("%s", get(get(ast, Map, elements)[i].key, String, value));
printf(": ");
print(get(ast, Map, elements)[i].value);
if (i < get(ast, Map, size) - 1) printf(",");
else printf(" ");
}
printf("}");
return;
}
assert(0);
}

+ 6
- 3
test.txt View File

@ -1,3 +1,6 @@
a = 2*3
b = 3+4
c = a*b
myObj = { prop1: 12+2, prop2: { subProp1: "hey", subProp2: --12 } }
myStr = "prop1"
myObj[myStr]
myObj.prop2
myObj.undefinedProp
del myObj.prop1

Loading…
Cancel
Save