浏览代码

Fix for object

pull/4/head
mtardy 4 年前
父节点
当前提交
abe0f07d32
共有 5 个文件被更改,包括 211 次插入176 次删除
  1. +1
    -3
      .gitignore
  2. +10
    -7
      Makefile
  3. +31
    -166
      calc.leg
  4. +166
    -0
      object.c
  5. +3
    -0
      test.txt

+ 1
- 3
.gitignore 查看文件

@ -3,10 +3,8 @@
# ignore generated parser
calc.c
# ignore compiled parser
calc
NOTES.txt
a.out
# macOS artifacts

+ 10
- 7
Makefile 查看文件

@ -1,15 +1,18 @@
CC=cc
PG=leg
LDFLAGS=-L/usr/local/lib -lgc
LEG = leg
CC = cc
CFLAGS = -Wall -g
LDLIBS = -L/usr/local/lib -lgc
cparser: calc.c
$(CC) $(LDFLAGS) -o calc calc.c
# moved LDLIBS to end because ld scans files from left to right and collects only required symbols
calc: calc.c object.c
$(CC) $(CFLAGS) -o calc calc.c $(LDLIBS)
calc.c: calc.leg
$(PG) -o calc.c calc.leg
$(LEG) -o calc.c calc.leg
obj: drafts/draft_object.c
$(CC) $(LDFLAGS) -o draft drafts/draft_object.c
$(CC) -o draft drafts/draft_object.c $(LDLIBS)
clean:
rm calc.c calc draft

+ 31
- 166
calc.leg 查看文件

@ -1,202 +1,67 @@
%{
/* compile: leg -o calc.c calc.leg
* cc -o calc calc.c
/* compile: leg -o calc.c calc.leg
* cc -o calc calc.c
*
* run: echo "2+3" | ./calc
* run: ( echo a=2*3; echo b=3+4; echo c=a*b ) | ./calc
*/
#define SYMBOL_PAYLOAD \
oop value; \
int defined
#include <stdbool.h>
#include <sysexits.h>
#include <assert.h>
#include <gc.h> // NEVER, EVER HAVE TO CALL FREE (EVER) AGAIN (YES, REALLY)
#define malloc(n) GC_MALLOC(n)
#define realloc(o, n) GC_REALLOC(o, n)
#define SYMBOL_INITIALISE(S) \
S.value = null; \
S.defined = false
typedef enum { Undefined, Integer, Symbol } type_t;
#include "object.c"
union object;
typedef union object *oop;
#define YYSTYPE oop
YYSTYPE yylval;
struct Undefined {
type_t type;
};
struct Integer {
type_t type;
int value;
};
struct Symbol {
type_t type;
char *name;
int defined;
oop value; // NULL
};
union object {
type_t type;
struct Undefined Undefined;
struct Integer Integer;
struct Symbol Symbol;
};
union object _null = { .Undefined = { Undefined } };
oop null = &_null;
oop _checkType(oop ptr, type_t type) {
assert(ptr->type == type);
return ptr;
}
#define get(PTR, TYPE, FIELD) _checkType(PTR, TYPE)->TYPE.FIELD
#define set(PTR, TYPE, FIELD, VALUE) _checkType(PTR, TYPE)->TYPE.FIELD = VALUE
void *memcheck(void *ptr)
{
if (NULL == ptr) {
fprintf(stderr, "Error: out of memory\n");
exit(EX_OSERR); // this is as close as we have for 'resource unavailable'
}
return ptr;
}
oop makeInteger(int value) {
oop newInt = memcheck(malloc(sizeof(union object)));
newInt->type = Integer;
newInt->Integer.value = value;
return newInt;
}
oop makeSymbol(char *name) {
oop newSymb = memcheck(malloc(sizeof(union object)));
newSymb->type = Symbol;
newSymb->Symbol.name = name;
newSymb->Symbol.defined = 0;
newSymb->Symbol.value = null;
return newSymb;
}
void print(oop ast) {
switch (ast->type) {
case Undefined:
printf("null");
return;
case Integer:
printf("%i", get(ast, Integer, value));
return;
case Symbol:
printf("%s=", get(ast, Symbol, name));
print(get(ast, Symbol, value));
return;
}
assert(0);
}
void println(oop ast)
{
print(ast);
printf("\n");
}
#define SYMBOL_TABLE_CHUNK 1024
typedef struct table_t {
oop *array;
size_t size;
size_t capacity;
} table_t;
#define TABLE_INITIALISER { NULL, 0, 0 } // first call to table_insert() will initialise storage
table_t table = TABLE_INITIALISER; // safe but not strictly needed on Unix because BSS segment is initialised to all zeroes
ssize_t table_search(table_t *table, char *ident)
{
ssize_t l = 0, r = table->size - 1;
while (l <= r) {
ssize_t mid = (l + r) / 2;
int cmpres = strcmp(get(table->array[mid], Symbol, name), ident);
if (cmpres > 0) r = mid - 1;
else if (cmpres < 0) l = mid + 1;
else return mid; // non-negative result => element found at this index
}
return -1 - l; // negative result => 'not found', reflected around -1 instead of 0 to allow 'not found' at index 0
}
// ssize_t result because -1 means 'error'
ssize_t table_insert(table_t *table, oop object, size_t pos)
{
// Should I use in my code a function starting with _ or is it a convention to prevent its usage ?
_checkType(object, Symbol);
if (pos > table->size) { // don't need to check for pos < 0 because size_t is unsigned
return -1;
}
if (table->size >= table->capacity) {
// on the first call table->array will be NULL and realloc() will behave like malloc()
table->array = memcheck(realloc(table->array, sizeof(oop) * (table->capacity + SYMBOL_TABLE_CHUNK)));
table->capacity += SYMBOL_TABLE_CHUNK;
}
memmove(table->array + pos + 1, table->array + pos, sizeof(*table->array) * (table->size - pos));
table->array[pos] = object;
return ++(table->size);
}
oop intern(char *ident)
{
ssize_t res= table_search(&table, ident); // < 0 => not found
if (res >= 0) return table.array[res];
res= -1 - res; // 'un-negate' the result by reflecting it around X=-1
oop new_symbol = makeSymbol(memcheck(strdup(ident)));
table_insert(&table, new_symbol, res);
return new_symbol;
}
// this should stay out of object.c because it manipulates Symbol members defined in this file
oop update_symbol_value(oop symbol, oop integer)
{
_checkType(symbol, Symbol);
//_checkType(symbol, Symbol);
// For now it will fail with assigning to null
// because for now we can write "a=2 b=a" because everything is passed by value
_checkType(integer, Integer);
symbol->Symbol.value = integer;
//_checkType(integer, Integer);
set(symbol, Symbol,value, integer); // checktyype is implicit, and it's ok for symbols to store any object value?
return symbol;
}
#define YYSTYPE oop
YYSTYPE yylval;
%}
start = e:exp { yylval = e }
exp = - (a:assign { $$ = a }
| s:sum { $$ = s }
)
exp = - (a:assign { $$ = a }
| s:sum { $$ = s }
)
assign = l:IDENT EQUAL n:sum { $$ = update_symbol_value(l, n) }
assign = l:IDENT EQUAL n:sum { $$ = update_symbol_value(l, n) }
sum = PLUS* l:prod
sum = l:prod ### removed PLUS* from beginning and made + a prefix operator just like - instead
( PLUS+ r:prod { get(l, Integer, value) += get(r, Integer, value) }
| MINUS r:prod { get(l, Integer, value) -= get(r, Integer, value) }
)* { $$ = l }
)* { $$ = l }
prod = l:neg
( MULTI r:neg { get(l, Integer, value) *= get(r, Integer, value) }
| DIVIDE r:neg { get(l, Integer, value) /= get(r, Integer, value) }
| MODULO r:neg { get(l, Integer, value) %= get(r, Integer, value) }
)* { $$ = l }
)* { $$ = l }
neg = MINUS n:neg { set(n, Integer, value, -get(n, Integer, value)); $$ = n }
| n:value { $$ = n }
neg = MINUS n:neg { set(n, Integer, value, -get(n, Integer, value)); $$ = n }
| PLUS n:neg { $$ = n } ### moved from sum
| n:value { $$ = n }
value = n:NUMBER { $$ = n }
| NULL { $$ = null; // For now it doesn't work because of _checktype in update_symbol_value() }
| l:IDENT { $$ = get(l, Symbol, value); // Will result in an assertion failed if ident is undefined }
| NULL { $$ = null; /* For now it doesn't work because of _checktype in update_symbol_value() */ }
| l:IDENT { $$ = get(l, Symbol, value); // Will result in an assertion failed if ident is undefined }
- = [ \t]*
- = [ \t\n\r]* ### added newline and carriage return to allow multi-line `programs'
NUMBER = < [0-9]+ > - { $$ = makeInteger(atoi(yytext)) }
IDENT = < [a-zA-Z][a-zA-Z0-9_]* > - { $$ = intern(yytext) }
PLUS = '+' -

+ 166
- 0
object.c 查看文件

@ -0,0 +1,166 @@
#include <stdbool.h>
#include <sysexits.h>
#include <assert.h>
#include <gc.h> // NEVER, EVER HAVE TO CALL FREE (EVER) AGAIN (YES, REALLY)
#define malloc(n) GC_MALLOC(n)
#define realloc(o, n) GC_REALLOC(o, n)
/* Tag stored in the first member of every object; selects which union member is live. */
typedef enum {
    Undefined,
    Integer,
    Symbol
} type_t;

/* Objects are always handled through a pointer to the tagged union. */
union object;
typedef union object *oop;

/* The 'null' object: carries nothing but its tag. */
struct Undefined {
    type_t type;
};

/* A boxed int. */
struct Integer {
    type_t type;
    int value;
};

/*
 * A named symbol.  The including file may define SYMBOL_PAYLOAD (before
 * including this file) to append extra members after the name.
 */
struct Symbol {
    type_t type;
    char *name;
#if defined(SYMBOL_PAYLOAD)
    SYMBOL_PAYLOAD;
#endif
};

/*
 * Every variant starts with a type_t member, so the tag can be read through
 * the bare 'type' member whichever variant was stored.
 */
union object {
    type_t type;
    struct Undefined Undefined;
    struct Integer Integer;
    struct Symbol Symbol;
};

/* The single statically allocated Undefined object, and the pointer used to refer to it. */
union object _null = { .Undefined = { .type = Undefined } };
oop null = &_null;
/* Return the type tag of the object ptr points to; ptr must not be NULL (asserted). */
type_t getType(oop ptr)
{
    assert(ptr);
    type_t tag = ptr->type;
    return tag;
}
/* Return non-zero when obj's type tag equals type, zero otherwise. */
int is(type_t type, oop obj)
{
    type_t actual = getType(obj);
    return type == actual;
}
/*
 * Assert that ptr is non-NULL and carries the given type tag, then hand the
 * same pointer back so the call can be used inside an expression.
 */
oop _checkType(oop ptr, type_t type)
{
    oop checked = ptr;

    assert(ptr);
    assert(ptr->type == type);
    return checked;
}
// Type-checked field accessors.
// TYPE is used twice in each expansion: as the tag passed to _checkType() and as
// the name of the union member to access, so it must be one of the names that is
// both a type_t constant and a member of union object.
// get() expands to an lvalue (callers may assign or compound-assign through it);
// set() expands to the assignment expression itself.
// The whole expansion is wrapped in parens to protect the assignment.
#define get(PTR, TYPE, FIELD) (_checkType(PTR, TYPE)->TYPE.FIELD)
#define set(PTR, TYPE, FIELD, VALUE) (_checkType(PTR, TYPE)->TYPE.FIELD = VALUE)
/*
 * Allocation guard: pass the result of an allocating call through this.
 * A non-NULL pointer is returned unchanged; a NULL pointer prints a
 * diagnostic on stderr and terminates the program with EX_OSERR.
 */
void *memcheck(void *ptr)
{
    if (ptr != NULL) {
        return ptr;
    }
    fprintf(stderr, "Error: out of memory\n");
    exit(EX_OSERR); // this is as close as we have for 'resource unavailable'
}
oop makeInteger(int value) {
oop newInt = memcheck(malloc(sizeof(union object)));
newInt->type = Integer;
newInt->Integer.value = value;
return newInt;
}
oop makeSymbol(char *name) {
oop newSymb = memcheck(malloc(sizeof(union object)));
newSymb->type = Symbol;
newSymb->Symbol.name = name;
# if defined(SYMBOL_INITIALISE)
SYMBOL_INITIALISE(newSymb->Symbol);
# endif // defined(SYMBOL_INITIALISE)
return newSymb;
}
/*
 * Write a textual form of ast to stdout, without a trailing newline:
 *   Undefined -> "null"
 *   Integer   -> its decimal value
 *   Symbol    -> "name=" followed by the printed form of its value
 * ast must not be NULL; an unrecognised type tag trips assert(0).
 */
void print(oop ast)
{
    assert(ast);
    const type_t kind = ast->type;

    if (kind == Undefined) {
        printf("null");
        return;
    }
    if (kind == Integer) {
        printf("%i", get(ast, Integer, value));
        return;
    }
    if (kind == Symbol) {
        printf("%s=", get(ast, Symbol, name));
        print(get(ast, Symbol, value)); // recurse to show what the symbol is bound to
        return;
    }
    assert(0);
}
/* Same as print(), followed by a newline. */
void println(oop ast)
{
    print(ast);
    putchar('\n');
}
// Number of slots added to the table's storage each time table_insert() finds it full.
#define SYMBOL_TABLE_CHUNK 1024
// Growable array of symbol objects.
// table_search() binary-searches it by symbol name, and intern() inserts new
// symbols at the position that search reports, which keeps the entries ordered by name.
typedef struct table_t {
oop *array; // slot storage; NULL until the first insertion
size_t size; // number of slots currently in use
size_t capacity; // number of slots currently allocated
} table_t;
// Positional initialiser: the values follow the member order array, size, capacity.
#define TABLE_INITIALISER { NULL, 0, 0 } // first call to table_insert() will initialise storage
// The global table used by intern().
table_t table = TABLE_INITIALISER; // safe but not strictly needed on Unix because BSS segment is initialised to all zeroes
/*
 * Binary-search table for the symbol named ident.
 *
 * table  - table to search; its entries must be Symbols ordered by name
 * ident  - NUL-terminated name to look for
 *
 * Returns the index of the matching entry (>= 0) when found.  When not found,
 * returns -1 - i, where i is the index at which the name would have to be
 * inserted to keep the table ordered; the result is reflected around -1
 * rather than 0 so that 'not found, insert at index 0' stays distinguishable
 * from 'found at index 0'.
 */
ssize_t table_search(table_t *table, char *ident)
{
    assert(table); assert(ident);

    ssize_t lo = 0;
    // Convert before subtracting: an empty table must give hi == -1 directly,
    // not via unsigned wraparound followed by an implementation-defined conversion.
    ssize_t hi = (ssize_t)table->size - 1;

    while (lo <= hi) {
        // lo + (hi - lo) / 2 cannot overflow, unlike (lo + hi) / 2
        ssize_t mid = lo + (hi - lo) / 2;
        int cmp = strcmp(get(table->array[mid], Symbol, name), ident);

        if (cmp > 0) {
            hi = mid - 1;
        } else if (cmp < 0) {
            lo = mid + 1;
        } else {
            return mid; // non-negative result => element found at this index
        }
    }
    return -1 - lo; // negative result => 'not found'; lo is the insertion point
}
/*
 * Insert object (which must be a Symbol) into table at index pos, shifting
 * the entries from pos onwards up by one slot.
 *
 * Returns the new number of entries, or -1 if pos lies beyond the end of the
 * table (hence the signed ssize_t result).  Storage grows by
 * SYMBOL_TABLE_CHUNK slots whenever the table is full.
 *
 * The type is checked explicitly with is() rather than through _checkType():
 * that helper is meant to be reached only implicitly via get/set, and code
 * that insists on a particular type should check it itself and produce a
 * real error message or assertion failure.
 */
ssize_t table_insert(table_t *table, oop object, size_t pos)
{
    assert(is(Symbol, object));

    // pos is unsigned, so only the upper bound needs checking
    if (pos > table->size) {
        return -1;
    }

    if (table->size >= table->capacity) {
        // on the first call table->array is NULL and realloc() behaves like malloc()
        size_t grown = table->capacity + SYMBOL_TABLE_CHUNK;

        table->array = memcheck(realloc(table->array, sizeof(oop) * grown));
        table->capacity = grown;
    }

    oop *slot = table->array + pos;
    size_t tail = table->size - pos; // entries that have to move up by one

    memmove(slot + 1, slot, sizeof(*table->array) * tail);
    *slot = object;
    table->size += 1;
    return table->size;
}
/*
 * Return the unique Symbol object for the name ident.
 * If the global table already holds a symbol with that name it is returned;
 * otherwise a new symbol is created from a private copy of ident, inserted
 * at the position reported by table_search(), and returned.
 */
oop intern(char *ident)
{
    ssize_t found = table_search(&table, ident);

    if (found < 0) {
        // not found: undo the reflection around -1 to recover the insertion index
        size_t where = (size_t)(-1 - found);
        oop fresh = makeSymbol(memcheck(strdup(ident)));

        table_insert(&table, fresh, where);
        return fresh;
    }
    return table.array[found];
}

+ 3
- 0
test.txt 查看文件

@ -0,0 +1,3 @@
a = 2*3
b = 3+4
c = a*b

正在加载...
取消
保存