Dynamic PEG for interpreted languages.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

808 lines
18 KiB

%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <math.h>
#include "inputBuffer.c"
#include "class.c"
;
/*
 * Report an unrecoverable error and terminate the program.
 *
 * Writes "\nError: ", the printf-style formatted message and a trailing
 * newline to stderr, then exits with status 1.  Never returns.
 */
void fatal(char *fmt, ...)
{
    va_list args;

    fputs("\nError: ", stderr);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fputc('\n', stderr);
    exit(1);
}
/* Tag stored as the first member of every node variant. */
enum op {
    String, /* literal text                    */
    Query,  /* e?   optional                   */
    Star,   /* e*   zero or more               */
    Plus,   /* e+   one or more                */
    Or,     /* a | b  choice                   */
    And,    /* a b    sequence                 */
    Class,  /* [...]  character class          */
    Dot,    /* .    any single character       */
    Exc,    /* !e   negative lookahead         */
    Et,     /* &e   positive lookahead         */
    Id      /* reference to a named rule       */
};
/*
 * Forward declarations: the node, symbol and table types refer to each
 * other through pointers, so their names are introduced before the bodies.
 */
typedef union Node Node;
typedef struct Symbol Symbol;
typedef struct Array Array;
typedef struct SymbolTable SymbolTable;
typedef struct NodeCount NodeCount;
/*
 * A block of Node pointers together with a length.
 * NOTE(review): not referenced anywhere else in the visible part of this
 * file; `length` presumably counts the entries of `elements` -- confirm.
 */
struct Array {
Node **elements;
int length;
};
/*
 * One struct per node kind.  Every variant starts with the `enum op type`
 * tag so that the tag can be read through any member of union Node.
 */
struct String { enum op type; char *string; int len; };  /* literal text and its strlen (see mkString) */
struct Query { enum op type; Node *children[1]; };  /* operand of e? */
struct Star { enum op type; Node *children[1]; };  /* operand of e* */
struct Plus { enum op type; Node *children[1]; };  /* operand of e+ */
struct Or { enum op type; Node *children[2]; };  /* the two alternatives; execute() tries children[0] first */
struct And { enum op type; Node *children[2]; };  /* the two parts of a sequence, in order */
struct Class { enum op type; char *array; };  /* value returned by classify(), queried with testBit() */
struct Dot { enum op type; };  /* no payload */
struct Exc { enum op type; Node *children[1]; };  /* operand of !e */
struct Et { enum op type; Node *children[1]; };  /* operand of &e */
struct Id { enum op type; Symbol *symbol; };  /* the symbol whose rule is referenced */
/*
 * A parse-expression node.  `type` overlays the tag that begins every
 * variant, so it says which of the other members is valid.
 * mkNode() allocates only sizeof(struct <variant>) bytes, so only the
 * member matching `type` may be accessed.
 */
union Node {
enum op type;
struct String String;
struct Query Query;
struct Star Star;
struct Plus Plus;
struct Or Or;
struct And And;
struct Class Class;
struct Dot Dot;
struct Exc Exc;
struct Et Et;
struct Id Id;
};
/* A named grammar rule. */
struct Symbol{
char* name;   /* private copy of the rule name (strdup'ed by createSymbol) */
Node *rule;   /* expression bound by setRule(); 0 until the rule is defined */
};
/*
 * The set of all interned symbols.  intern() keeps `elements` sorted by
 * name (strcmp order) so that it can look names up with a binary search.
 */
struct SymbolTable {
Symbol **elements;  /* array of `length` symbol pointers, grown by intern() */
int length;         /* number of symbols currently stored */
};
/* Positional initialiser: no elements, length 0. */
#define SymbolTable_initialiser {0,0}
/* The single, global symbol table. */
SymbolTable symbolTable= SymbolTable_initialiser;
/*
 * Allocate a new symbol called `name` with no rule attached.
 *
 * The name is copied, so the caller keeps ownership of `name`.
 * Terminates the program through fatal() if memory cannot be allocated;
 * never returns NULL.
 */
Symbol *createSymbol(char *name) {
    Symbol *symbol = calloc(1, sizeof *symbol);
    if (!symbol) {
        fatal("out of memory");
    }
    symbol->name = strdup(name);
    if (!symbol->name) {
        fatal("out of memory");
    }
    return symbol;
}
/* Allocate a node of the given variant, e.g. new(Star). */
#define new(type) mkNode(sizeof(struct type),type)

/*
 * Allocate `size` zeroed bytes and tag them as a node of kind `type`.
 *
 * `size` is the size of the variant struct, not of union Node, so only
 * the matching union member may be used on the result.
 * Terminates the program through fatal() when allocation fails; never
 * returns NULL.
 */
Node *mkNode(size_t size,enum op type)
{
    Node *node = calloc(1, size);
    if (!node) {
        fatal("out of memory");
    }
    node->type = type;
    return node;
}
/*
 * Build a String node holding a private copy of `s` and its length.
 *
 * Terminates the program through fatal() if the copy cannot be allocated.
 */
Node *mkString(char *s)
{
    Node *node = new(String);
    node->String.string = strdup(s);
    if (!node->String.string) {
        fatal("out of memory");
    }
    node->String.len = strlen(s);
    return node;
}
/* Build an Id node referring to symbol `s`; the pointer is stored, not copied. */
Node *mkId(Symbol *s)
{
    Node *reference = new(Id);

    reference->Id.symbol = s;
    return reference;
}
/* Build a Query node (e?) whose single operand is `n`. */
Node *mkQuery(Node *n)
{
    Node *optional = new(Query);

    optional->Query.children[0] = n;
    return optional;
}
/*
 * Build an Or node.  `node1` becomes children[0] and `node2` becomes
 * children[1].
 */
Node *mkOr(Node *node1, Node *node2)
{
    Node *choice = new(Or);

    choice->Or.children[0] = node1;
    choice->Or.children[1] = node2;
    return choice;
}
/*
 * Build an And node.  `node1` becomes children[0] and `node2` becomes
 * children[1].
 */
Node *mkAnd(Node *node1, Node *node2)
{
    Node *sequence = new(And);

    sequence->And.children[0] = node1;
    sequence->And.children[1] = node2;
    return sequence;
}
/* Build a Star node (e*) whose single operand is `n`. */
Node *mkStar(Node *n)
{
    Node *repetition = new(Star);

    repetition->Star.children[0] = n;
    return repetition;
}
/*
 * Build a Class node around `str`.  The pointer itself is stored (no copy
 * is made), so `str` must stay valid for as long as the node is used.
 */
Node *mkClass(char* str)
{
    Node *charClass = new(Class);

    charClass->Class.array = str;
    return charClass;
}
/* Build a Plus node (e+) whose single operand is `n`. */
Node *mkPlus(Node *n)
{
    Node *repetition = new(Plus);

    repetition->Plus.children[0] = n;
    return repetition;
}
/*
 * Build a Dot node; it carries no payload besides its tag.
 *
 * Declared with (void) so the definition acts as a prototype and calls
 * that pass arguments are diagnosed.
 */
Node *mkDot(void)
{
    return new(Dot);
}
/* Build an Exc node (!e) whose single operand is `n`. */
Node *mkExc(Node *n)
{
    Node *negation = new(Exc);

    negation->Exc.children[0] = n;
    return negation;
}
/* Build an Et node (&e) whose single operand is `n`. */
Node *mkEt(Node *n)
{
    Node *lookahead = new(Et);

    lookahead->Et.children[0] = n;
    return lookahead;
}
/*
 * Return `object` unchanged when its tag equals `type`; otherwise report
 * the mismatch on stderr and exit with status 1.
 */
Node *_checktype(Node *object, enum op type)
{
    if (object->type != type) {
        fprintf(stderr, "\naccesing type %i as if it were a %i\n", object->type, type);
        exit(1);
    }
    return object;
}

/*
 * Checked member access: get(node, Id, symbol) verifies that node is an
 * Id and then yields node->Id.symbol.  `type` is used both as the enum
 * constant and as the union member name.
 */
#define get(object, type, member) (_checktype(object, type)->type.member)
/*
 * Write a textual rendering of the expression rooted at `node` to stdout.
 *
 * Postfix operators are printed after their operand, prefix operators
 * before it, and the two operands of Or / And are separated by the words
 * "Or" / "And".  Aborts if the node carries an unknown tag.
 */
void print(Node *node)
{
    switch (node->type) {
    case String:
        printf("\"%s\"", node->String.string);
        return;
    case Query:
        print(node->Query.children[0]);
        printf("?");
        return;
    case Star:
        print(node->Star.children[0]);
        printf("*");
        return;
    case Plus:
        /* The operator itself was previously omitted, making e+ print like e. */
        print(node->Plus.children[0]);
        printf("+");
        return;
    case Or:
        print(node->Or.children[0]);
        printf("Or");
        print(node->Or.children[1]);
        return;
    case And:
        print(node->And.children[0]);
        printf("And");
        print(node->And.children[1]);
        return;
    case Class:
        printf("Class");
        printf("\"%s\"", node->Class.array);
        return;
    case Dot:
        printf("Dot");
        return;
    case Exc:
        printf("!");
        print(node->Exc.children[0]);
        return;
    case Et:
        printf("&");
        print(node->Et.children[0]);
        return;
    case Id:
        /* NOTE(review): the newline splits multi-part expressions across
           lines; kept as is because it may be deliberate -- confirm. */
        printf("%s\n", get(node,Id,symbol)->name);
        return;
    }
    abort();
}
/* Print the expression rooted at `node`, followed by a newline. */
void println(Node *node)
{
    print(node);
    putchar('\n');
}
/* Buffer the grammar text is read from; set in main() before yyparse(). */
InputBuffer *inputBuffer=0;
/*
 * Input hook used by the generated parser.  Delivers one character per
 * call: `result` is set to 0 once atEnd(inputBuffer) is true, otherwise
 * the current character is stored in *buff, the buffer is advanced by one
 * and `result` is set to 1.  `maxSize` is ignored.
 */
#define YY_INPUT(buff,result,maxSize) \
{if (atEnd(inputBuffer)){ \
result=0; \
} \
else { \
*buff=currentChar(inputBuffer); \
advance(inputBuffer,1); \
result=1; \
}}
/*
 * Return the unique symbol called `name`, creating it on first use.
 *
 * symbolTable.elements is kept sorted in strcmp order: a binary search
 * either finds the symbol or leaves `left` at the index where it has to be
 * inserted.  Terminates the program through fatal() if the table cannot
 * be grown.
 */
Symbol *intern(char *name){
    int left = 0;
    int right = symbolTable.length - 1;

    while (left <= right) {
        int middle = left + (right - left) / 2;
        int comp = strcmp(name, symbolTable.elements[middle]->name);
        if (comp < 0) {
            right = middle - 1;
        } else if (comp > 0) {
            left = middle + 1;
        } else {
            return symbolTable.elements[middle];
        }
    }

    /* Grow through a temporary so the old table is not lost on failure. */
    Symbol **grown = realloc(symbolTable.elements,
                             sizeof *grown * (symbolTable.length + 1));
    if (!grown) {
        fatal("out of memory");
    }
    symbolTable.elements = grown;

    /* Open a gap at the insertion point by shifting the tail up by one. */
    memmove(grown + left + 1, grown + left,
            (symbolTable.length - left) * sizeof *grown);
    symbolTable.length++;
    return symbolTable.elements[left] = createSymbol(name);
}
/*
 * Bind `rule` to the symbol called `name` (interning it if necessary),
 * after logging the assignment on stdout.
 */
void setRule(char *name, Node *rule)
{
    Symbol *target;

    printf("Setting rule %s to ", name);
    println(rule);

    target = intern(name);
    target->rule = rule;
}
/* Type of $$ and of the named variables used in the grammar actions. */
#define YYSTYPE Node *
%}
# A grammar is optional leading whitespace followed by one or more declarations.
start = - declaration+
# name = expression : the action binds the parsed expression to the named symbol.
declaration = i:id '=' - e:expression { setRule(get(i, Id, symbol)->name, e) }
# An expression is a choice between sequences.
expression = or
# Ordered choice.  The left-hand alternative must become children[0] of the
# Or node, because execute() tries children[0] before children[1].
or = a:and "|" - o:or { $$ = mkOr(a, o) }
| a:and { $$ = a }
# Sequence: a prefix followed by the rest of the sequence, or a single prefix.
and = p:prefix a:and { $$ = mkAnd(p, a) }
| p:prefix { $$ = p }
# Optional lookahead operator in front of a postfix expression.
prefix = "!" - p : postfix { $$ = mkExc(p) }
| "&" - p : postfix { $$ = mkEt(p) }
| p: postfix { $$ = p}
# An atom optionally followed by exactly one of the operators ? * +.
postfix = s:atom ( "?" - { s = mkQuery(s) }
| "*" - { s = mkStar(s) }
| "+" - { s = mkPlus(s) }
)? { $$ = s }
# No action here: the value is presumably the one left in $$ by the matched
# alternative (for the parenthesised form, by the nested expression) -- confirm.
atom = string | class | dot | rule | '(' - expression ')' -
# A reference to another rule: an identifier that is not followed by '='
# (which would start the next declaration).  The Id node built by `id` is
# returned directly instead of building a second one from yytext.
rule = i:id !'=' { $$ = i }
# Identifier: a letter or underscore followed by letters, digits or underscores.
# The upper-case range is A-Z; the former A-z also admitted [ \ ] ^ and `.
id = < [a-zA-Z_][a-zA-Z_0-9]* > - { $$ = mkId(intern(yytext)) }
# Double-quoted literal; the text between the quotes is taken verbatim.
string = '"' < [^\"]* > '"' - { $$ = mkString(yytext) }
# Character class: everything between '[' and the next ']' is handed to
# classify().  Whitespace is not skipped after '[' because it is part of
# the class (e.g. the space in [ \t]).
class = '[' < (!']' .)* > ']' - { $$=mkClass(classify(yytext)) }
# Any single character.
dot = '.' - { $$=mkDot() }
# Optional whitespace, skipped after most tokens.
- = space*
# Blank, tab, or a line break in any LF / CR combination.
space = [ \t] | '\n' '\r'* | '\r' '\n'*
%%
int execute(Node *node, InputBuffer *in);

/*
 * Apply `child` repeatedly until it fails or succeeds without consuming
 * input; the latter stops repetitions of empty matches from looping forever.
 */
static void executeRepeat(Node *child, InputBuffer *in)
{
    for (;;) {
        int before = getPosition(in);
        if (!execute(child, in) || getPosition(in) == before) {
            return;
        }
    }
}

/*
 * Match the expression rooted at `node` against `in` at its current
 * position.
 *
 * Returns 1 on success, with the position moved past the matched text
 * (lookaheads Exc / Et never move it), and 0 on failure.  A reference to
 * a rule that was never defined is reported on stdout and treated as a
 * failed match.  Aborts on a node with an unknown tag.
 */
int execute(Node *node, InputBuffer *in)
{
    switch (node->type) {
    case String: {
        if (strncmp(currentText(in), get(node,String,string), get(node,String,len))) {
            return 0;
        }
        advance(in, get(node,String,len));
        return 1;
    }
    case And: {
        int pos = getPosition(in);
        if (!execute(get(node,And,children[0]), in)) { /* first part failed */
            return 0;
        }
        /* first part matched */
        if (!execute(get(node,And,children[1]), in)) {
            setPosition(in, pos); /* undo what the first part consumed */
            return 0;
        }
        return 1;
    }
    case Or: {
        if (execute(get(node,Or,children[0]), in)) {
            return 1;
        }
        return execute(get(node,Or,children[1]), in);
    }
    case Star: {
        executeRepeat(get(node,Star,children[0]), in);
        return 1;
    }
    case Plus: {
        if (!execute(get(node,Plus,children[0]), in)) {
            return 0;
        }
        executeRepeat(get(node,Plus,children[0]), in);
        return 1;
    }
    case Class: {
        /* Like Dot, a class can never match at the end of the input. */
        if (!atEnd(in) && testBit(get(node,Class,array), currentChar(in))) {
            advance(in, 1);
            return 1;
        }
        return 0;
    }
    case Query: {
        execute(get(node,Query,children[0]), in);
        return 1;
    }
    case Exc: {
        int pos = getPosition(in);
        if (!execute(get(node,Exc,children[0]), in)) {
            return 1;
        }
        setPosition(in, pos);
        return 0;
    }
    case Et: {
        int pos = getPosition(in);
        if (!execute(get(node,Et,children[0]), in)) {
            return 0;
        }
        setPosition(in, pos);
        return 1;
    }
    case Dot: {
        if (atEnd(in)) {
            return 0;
        }
        advance(in, 1);
        return 1;
    }
    case Id: {
        Symbol *symbol = get(node, Id, symbol);
        if (0 == symbol->rule) {
            /* Nothing to run: fail instead of dereferencing a null rule. */
            printf("undefined rule: %s\n", symbol->name);
            return 0;
        }
        return execute(symbol->rule, in);
    }
    }
    printf("this cannot happen\n");
    abort();
}
/* One counter per node kind, used by printVariableName() to number names. */
struct NodeCount{
int String;
int Query;
int Star;
int Plus;
int Or;
int And;
int Class;
int Dot;
int Exc;
int Et;
int Id;
};
/* Global per-kind counters; every counter starts at zero. */
NodeCount nodeCount = { 0 };
/*
 * Print "<prefix><counter> ", first incrementing the counter when
 * `increment` is 1.
 */
static void printCountedName(const char *prefix, int *counter, int increment)
{
    if (increment == 1) {
        ++*counter;
    }
    printf("%s%i ", prefix, *counter);
}

/*
 * Print a variable name for a node of kind `type` on stdout, numbered
 * from the global `nodeCount`.
 *
 * type      - node kind selecting the name prefix and the counter.
 * count     - unused; the global `nodeCount` is what is read and updated.
 * increment - when 1, the counter of that kind is advanced.
 *
 * String and Star print the current number with no trailing space and
 * advance the counter afterwards.  All other kinds advance the counter
 * first and print the name followed by one space.
 *
 * Names are formatted with printf directly, so no temporary buffers are
 * needed and numbers above 9 are printed in full.
 */
void printVariableName(enum op type, NodeCount count, int increment){
    (void)count;
    switch (type) {
    case String:
        printf("string%i", nodeCount.String);
        if (increment == 1) nodeCount.String++;
        break;
    case Query:
        printCountedName("query", &nodeCount.Query, increment);
        break;
    case Star:
        printf("star%i", nodeCount.Star);
        if (increment == 1) nodeCount.Star++;
        break;
    case Plus:
        printCountedName("plus", &nodeCount.Plus, increment);
        break;
    case Or:
        printCountedName("or", &nodeCount.Or, increment);
        break;
    case And:
        printCountedName("and", &nodeCount.And, increment);
        break;
    case Class:
        printCountedName("class", &nodeCount.Class, increment);
        break;
    case Dot:
        printCountedName("dot", &nodeCount.Dot, increment);
        break;
    case Exc:
        printCountedName("exc", &nodeCount.Exc, increment);
        break;
    case Et:
        printCountedName("et", &nodeCount.Et, increment);
        break;
    case Id:
        printCountedName("id", &nodeCount.Id, increment);
        break;
    }
}
/* Number given to the next `nodeN` variable emitted by printCode(). */
int nodeNumber=0;

/*
 * Emit, on stdout, C definitions of `Node nodeN = { ... };` variables that
 * rebuild the expression rooted at `node`.  Operands are emitted before
 * the node that refers to them.
 *
 * Returns the number N of the variable emitted for `node`, so that the
 * caller can refer to it as &nodeN.  Aborts on an unknown tag.
 *
 * NOTE(review): String text and Class data are written out unescaped, and
 * the Id case is still a placeholder (it prints no `nodeN` variable and
 * returns 0), so output containing those is not yet complete C.
 */
int printCode(Node *node)
{
    switch (node->type) {
    case String: {
        int thisNumber = nodeNumber++;
        printf("Node node%i = { .String = { String, ", thisNumber);
        printf("\"%s\"", node->String.string);
        printf(" , %i }}; \n", node->String.len);
        return thisNumber;
    }
    case Query: {
        int i = printCode(node->Query.children[0]);
        int thisNumber = nodeNumber++;
        printf("Node node%i = { .Query = { Query, &node%i }};\n", thisNumber, i);
        return thisNumber;
    }
    case Star: {
        int i = printCode(node->Star.children[0]);
        int thisNumber = nodeNumber++;
        printf("Node node%i", thisNumber);
        printf("= { .Star = { Star, &node%i }};\n", i);
        return thisNumber;
    }
    case Plus: {
        int i = printCode(node->Plus.children[0]);
        int thisNumber = nodeNumber++;
        printf("Node node%i = { .Plus = { Plus, &node%i }};\n", thisNumber, i);
        return thisNumber;
    }
    case Or: {
        int i = printCode(node->Or.children[0]);
        int j = printCode(node->Or.children[1]);
        /* One number per emitted variable: the number returned must be
           the one that was just printed. */
        int thisNumber = nodeNumber++;
        printf("Node node%i= { .Or = { Or, &node%i", thisNumber, i);
        printf(" , &node%i }};\n", j);
        return thisNumber;
    }
    case And: {
        int i = printCode(node->And.children[0]);
        int j = printCode(node->And.children[1]);
        int thisNumber = nodeNumber++;
        printf("Node node%i= { .And = { And, &node%i", thisNumber, i);
        printf(" , &node%i }};\n", j);
        return thisNumber;
    }
    case Class: {
        int thisNumber = nodeNumber++;
        printf("Node node%i= { .Class = { Class, %s }};\n", thisNumber, node->Class.array);
        return thisNumber;
    }
    case Dot: {
        int thisNumber = nodeNumber++;
        printf("Node node%i= { .Dot = { Dot}};\n", thisNumber);
        return thisNumber;
    }
    case Exc: {
        int i = printCode(node->Exc.children[0]);
        int thisNumber = nodeNumber++;
        printf("Node node%i = { .Exc = { Exc, &node%i }};\n", thisNumber, i);
        return thisNumber;
    }
    case Et: {
        int i = printCode(node->Et.children[0]);
        int thisNumber = nodeNumber++;
        printf("Node node%i = { .Et = { Et, &node%i }};\n", thisNumber, i);
        return thisNumber;
    }
    case Id:
        /* TODO(review): placeholder output; symbols are not emitted yet. */
        printVariableName(node->type, nodeCount, 1);
        printf("= { .Id = { Id, symb");
        printf(" }};\n");
        return 0;
    }
    abort();
    return 0;
}
/* Emit the C definitions for `node` (see printCode), then a blank line. */
void printlnCode(Node *node)
{
    (void)printCode(node);
    putchar('\n');
}
/*
 * Read the whole of file `path` into a freshly allocated, NUL-terminated
 * buffer.  Reports the problem and exits with status 1 on any failure.
 */
static char *readWholeFile(const char *path)
{
    FILE *fp = fopen(path, "r");
    if (!fp) {
        perror(path);
        exit(1);
    }
    struct stat sb;
    if (fstat(fileno(fp), &sb)) {
        perror(path);
        exit(1);
    }
    size_t size = (size_t)sb.st_size;
    /* One extra byte: the text is handed on as a C string. */
    char *text = malloc(size + 1);
    if (!text) {
        fatal("out of memory");
    }
    /* An empty file is valid input to read, not a read error. */
    if (size > 0 && fread(text, size, 1, fp) < 1) {
        perror(path);
        exit(1);
    }
    text[size] = 0;
    fclose(fp);
    return text;
}

/*
 * Usage: <program> [-c <arg>] ( <grammar-text> | -f <grammar-file> )
 *
 * Parses the grammar given on the command line or in a file.  With -c,
 * prints C definitions for the `start` rule and exits.  Otherwise reads
 * stdin line by line and reports for each line whether the `start` rule
 * matches it completely.
 *
 * Returns 0 on success and 1 if the grammar cannot be parsed; other
 * errors terminate the program through fatal() / exit(1).
 */
int main(int argc, char **argv)
{
    char *opt_f = 0;   /* grammar file name              */
    char *opt_c = 0;   /* value following -c, if any     */
    char *opt__ = 0;   /* grammar text given as argument */

    for (int i = 1; i < argc; i++) {
        char *arg = argv[i];
        if (!strcmp(arg, "-f") && i < argc - 1) {
            opt_f = argv[++i];
            continue;
        }
        if (!strcmp(arg, "-c") && i < argc - 1) {
            opt_c = argv[++i];
            continue;
        }
        opt__ = arg;
    }
    if (opt_f && opt__) fatal("file and command line expressions cannot both be supplied");
    if (!opt_f && !opt__) fatal("no expression specified");

    /* Use the parsed options, not fixed argv slots: with -c present the
       grammar is no longer at argv[1] / argv[2]. */
    if (opt__) inputBuffer = mkInputBuffer(opt__);
    if (opt_f) inputBuffer = mkInputBuffer(readWholeFile(opt_f));

    if (!yyparse()) {
        printf("Error\n");
        return 1;
    }

    /* Both modes below need the start rule, so check it once up front. */
    Node *startRule = intern("start")->rule;
    if (!startRule) {
        fatal("no start rule");
    }

    if (opt_c) {
        int i = printCode(startRule);
        printf("Node *start_rule= &node%i;\n", i);
        return 0;
    }

    char *line = 0;
    size_t line_max = 0;
    ssize_t line_len = 0;
    while ((line_len = getline(&line, &line_max, stdin)) >= 0) {
        if (line_len > 0 && line[line_len - 1] == '\n') {
            line[line_len - 1] = 0;
        }
        initInputBuffer(inputBuffer, line);
        /* A match counts only if the whole line was consumed. */
        if (!execute(startRule, inputBuffer) || !atEnd(inputBuffer)) {
            printf("no match, current position : %i\n", getPosition(inputBuffer));
        }
        else {
            printf("match, current position : %i\n", getPosition(inputBuffer));
        }
    }
    free(line);

    /* Reference otherwise unused functions of the generated parser so the
       compiler does not warn about them. */
    (void)yySet;
    (void)yyPop;
    (void)yyPush;
    (void)yyAccept;
    (void)yymatchDot;
    (void)yymatchString;
    (void)yymatchChar;
    return 0;
}