Dynamic PEG for interpreted languages.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

816 lignes
18 KiB

%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <math.h>
#include "inputBuffer.c"
#include "class.c"
;
/* Print "\nError: " followed by the printf-style message and a newline
 * on stderr, then terminate the process with exit status 1. */
void fatal(char *fmt, ...)
{
    va_list args;

    fputs("\nError: ", stderr);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fputc('\n', stderr);
    exit(1);
}
/* Tag stored in every node to say which member of the Node union is live. */
enum op { String, Query, Star, Plus, Or, And, Class, Dot, Exc, Et, Id} ;
/* Forward declarations so the types below can refer to each other by
 * their short names. */
typedef union Node Node;
typedef struct Symbol Symbol;
typedef struct Array Array;
typedef struct SymbolTable SymbolTable;
typedef struct NodeCount NodeCount;
/* A pointer to Node pointers plus an integer length.
 * NOTE(review): not used by any code visible in this file; `length`
 * presumably counts the entries in `elements` -- confirm before relying on it. */
struct Array {
Node **elements;
int length;
};
/* One struct per parsing-expression kind.  Every struct begins with the
 * `enum op type` tag, so the tag can be read through Node.type whichever
 * member is live.  printCode() emits positional initialisers for these
 * structs, so the order of the fields matters. */
struct String { enum op type; char *string; int len; };  /* literal text and its length */
struct Query { enum op type; Node *children[1]; };       /* e?  */
struct Star { enum op type; Node *children[1]; };        /* e*  */
struct Plus { enum op type; Node *children[1]; };        /* e+  */
struct Or { enum op type; Node *children[2]; };          /* two alternatives; execute() tries children[0] first */
struct And { enum op type; Node *children[2]; };         /* sequence: children[0] then children[1] */
struct Class { enum op type; char *array; };             /* character class; execute() queries `array` with testBit() */
struct Dot { enum op type; };                            /* any single character */
struct Exc { enum op type; Node *children[1]; };         /* !e  negative lookahead */
struct Et { enum op type; Node *children[1]; };          /* &e  positive lookahead */
struct Id { enum op type; Symbol *symbol; };             /* reference to a named rule */
/* A parsing-expression node: a tagged union over the structs above. */
union Node {
enum op type;
struct String String;
struct Query Query;
struct Star Star;
struct Plus Plus;
struct Or Or;
struct And And;
struct Class Class;
struct Dot Dot;
struct Exc Exc;
struct Et Et;
struct Id Id;
};
/* A named grammar rule.  `rule` stays 0 until setRule() assigns it. */
struct Symbol{
char* name;
Node *rule;
};
/* Table of all interned symbols.  intern() keeps `elements` sorted by
 * name (strcmp order) so that it can look names up by binary search. */
struct SymbolTable {
Symbol **elements;
int length;
};
#define SymbolTable_initialiser {0,0}
/* The single global symbol table, initially empty. */
SymbolTable symbolTable= SymbolTable_initialiser;
/* Allocate a new Symbol holding a private copy of `name` and no rule.
 * The process exits with status 1 if memory cannot be allocated, so the
 * returned pointer is never NULL. */
Symbol *createSymbol(char *name) {
    Symbol *symbol= calloc(1, sizeof *symbol);
    if (!symbol) {
        fprintf(stderr, "\nError: out of memory\n");
        exit(1);
    }
    symbol->name= strdup(name);
    if (!symbol->name) {
        fprintf(stderr, "\nError: out of memory\n");
        exit(1);
    }
    return symbol;
}
/* new(T) allocates a zeroed node big enough for struct T and tags it T. */
#define new(type) mkNode(sizeof(struct type),type)
/* Allocate `size` zeroed bytes and store `type` in the node's tag.
 * The process exits with status 1 if the allocation fails, so callers
 * may dereference the result unconditionally. */
Node *mkNode(size_t size,enum op type)
{
    Node *node= calloc(1, size);
    if (!node) {
        fprintf(stderr, "\nError: out of memory\n");
        exit(1);
    }
    node->type= type;
    return node;
}
/* Build a String node holding a private copy of `s` and its length.
 * Exits with status 1 if the copy cannot be allocated. */
Node *mkString(char *s)
{
    Node *node= new(String);
    node->String.string= strdup(s);
    if (!node->String.string) {
        fprintf(stderr, "\nError: out of memory\n");
        exit(1);
    }
    node->String.len= (int)strlen(s);
    return node;
}
/* Build an Id node that refers to the rule named by symbol `s`. */
Node *mkId(Symbol *s)
{
    Node *ref = new(Id);

    ref->Id.symbol = s;
    return ref;
}
/* Build a Query node (e?) whose only child is `n`. */
Node *mkQuery(Node *n)
{
    Node *optional = new(Query);

    optional->Query.children[0] = n;
    return optional;
}
/* Build an Or node: node1 becomes children[0], node2 becomes children[1]. */
Node *mkOr(Node *node1, Node *node2)
{
    Node *choice = new(Or);

    choice->Or.children[1] = node2;
    choice->Or.children[0] = node1;
    return choice;
}
/* Build an And node: node1 becomes children[0], node2 becomes children[1]. */
Node *mkAnd(Node *node1, Node *node2)
{
    Node *sequence = new(And);

    sequence->And.children[1] = node2;
    sequence->And.children[0] = node1;
    return sequence;
}
/* Build a Star node (e*) whose only child is `n`. */
Node *mkStar(Node *n)
{
    Node *repeat = new(Star);

    repeat->Star.children[0] = n;
    return repeat;
}
/* Build a Class node that stores the pointer `str` as its array.
 * The pointer is kept as given; no copy is made. */
Node *mkClass(char* str)
{
    Node *cls = new(Class);

    cls->Class.array = str;
    return cls;
}
/* Build a Plus node (e+) whose only child is `n`. */
Node *mkPlus(Node *n)
{
    Node *repeat = new(Plus);

    repeat->Plus.children[0] = n;
    return repeat;
}
/* Build a Dot node (matches any single character).
 * Declared with (void) so the definition is a real prototype and calls
 * that pass arguments are rejected by the compiler. */
Node *mkDot(void)
{
    return new(Dot);
}
/* Build an Exc node (!e) whose only child is `n`. */
Node *mkExc(Node *n)
{
    Node *negation = new(Exc);

    negation->Exc.children[0] = n;
    return negation;
}
/* Build an Et node (&e) whose only child is `n`. */
Node *mkEt(Node *n)
{
    Node *lookahead = new(Et);

    lookahead->Et.children[0] = n;
    return lookahead;
}
/* Return `object` unchanged when its tag equals `type`; otherwise report
 * the mismatch on stderr and exit with status 1. */
Node *_checktype(Node *object, enum op type)
{
    if (object->type != type) {
        fprintf(stderr, "\naccesing type %i as if it were a %i\n", object->type, type);
        exit(1);
    }
    return object;
}
/* Checked field access: get(node, Kind, field) verifies the tag is Kind
 * before reading node->Kind.field. */
#define get(object, type, member) (_checktype(object, type)->type.member)
/* Write a textual rendering of the expression tree rooted at `node` to
 * stdout.  Postfix operators are printed after their operand, prefix
 * operators before it, and binary nodes as left "Or"/"And" right.
 * An Id prints the rule name followed by a newline.
 * Aborts if the node carries an unknown tag. */
void print(Node *node)
{
    switch (node->type) {
    case String:
        printf("\"%s\"", node->String.string);
        return;
    case Query:
        print(node->Query.children[0]);
        printf("?");
        return;
    case Star:
        /* read the child through the member that matches the tag */
        print(node->Star.children[0]);
        printf("*");
        return;
    case Plus:
        print(node->Plus.children[0]);
        /* the operator was previously omitted, making e+ print like e */
        printf("+");
        return;
    case Or:
        print(node->Or.children[0]);
        printf("Or");
        print(node->Or.children[1]);
        return;
    case And:
        print(node->And.children[0]);
        printf("And");
        print(node->And.children[1]);
        return;
    case Class:
        printf("Class");
        printf("\"%s\"", node->Class.array);
        return;
    case Dot:
        printf("Dot");
        return;
    case Exc:
        printf("!");
        print(node->Exc.children[0]);
        return;
    case Et:
        printf("&");
        print(node->Et.children[0]);
        return;
    case Id:
        printf("%s\n",get(node,Id,symbol)->name);
        return;
    }
    abort();
}
/* Print the expression rooted at `node`, then a newline, on stdout. */
void println(Node *node)
{
    print(node);
    putchar('\n');
}
/* Buffer the generated parser reads the grammar text from; main() sets
 * it before calling yyparse() and later reuses it for each input line. */
InputBuffer *inputBuffer=0;
/* Input hook for the generated parser: delivers one character per call
 * from inputBuffer.  Stores 0 in `result` when the buffer is exhausted,
 * otherwise writes the current character to *buff, advances the buffer
 * by one and stores 1 in `result`.  `maxSize` is not used. */
#define YY_INPUT(buff,result,maxSize) \
{if (atEnd(inputBuffer)){ \
result=0; \
} \
else { \
*buff=currentChar(inputBuffer); \
advance(inputBuffer,1); \
result=1; \
}}
/* Return the unique Symbol for `name`, creating it if necessary.
 *
 * symbolTable.elements is kept sorted by strcmp order.  The name is
 * looked up by binary search; when it is absent, the table grows by one
 * slot and a new Symbol is inserted at the position that keeps the order.
 * Exits with status 1 if the table cannot be grown. */
Symbol *intern(char *name){
    int left=0,right=symbolTable.length-1;
    while (left<=right) {
        int middle=left+(right-left)/2;
        int comp=strcmp(name,symbolTable.elements[middle]->name);
        if (comp<0) {
            right=middle-1;
        }
        else if (comp>0) {
            left=middle+1;
        }
        else {
            return symbolTable.elements[middle];
        }
    }
    /* Not found: `left` is now the insertion index.  Grow through a
     * temporary so a failed realloc neither loses the old table nor
     * feeds a NULL pointer to memmove. */
    Symbol **grown= realloc(symbolTable.elements,sizeof(symbolTable.elements[0]) * (symbolTable.length+1));
    if (!grown) {
        fprintf(stderr, "\nError: out of memory\n");
        exit(1);
    }
    symbolTable.elements= grown;
    /* shift the tail up by one slot to open a gap at `left` */
    memmove(symbolTable.elements+left+1,symbolTable.elements+left,(symbolTable.length-left)*sizeof(symbolTable.elements[0]));
    symbolTable.length++;
    return symbolTable.elements[left]=createSymbol(name);
}
/* Announce on stdout that rule `name` is being set (followed by a
 * rendering of `rule`), then bind `rule` to the interned symbol `name`. */
void setRule(char *name, Node *rule)
{
    Symbol *target;

    printf("Setting rule %s to ", name);
    println(rule);
    target = intern(name);
    target->rule = rule;
}
/* Semantic value type of the generated parser: every grammar action
 * produces and consumes Node pointers through $$ and named variables. */
#define YYSTYPE Node *
%}
# A grammar is optional whitespace followed by one or more declarations.
start = - declaration+
# name = expression  -- binds the expression to the named rule.
declaration = i:id '=' - e:expression { setRule(get(i, Id, symbol)->name, e) }
expression = or
# Ordered choice.  The alternative written first must become children[0],
# because execute() tries children[0] before children[1].
or = a:and "|" - o:or { $$ = mkOr(a, o) }
| a:and { $$ = a }
# Sequence of one or more prefixed expressions.
and = p:prefix a:and { $$ = mkAnd(p, a) }
| p:prefix { $$ = p }
# Optional lookahead operator: !e (negative) or &e (positive).
prefix = "!" - p : postfix { $$ = mkExc(p) }
| "&" - p : postfix { $$ = mkEt(p) }
| p: postfix { $$ = p}
# Optional repetition operator: e?, e* or e+.
postfix = s:atom ( "?" - { s = mkQuery(s) }
| "*" - { s = mkStar(s) }
| "+" - { s = mkPlus(s) }
)? { $$ = s }
atom = string | class | dot | rule | '(' - expression ')' -
# A rule reference is an identifier that is NOT followed by '=', which
# would make it the start of the next declaration.
rule = i:id !'=' { $$ = mkId(intern(yytext)) }
# Identifiers: letters, digits and '_' only.  The upper bound of the
# letter range is 'Z'; the former 'A-z' also admitted [ \ ] ^ and `.
id = < [a-zA-Z_][a-zA-Z_0-9]* > - { $$ = mkId(intern(yytext)) }
string = '"' < [^\"]* > '"' - { $$ = mkString(yytext) }
# NOTE(review): the '-' right after '[' skips whitespace before the capture,
# so leading blanks in a class such as [ \t] are dropped -- confirm intent.
class = '['-<(!']'.)*> ']' - { $$=mkClass(classify(yytext)) }
dot = '.' - { $$=mkDot() }
- = space*
space = [ \t] | '\n' '\r'* | '\r' '\n'*
%%
/* Match the parsing expression `node` against the input buffer `in`
 * starting at its current position.
 *
 * Returns 1 on success, leaving the position after the consumed text
 * (lookaheads consume nothing), or 0 on failure, leaving the position
 * where it was on entry.  Aborts on a node with an unknown tag. */
int execute(Node *node, InputBuffer *in)
{
    switch (node->type) {
    case String: {
        if (strncmp(currentText(in), get(node,String,string), get(node,String,len))) {
            return 0;
        }
        advance(in, get(node,String,len));
        return 1;
    }
    case And: {
        int pos= getPosition(in);
        if (!execute(get(node,And,children[0]), in)) { /* first part failed */
            return 0;
        }
        /* first part succeeded; undo it if the second part fails */
        if (!execute(get(node,And,children[1]), in)) {
            setPosition(in, pos);
            return 0;
        }
        return 1;
    }
    case Or: {
        if (execute(get(node,Or,children[0]), in)) {
            return 1;
        }
        return execute(get(node,Or,children[1]), in);
    }
    case Star: {
        Node *child= get(node,Star,children[0]);
        for (;;) {
            int before= getPosition(in);
            if (!execute(child, in)) break;
            /* a child that succeeds without consuming input would
             * otherwise succeed forever */
            if (getPosition(in) == before) break;
        }
        return 1;
    }
    case Plus: {
        Node *child= get(node,Plus,children[0]);
        if (!execute(child, in)) {
            return 0;
        }
        for (;;) {
            int before= getPosition(in);
            if (!execute(child, in)) break;
            /* same zero-width guard as for Star */
            if (getPosition(in) == before) break;
        }
        return 1;
    }
    case Class: {
        /* a class consumes exactly one character, so like Dot it cannot
         * match once the input is exhausted */
        if (atEnd(in)) {
            return 0;
        }
        if (testBit(get(node,Class,array),currentChar(in))) {
            advance(in, 1);
            return 1;
        }
        return 0;
    }
    case Query: {
        execute(get(node,Query,children[0]), in);
        return 1;
    }
    case Exc: {
        int pos= getPosition(in);
        if (!execute(get(node,Exc,children[0]), in)) {
            return 1;
        }
        setPosition(in, pos);
        return 0;
    }
    case Et: {
        int pos= getPosition(in);
        if (!execute(get(node,Et,children[0]), in)) {
            return 0;
        }
        setPosition(in, pos);
        return 1;
    }
    case Dot: {
        if (atEnd(in)) {
            return 0;
        }
        advance(in, 1);
        return 1;
    }
    case Id: {
        Symbol *symbol= get(node, Id, symbol);
        if (0 == symbol->rule) {
            /* referenced but never declared: report it and fail the
             * match instead of dereferencing a null rule */
            printf("undefined rule: %s\n", symbol->name);
            return 0;
        }
        return execute(symbol->rule, in);
    }
    }
    printf("this cannot happen\n");
    abort();
}
/* One integer counter per node kind; the field names mirror enum op. */
struct NodeCount{
int String;
int Query;
int Star;
int Plus;
int Or;
int And;
int Class;
int Dot;
int Exc;
int Et;
int Id;
};
/* Global counters read and incremented by printVariableName(); all
 * start at zero. */
NodeCount nodeCount={
.String=0,
.Query=0,
.Star=0,
.Plus=0,
.Or=0,
.And=0,
.Class=0,
.Dot=0,
.Exc=0,
.Et=0,
.Id=0
};
/* Print a variable name for a node of kind `type` on stdout, built from
 * the kind's lower-case name and the matching counter in the global
 * nodeCount.
 *
 * When `increment` is 1 the counter is bumped:
 *   - String and Star print the current count first, then increment,
 *     and print no trailing space;
 *   - every other kind increments first, then prints the new count
 *     followed by a single space.
 * `count` is accepted for interface compatibility but not used; the
 * global nodeCount is the only state read or written.
 *
 * The name is formatted directly with printf, so counts of any size are
 * printed in decimal and no temporary buffer is needed. */
void printVariableName(enum op type, NodeCount count, int increment)
{
    (void)count;
    switch (type) {
    case String:
        printf("string%i", nodeCount.String);
        if (increment == 1) nodeCount.String++;
        break;
    case Query:
        if (increment == 1) nodeCount.Query++;
        printf("query%i ", nodeCount.Query);
        break;
    case Star:
        printf("star%i", nodeCount.Star);
        if (increment == 1) nodeCount.Star++;
        break;
    case Plus:
        if (increment == 1) nodeCount.Plus++;
        printf("plus%i ", nodeCount.Plus);
        break;
    case Or:
        if (increment == 1) nodeCount.Or++;
        printf("or%i ", nodeCount.Or);
        break;
    case And:
        if (increment == 1) nodeCount.And++;
        printf("and%i ", nodeCount.And);
        break;
    case Class:
        if (increment == 1) nodeCount.Class++;
        printf("class%i ", nodeCount.Class);
        break;
    case Dot:
        if (increment == 1) nodeCount.Dot++;
        printf("dot%i ", nodeCount.Dot);
        break;
    case Exc:
        if (increment == 1) nodeCount.Exc++;
        printf("exc%i ", nodeCount.Exc);
        break;
    case Et:
        if (increment == 1) nodeCount.Et++;
        printf("et%i ", nodeCount.Et);
        break;
    case Id:
        if (increment == 1) nodeCount.Id++;
        printf("id%i ", nodeCount.Id);
        break;
    }
}
/* Next free index for the `nodeN` variables emitted by printCode(). */
int nodeNumber=0;

/* Emit C source to `file` that defines one `Node nodeN = {...};`
 * variable for every node of the tree rooted at `node`.  Children are
 * emitted before their parent so a parent can take `&nodeK` of each
 * child.  Returns the index N given to the root; aborts on an unknown
 * tag. */
int printCode(FILE *file,Node *node)
{
    int self;

    switch (node->type) {
    case String:
        self = nodeNumber++;
        fprintf(file,"Node node%i = { .String = { String, ",self);
        fprintf(file,"\"%s\"", node->String.string);
        fprintf(file," , %i }}; \n",node->String.len);
        return self;
    case Query: {
        int child = printCode(file,node->Query.children[0]);
        self = nodeNumber++;
        fprintf(file,"Node node%i",self);
        fprintf(file,"= { .Query = { Query, &node%i }};\n",child);
        return self;
    }
    case Star: {
        int child = printCode(file,node->Star.children[0]);
        self = nodeNumber++;
        fprintf(file,"Node node%i",self);
        fprintf(file,"= { .Star = { Star, &node%i }};\n",child);
        return self;
    }
    case Plus: {
        int child = printCode(file,node->Plus.children[0]);
        self = nodeNumber++;
        fprintf(file,"Node node%i = { .Plus = { Plus, &node%i }};\n",self,child);
        return self;
    }
    case Or: {
        int first = printCode(file,node->Or.children[0]);
        int second = printCode(file,node->Or.children[1]);
        self = nodeNumber++;
        fprintf(file,"Node node%i= { .Or = { Or, &node%i",self,first);
        fprintf(file," , &node%i }};\n",second);
        return self;
    }
    case And: {
        int first = printCode(file,node->And.children[0]);
        int second = printCode(file,node->And.children[1]);
        self = nodeNumber++;
        fprintf(file,"Node node%i= { .And = { And, &node%i",self,first);
        fprintf(file," , &node%i }};\n",second);
        return self;
    }
    case Class:
        /* NOTE(review): the class array is written with %s and without
         * quotes; confirm this yields a valid initialiser. */
        self = nodeNumber++;
        fprintf(file,"Node node%i= { .Class = { Class, %s }};\n",self,node->Class.array);
        return self;
    case Dot:
        self = nodeNumber++;
        fprintf(file,"Node node%i= { .Dot = { Dot}};\n",self);
        return self;
    case Exc: {
        int child = printCode(file,node->Exc.children[0]);
        self = nodeNumber++;
        fprintf(file,"Node node%i = { .Exc = { Exc, &node%i }};\n",self,child);
        return self;
    }
    case Et: {
        int child = printCode(file,node->Et.children[0]);
        self = nodeNumber++;
        fprintf(file,"Node node%i = { .Et = { Et, &node%i }};\n",self,child);
        return self;
    }
    case Id:
        /* the rule name is emitted as a bare identifier */
        self = nodeNumber++;
        fprintf(file,"Node node%i = { .Id = { Id, %s", self, get(node,Id, symbol)->name);
        fprintf(file," }};\n");
        return self;
    }
    abort();
    return 0;
}
void printSymbolTable(FILE *file) {
for (int k= 0; k<symbolTable.length; k++) {
int i=printCode(file,symbolTable.elements[k]->rule);
fprintf(file,"Node *%s= &node%i;\n",symbolTable.elements[k]->name,i);
}
}
/* Read the whole of the file at `path` into a freshly allocated,
 * NUL-terminated buffer.  Exits with status 1 on any failure. */
static char *readGrammarFile(const char *path)
{
    FILE *fp= fopen(path, "r");
    if (!fp) {
        perror(path);
        exit(1);
    }
    struct stat sb;
    if (fstat(fileno(fp), &sb)) {
        perror(path);
        exit(1);
    }
    size_t size= (size_t)sb.st_size;
    /* one extra byte: mkInputBuffer() receives only a pointer, so the
     * text has to be a terminated C string */
    char *text= malloc(size + 1);
    if (!text) {
        fatal("out of memory");
    }
    size_t got= fread(text, 1, size, fp);
    if (got < size && ferror(fp)) {
        perror(path);
        exit(1);
    }
    text[got]= 0;
    fclose(fp);
    return text;
}

/* Entry point.
 *
 * Usage:  prog [-c output.c] ( -f grammar-file | grammar-text )
 *
 * The grammar is taken either from the file named after -f or from the
 * last non-option argument; supplying both or neither is fatal.  After
 * the grammar has been parsed:
 *   - with -c, C definitions for all rules are written to the named
 *     file and the program exits;
 *   - otherwise each line of stdin is matched against the rule named
 *     "start" and the outcome is printed together with the final
 *     position.
 * Returns 0 on success, 1 on a parse or I/O error. */
int main(int argc, char **argv)
{
    char *opt_f=0;
    char *opt_c=0;
    char *opt__=0;
    for (int i=1;i<argc;i++) {
        char *arg=argv[i];
        if (!strcmp(arg,"-f") && i<argc-1) {
            i++;
            opt_f=argv[i];
            continue;
        }
        if (!strcmp(arg,"-c") && i<argc-1) {
            i++;
            opt_c=argv[i];
            continue;
        }
        opt__=arg;
    }
    if (opt_f && opt__) fatal("file and command line expressions cannot both be supplied");
    if (!opt_f && !opt__) fatal("no expression specified");

    /* use the parsed options rather than fixed argv slots, so the
     * arguments may appear in any order */
    if (opt__) inputBuffer = mkInputBuffer(opt__);
    if (opt_f) inputBuffer = mkInputBuffer(readGrammarFile(opt_f));

    if (!yyparse()) {
        printf("Error\n");
        return 1;
    }

    if (opt_c) {
        FILE *outputFile = fopen(opt_c,"w");
        if (!outputFile) {
            perror(opt_c);
            return 1;
        }
        printSymbolTable(outputFile);
        /* fclose flushes; a failure here means the output is incomplete */
        if (fclose(outputFile)) {
            perror(opt_c);
            return 1;
        }
        return 0;
    }

    Node *startRule= intern("start")->rule;
    if (!startRule) {
        fatal("no start rule");
    }

    char *line=0;
    size_t line_max=0;
    ssize_t line_len=0;
    while ((line_len=getline(&line,&line_max,stdin))>=0) {
        /* strip the trailing newline so it is not part of the match */
        if (line_len>0 && line[line_len-1]=='\n') {
            line[line_len-1]=0;
        }
        initInputBuffer(inputBuffer,line);
        /* a match must also consume the whole line */
        if (!execute(startRule, inputBuffer) || !atEnd(inputBuffer)) {
            printf("no match, current position : %i\n", getPosition(inputBuffer));
        }
        else {
            printf("match, current position : %i\n", getPosition(inputBuffer));
        }
    }
    free(line);

    /* reference parser helpers this file does not otherwise use, to
     * silence unused-function warnings */
    (void)yySet;
    (void)yyPop;
    (void)yyPush;
    (void)yyAccept;
    (void)yymatchDot;
    (void)yymatchString;
    (void)yymatchChar;
    return 0;
}