diff --git a/parse.leg b/parse.leg index 2334526..d836cc4 100644 --- a/parse.leg +++ b/parse.leg @@ -3,8 +3,23 @@ #include #include #include +#include +#include + #include "inputBuffer.c" ; + +void fatal(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + fprintf(stderr, "\nError: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); +} + enum op { String, Query, Star, Plus, Or, And, Class, Dot, Exc, Id } ; @@ -82,7 +97,7 @@ Node *mkString(char *s) } Node *mkId(Symbol *s){ - + Node *node= new(Id); node->Id.symbol=s; return node; @@ -107,7 +122,7 @@ Node *mkOr(Node *node1, Node *node2) Node *mkAnd(Node *node1, Node *node2) { - Node *node= new(And); + Node *node= new(And); node->And.children[0]= node1; node->And.children[1]= node2; return node; @@ -162,43 +177,43 @@ void print(Node *node) { switch (node->type) { case String: - printf("\"%s\"", node->String.string); - return; + printf("\"%s\"", node->String.string); + return; case Query: - print(node->Query.children[0]); - printf("?"); - return; + print(node->Query.children[0]); + printf("?"); + return; case Star: - print(node->Query.children[0]); - printf("*"); - return; + print(node->Query.children[0]); + printf("*"); + return; case Plus: - print(node->Query.children[0]); - return; + print(node->Query.children[0]); + return; case Or: - print(node->Or.children[0]); - printf("Or"); - print(node->Or.children[1]); - return; + print(node->Or.children[0]); + printf("Or"); + print(node->Or.children[1]); + return; case And: - print(node->And.children[0]); - printf("And"); - print(node->And.children[1]); - return; + print(node->And.children[0]); + printf("And"); + print(node->And.children[1]); + return; case Class: - printf("Class"); - printf("\"%s\"", node->Class.stringValue); - return; + printf("Class"); + printf("\"%s\"", node->Class.stringValue); + return; case Dot: - printf("Dot"); - return; + printf("Dot"); + return; case Exc: - printf("!"); - print(node->Exc.children[0]); - return; + printf("!"); + print(node->Exc.children[0]); + return; case Id: - printf("%s\n",get(node,Id,symbol)->name); - return; + printf("%s\n",get(node,Id,symbol)->name); + return; } abort(); } @@ -218,7 +233,7 @@ else { \ *buff=currentChar(inputBuffer); \ advance(inputBuffer,1); \ result=1; \ -}} +}} @@ -226,11 +241,11 @@ else { \ Symbol *intern(char *name){ int left=0,right=symbolTable.length-1; - - while(left<=right){ + + while (left<=right) { int middle=(left+right)/2; int comp=strcmp(name,symbolTable.elements[middle]->name); - + if(comp<0){ right=middle-1; } @@ -241,7 +256,7 @@ Symbol *intern(char *name){ return symbolTable.elements[middle]; } } - + symbolTable.elements= realloc(symbolTable.elements,sizeof(symbolTable.elements[0]) * (symbolTable.length+1)); memmove(symbolTable.elements+left+1,symbolTable.elements+left,(symbolTable.length-left)*sizeof(symbolTable.elements[0])); symbolTable.length++; @@ -249,130 +264,127 @@ Symbol *intern(char *name){ } -void setRule(char *name, Node *rule){ +void setRule(char *name, Node *rule) +{ + printf("Setting rule %s to ", name); + println(rule); intern(name)->rule=rule; } #define YYSTYPE Node * -YYSTYPE yylval = 0; %} -start = declaration+ - -declaration = i:id '=' - e:expression { setRule(get(i, Id, symbol)->name, e) } +start = - declaration+ -expression = or +declaration = i:id '=' - e:expression { setRule(get(i, Id, symbol)->name, e) } -or = a:and - "|" - o:or {$$ = mkOr(o, a) } - | a:and { $$ = a } +expression = or -and = p:prefix a:and { $$ = mkAnd(p, a); } - | p:prefix { $$ = p } +or = a:and "|" - o:or { $$ = mkOr(o, a) } + | a:and { $$ = a } -prefix = "!"- p : postfix {$$ = mkExc(p)} - | p: postfix {$$ = p} +and = p:prefix a:and { $$ = mkAnd(p, a) } + | p:prefix { $$ = p } -postfix = s:atom ( "?" - { s = mkQuery(s) } - | "*" - { s = mkStar(s) } - | "+" - { s = mkPlus(s) } - )? { $$ = s} +prefix = "!" - p : postfix { $$ = mkExc(p) } + | p: postfix { $$ = p} -atom = string | class | dot | rule +postfix = s:atom ( "?" - { s = mkQuery(s) } + | "*" - { s = mkStar(s) } + | "+" - { s = mkPlus(s) } + )? { $$ = s } -rule = i:id !'=' { $$ = mkId(intern(yytext)) } +atom = string | class | dot | rule -id = < [a-zA-z_][a-zA-z_0-9]* > - { $$ = mkId(intern(yytext)) } +rule = i:id !'=' { $$ = mkId(intern(yytext)) } -string = '"' < [^"]* > '"' { $$ = mkString(yytext) } - +id = < [a-zA-z_][a-zA-z_0-9]* > - { $$ = mkId(intern(yytext)) } -class = '['-<(!']'string)*> ']' { $$=mkClass(yytext) } - +string = '"' < [^\"]* > '"' - { $$ = mkString(yytext) } -dot = -'.'- {$$=mkDot()} - +class = '['-<(!']'string)*> ']' - { $$=mkClass(yytext) } +dot = '.' - { $$=mkDot() } -- = space* +- = space* -space = [ \t] | '\n' '\r'* | '\r' '\n'* +space = [ \t] | '\n' '\r'* | '\r' '\n'* %% - - - - int execute(Node *node, InputBuffer *in) { switch (node->type) { case String: { - if (strncmp(currentText(in), get(node,String,string), get(node,String,len))) { - return 0; - } - advance(in, get(node,String,len)); - return 1; + if (strncmp(currentText(in), get(node,String,string), get(node,String,len))) { + return 0; + } + advance(in, get(node,String,len)); + return 1; } case And: { - int pos= getPosition(in); - if (!execute(get(node,And,children[0]), in)) //si il y a eu une erreur - { - return 0; - } //si ça s'est bien passé - if (!execute(get(node,And,children[1]), in)) { - setPosition(in, pos); - return 0; - } - return 1; + int pos= getPosition(in); + if (!execute(get(node,And,children[0]), in)) //si il y a eu une erreur + { + return 0; + } //si ça s'est bien passé + if (!execute(get(node,And,children[1]), in)) { + setPosition(in, pos); + return 0; + } + return 1; } case Or: { - if (execute(get(node,Or,children[0]), in)) { - return 1; - } - return execute(get(node,Or,children[1]), in); + if (execute(get(node,Or,children[0]), in)) { + return 1; + } + return execute(get(node,Or,children[1]), in); } case Star: { - while (execute(get(node,Star,children[0]), in)); - return 1; + while (execute(get(node,Star,children[0]), in)); + return 1; } case Plus: { - if (!execute(get(node,Plus,children[0]), in)) { - return 0; - } - while (execute(get(node,Plus,children[0]), in)); - return 1; + if (!execute(get(node,Plus,children[0]), in)) { + return 0; + } + while (execute(get(node,Plus,children[0]), in)); + return 1; } case Class: { - if (!currentChar(in)) { - return 0; - } - if (strchr(get(node,Class,stringValue), currentChar(in))) { - advance(in, 1); - return 1; - } - return 0; + if (!currentChar(in)) { + return 0; + } + if (strchr(get(node,Class,stringValue), currentChar(in))) { + advance(in, 1); + return 1; + } + return 0; } case Query: { - execute(get(node,Query,children[0]), in); - return 1; + execute(get(node,Query,children[0]), in); + return 1; } case Exc: { - int pos= getPosition(in); - if (!execute(get(node,Exc,children[0]), in)) { - return 1; - } - setPosition(in, pos); - return 0; + int pos= getPosition(in); + if (!execute(get(node,Exc,children[0]), in)) { + return 1; + } + setPosition(in, pos); + return 0; } case Dot: { - if (atEnd(in)) { - return 0; - } - advance(in, 1); - return 1; + if (atEnd(in)) { + return 0; + } + advance(in, 1); + return 1; } case Id: { - Symbol *symbol= get(node, Id, symbol); - if (0 == symbol->rule) { printf("undefined rule: %s\n", symbol->name); } - return execute(symbol->rule, in); + Symbol *symbol= get(node, Id, symbol); + if (0 == symbol->rule) { printf("undefined rule: %s\n", symbol->name); } + return execute(symbol->rule, in); } } printf("this cannot happen\n"); @@ -381,33 +393,69 @@ int execute(Node *node, InputBuffer *in) int main(int argc, char **argv) { - if(argc<2) return 0; - - inputBuffer = mkInputBuffer(argv[1]); + switch (argc) { + case 2: { + inputBuffer = mkInputBuffer(argv[1]); + break; + } + case 3: { + if (!strcmp("-f", argv[1])) { + FILE *fp= fopen(argv[2], "r"); + if (!fp) { + perror(argv[2]); + exit(1); + } + struct stat sb; + if (fstat(fileno(fp), &sb)) { + perror(argv[2]); + exit(1); + } + char *text= malloc(sb.st_size); + if (!text) { + fatal("out of memory"); + exit(1); + } + if (fread(text, sb.st_size, 1, fp) < 1) { + perror(argv[2]); + exit(1); + } + fclose(fp); + inputBuffer = mkInputBuffer(text); + break; + } + fatal("unrecognised option: %s", argv[1]); + } + default: { + fatal("usage: %s parsing-expression | -f filename", argv[0]); + exit(1); + } + } if (!yyparse()) { printf("Error\n"); return 1; } - println(yylval); - char *line=0; - size_t line_max=0; + + char *line=0; + size_t line_max=0; ssize_t line_len=0; - Symbol *start= intern("start"); - if (!start->rule) perror("no start rule"); - while((line_len=getline(&line,&line_max,stdin))>=0){ - if(line_len>0 && line[line_len-1]=='\n'){ + Node *startRule= intern("start")->rule; + if (!startRule) { + fatal("no start rule"); + return 1; + } + while ((line_len=getline(&line,&line_max,stdin))>=0) { + if (line_len>0 && line[line_len-1]=='\n') { line[line_len-1]=0; } initInputBuffer(inputBuffer,line); - if (!execute(yylval, inputBuffer) || !atEnd(inputBuffer)){ + if (!execute(startRule, inputBuffer) || !atEnd(inputBuffer)) { printf("no match, current position : %i\n", getPosition(inputBuffer)); - } - else{ + } + else { printf("match, current position : %i\n", getPosition(inputBuffer)); - }// 0 => no match, 1 => match + } // 0 => no match, 1 => match } - return 0;