Dynamic PEG for interpreted languages.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

625 lignes
13 KiB

%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <math.h>
#include "inputBuffer.c"
#include "peg-nodes.h"
#include "peg-engine.c"
#include "class.c"
;
/*
 * Report an unrecoverable error and terminate the process.
 *
 * fmt and the arguments after it are interpreted printf-style.  The text
 * "\nError: " is written to stderr, followed by the formatted message and a
 * newline; the process then exits with status 1, so this never returns.
 */
void fatal(char *fmt, ...)
{
    va_list args;

    fputs("\nError: ", stderr);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fputc('\n', stderr);
    exit(1);
}
/* Growable array of pointers to symbols plus its element count.
 * intern() inserts new symbols at their sorted position (by name) so the
 * array can be binary-searched. */
struct SymbolTable {
Symbol **elements;
int length;
};
/* Empty table: no element storage and a length of zero. */
#define SymbolTable_initialiser {0,0}
/* The global symbol table shared by intern() and the code-emitting functions.
 * NOTE(review): the bare type name SymbolTable is used here, so a typedef is
 * presumably supplied by one of the included headers - confirm. */
SymbolTable symbolTable= SymbolTable_initialiser;
/*
 * Allocate a new Symbol named `name`.
 *
 * The name is duplicated, so the caller keeps ownership of its own string.
 * The structure comes from calloc, so every field other than the name starts
 * out as zero bytes; number is additionally set to 0 explicitly.
 *
 * Running out of memory is reported on stderr and ends the process with
 * status 1, so the returned pointer is never NULL.
 */
Symbol *createSymbol(char *name) {
    Symbol *symbol = calloc(1, sizeof *symbol);
    if (!symbol) {
        perror("calloc");
        exit(1);
    }
    symbol->name = strdup(name);
    if (!symbol->name) {
        perror("strdup");
        exit(1);
    }
    symbol->number = 0;
    return symbol;
}
/* Allocate a node of variant `type`: sized as struct `type`, tagged with the
 * enum constant of the same name. */
#define new(type) mkNode(sizeof(struct type),type)

/*
 * Allocate a zero-filled node of `size` bytes and tag it with `type`.
 *
 * Running out of memory is reported on stderr and ends the process with
 * status 1, so the returned pointer is never NULL.
 */
Node *mkNode(size_t size,enum op type)
{
    Node *node = calloc(1, size);
    if (!node) {
        perror("calloc");
        exit(1);
    }
    node->type = type;
    return node;
}
/*
 * Build a String node holding a private copy of `s` together with its
 * length as reported by strlen.
 */
Node *mkString(char *s)
{
    Node *result = new(String);

    result->String.len = strlen(s);
    result->String.string = strdup(s);
    return result;
}
/* Build an Id node that refers to symbol `s`; the pointer is stored as is. */
Node *mkId(Symbol *s){
    Node *result = new(Id);

    result->Id.symbol = s;
    return result;
}
/* Build a Query node (printed as the postfix "?") whose only child is `n`. */
Node *mkQuery(Node *n)
{
    Node *result = new(Query);

    result->Query.children[0] = n;
    return result;
}
/* Build an Or node with `node1` as first child and `node2` as second child. */
Node *mkOr(Node *node1, Node *node2)
{
    Node *result = new(Or);

    result->Or.children[0] = node1;
    result->Or.children[1] = node2;
    return result;
}
/* Build an And node with `node1` as first child and `node2` as second child. */
Node *mkAnd(Node *node1, Node *node2)
{
    Node *result = new(And);

    result->And.children[0] = node1;
    result->And.children[1] = node2;
    return result;
}
/* Build a Star node (printed as the postfix "*") whose only child is `n`. */
Node *mkStar(Node *n)
{
    Node *result = new(Star);

    result->Star.children[0] = n;
    return result;
}
/*
 * Build a Class node around `str`.
 *
 * The pointer itself is stored, not a copy.  The printing code in this file
 * reads 32 bytes through it, so `str` must stay valid and be at least that
 * long for as long as the node is used.
 */
Node *mkClass(char* str)
{
    Node *result = new(Class);

    result->Class.array = str;
    return result;
}
/* Build a Plus node (created for the postfix "+") whose only child is `n`. */
Node *mkPlus(Node *n)
{
    Node *result = new(Plus);

    result->Plus.children[0] = n;
    return result;
}
/* Build a Dot node; it carries no data beyond its type tag. */
Node *mkDot()
{
    return new(Dot);
}
/* Build an Exc node (printed as the prefix "!") whose only child is `n`. */
Node *mkExc(Node *n)
{
    Node *result = new(Exc);

    result->Exc.children[0] = n;
    return result;
}
/* Build an Et node (printed as the prefix "&") whose only child is `n`. */
Node *mkEt(Node *n)
{
    Node *result = new(Et);

    result->Et.children[0] = n;
    return result;
}
/*
 * Write a textual rendering of the expression tree rooted at `node` to
 * stdout, recursing into children.  No trailing newline is written; use
 * println() for that.
 *
 * Each case reads the union member that matches the node's own type.  A
 * Plus node is followed by "+", mirroring "?" for Query and "*" for Star,
 * and an Id prints only the symbol name so that composite expressions stay
 * on a single line.
 *
 * Aborts if the node's type is not one of the known variants.
 */
void print(Node *node)
{
    switch (node->type) {
    case String:
        printf("\"%s\"", node->String.string);
        return;
    case Query:
        print(node->Query.children[0]);
        printf("?");
        return;
    case Star:
        print(node->Star.children[0]);
        printf("*");
        return;
    case Plus:
        print(node->Plus.children[0]);
        printf("+");
        return;
    case Or:
        print(node->Or.children[0]);
        printf("Or");
        print(node->Or.children[1]);
        return;
    case And:
        print(node->And.children[0]);
        printf("And");
        print(node->And.children[1]);
        return;
    case Class:
        printf("Class");
        /* The class data is dumped as 32 octal-escaped bytes. */
        for (int i = 0; i < 32; i++) {
            printf("\\%03o", (unsigned char)node->Class.array[i]);
        }
        return;
    case Dot:
        printf("Dot");
        return;
    case Exc:
        printf("!");
        print(node->Exc.children[0]);
        return;
    case Et:
        printf("&");
        print(node->Et.children[0]);
        return;
    case Id:
        printf("%s", get(node, Id, symbol)->name);
        return;
    }
    /* Unknown node type: the tree is corrupt. */
    abort();
}
/* Print `node` with print() and finish the line with a newline on stdout. */
void println(Node *node)
{
    print(node);
    putchar('\n');
}
/* Buffer that YY_INPUT reads from; main() assigns it before calling yyparse(). */
InputBuffer *inputBuffer=0;
/* Input hook for the generated parser: hands over one character per call.
 * When atEnd(inputBuffer) is true, result is set to 0.  Otherwise the current
 * character is stored in *buff, the buffer is advanced by one, and result is
 * set to 1.  maxSize is not used. */
#define YY_INPUT(buff,result,maxSize) \
{if (atEnd(inputBuffer)){ \
result=0; \
} \
else { \
*buff=currentChar(inputBuffer); \
advance(inputBuffer,1); \
result=1; \
}}
/*
 * Return the unique Symbol for `string`, creating it on first use.
 *
 * The lookup key is a copy of `string` with every '-' replaced by '_'.
 * symbolTable.elements is kept sorted by name: a binary search either finds
 * the existing symbol or yields the index at which the new one is inserted.
 *
 * Running out of memory is reported on stderr and ends the process with
 * status 1, so the returned pointer is never NULL.
 */
Symbol *intern(char *string){
    char *name = strdup(string);
    if (!name) {
        perror("strdup");
        exit(1);
    }
    for (char *p = name; *p; p++) {
        if (*p == '-') {
            *p = '_';
        }
    }

    int left = 0, right = symbolTable.length - 1;
    while (left <= right) {
        int middle = left + (right - left) / 2;
        int comp = strcmp(name, symbolTable.elements[middle]->name);
        if (comp < 0) {
            right = middle - 1;
        }
        else if (comp > 0) {
            left = middle + 1;
        }
        else {
            free(name);
            return symbolTable.elements[middle];
        }
    }

    /* Not found: `left` is the insertion point.  Grow through a temporary so
     * the existing array is not lost if realloc fails. */
    Symbol **grown = realloc(symbolTable.elements,
                             sizeof(symbolTable.elements[0]) * (symbolTable.length + 1));
    if (!grown) {
        perror("realloc");
        exit(1);
    }
    symbolTable.elements = grown;
    /* Shift the tail up by one slot to open a gap at `left`. */
    memmove(symbolTable.elements + left + 1, symbolTable.elements + left,
            (symbolTable.length - left) * sizeof(symbolTable.elements[0]));
    symbolTable.length++;
    /* createSymbol() takes its own copy of the name, so ours can be freed. */
    Symbol *s = symbolTable.elements[left] = createSymbol(name);
    free(name);
    return s;
}
/*
 * Bind the expression `rule` to the symbol called `name`.
 *
 * A trace line "Setting rule <name> to <expression>" is written to stdout
 * first; the symbol is then interned (created if needed) and its rule field
 * is overwritten.
 */
void setRule(char *name, Node *rule)
{
    printf("Setting rule %s to ", name);
    println(rule);

    Symbol *symbol = intern(name);
    symbol->rule = rule;
}
/* Semantic value type of the grammar below: the rule actions assign Node
 * pointers to $$ and to their named variables. */
#define YYSTYPE Node *
%}
# Grammar of the PEG notation itself.  Parsing a grammar text registers every
# declared rule in the symbol table through setRule.

# A grammar is optional leading space followed by one or more declarations.
start = - declaration+
# name = expression : bind the expression to the named rule.
declaration = i:id '=' - e:expression { setRule(get(i, Id, symbol)->name, e) }
expression = or
# Alternation, right-nested: a | b | c becomes Or(a, Or(b, c)).
or = a:and "|" - o:or { $$ = mkOr(a, o) }
| a:and { $$ = a }
# Sequence, right-nested: a b c becomes And(a, And(b, c)).
and = p:prefix a:and { $$ = mkAnd(p, a) }
| p:prefix { $$ = p }
# Optional prefix operator: ! builds an Exc node, & builds an Et node.
prefix = "!" - p : postfix { $$ = mkExc(p) }
| "&" - p : postfix { $$ = mkEt(p) }
| p: postfix { $$ = p}
# Optional postfix operator: ? builds Query, * builds Star, + builds Plus.
postfix = s:atom ( "?" - { s = mkQuery(s) }
| "*" - { s = mkStar(s) }
| "+" - { s = mkPlus(s) }
)? { $$ = s }
atom = string | class | dot | rule | '(' - expression ')' -
# A rule reference is an identifier that is not followed by an equals sign,
# so the name that starts the next declaration is not consumed as a reference.
rule = i:id !'=' { $$ = mkId(intern(yytext)) }
# Identifier.  The letter ranges are a-z and A-Z; the earlier spelling A-z
# also admitted the punctuation that sits between Z and a in ASCII, brackets
# included, so a name directly followed by a character class was read as one
# identifier.
id = < [a-zA-Z_\-][a-zA-Z_0-9]* > - { $$ = mkId(intern(yytext)) }
# String literal in double or single quotes; the body goes through
# unescapedString before it is stored.
string = '"' < [^\"]* > '"' - { $$ = mkString(unescapedString(yytext)) }
| "'" < [^\']* > "'" - { $$ = mkString(unescapedString(yytext)) }
# Character class: everything up to the closing bracket is handed to classify.
class = '['-<(!']'.)*> ']' - { $$=mkClass(classify(yytext)) }
dot = '.' - { $$=mkDot() }
# Optional white space, including line ends.
- = space*
space = [ \t] | '\n' '\r'* | '\r' '\n'*
%%
/* One integer counter per node variant.
 * NOTE(review): nothing in the visible part of this file reads or updates
 * these counters - confirm whether they are still needed. */
struct NodeCount{
    int String;
    int Query;
    int Star;
    int Plus;
    int Or;
    int And;
    int Class;
    int Dot;
    int Exc;
    int Et;
    int Id;
};

/* Global counters, every field starting at zero. */
NodeCount nodeCount = {0};
/* Number handed to the most recently visited node; printCode() increments it
 * once per call, so every emitted node gets a distinct number. */
int nodeNumber=0;
/*
 * Write to `file` C source text that defines the expression tree rooted at
 * `node` as a set of variables named node<N>, and return the number N that
 * was given to `node` itself.
 *
 * A node takes its number before its children are visited, but the children's
 * definitions are written first, so each emitted initialiser only refers to
 * variables that appear earlier in the output.
 *
 * Aborts if the node's type is not one of the known variants.
 */
int printCode(FILE *file,Node *node)
{
int thisNumber=++nodeNumber;
switch (node->type) {
case String:{
// Emit the text as a C string literal followed by its length.
fprintf(file,"Node node%i = { .String = { String, ",thisNumber);
fprintf(file,"\"");
for(int i=0;i<node->String.len;i++){
int c = (unsigned char) node->String.string[i];
switch(c) {
// Double quote and backslash need a backslash in front of them.
case '"':
case '\\':
fprintf(file,"\\%c",c);
continue;
default :
// Characters from space to tilde are copied as is; anything else becomes a three-digit octal escape.
if(c >=' ' && c <= '~') fprintf(file,"%c",c);
else fprintf(file,"\\%03o",c);
}
}
fprintf(file,"\"");
fprintf(file," , %i }}; \n",node->String.len);
return thisNumber;
}
// Single-child nodes: write the child first, then refer to it by number.
case Query: {
int i = printCode(file,node->Query.children[0]);
fprintf(file,"Node node%i",thisNumber);
fprintf(file,"= { .Query = { Query, {&node%i} }};\n",i);
return thisNumber;
}
case Star: {
int i = printCode(file,node->Star.children[0]);
fprintf(file,"Node node%i",thisNumber);
fprintf(file,"= { .Star = { Star, {&node%i} }};\n",i);
return thisNumber;
}
case Plus:{
int i = printCode(file,node->Plus.children[0]);
fprintf(file,"Node node%i = { .Plus = { Plus, {&node%i} }};\n",thisNumber,i);
return thisNumber;
}
// Two-child nodes: write both children, first child first.
case Or:{
int i = printCode(file,node->Or.children[0]);
int j = printCode(file,node->Or.children[1]);
fprintf(file,"Node node%i= { .Or = { Or, {&node%i",thisNumber,i);
fprintf(file," , &node%i} }};\n",j);
return thisNumber;
}
case And:{
int i = printCode(file,node->And.children[0]);
int j = printCode(file,node->And.children[1]);
fprintf(file,"Node node%i= { .And = { And, {&node%i",thisNumber,i);
fprintf(file," , &node%i} }};\n",j);
return thisNumber;
}
case Class:{
// The class data is written as a string literal of 32 octal-escaped bytes.
fprintf(file,"Node node%i= { .Class = { Class, \"",thisNumber);
for(int i=0;i<32;i++){
fprintf(file,"\\%03o",(unsigned char)node->Class.array[i]);
}
fprintf(file,"\" }};\n");
return thisNumber;
}
case Dot:{
fprintf(file,"Node node%i= { .Dot = { Dot}};\n",thisNumber);
return thisNumber;
}
case Exc:{
int i = printCode(file,node->Exc.children[0]);
fprintf(file,"Node node%i = { .Exc = { Exc, {&node%i} }};\n",thisNumber,i);
return thisNumber;
}
case Et: {
int i = printCode(file,node->Et.children[0]);
fprintf(file,"Node node%i = { .Et = { Et, {&node%i} }};\n",thisNumber,i);
return thisNumber;
}
case Id: {
// A rule reference points at the variable symbol_<name>, which declareSymbols() and defineSymbols() emit.
fprintf(file,"Node node%i = { .Id = { Id, &symbol_%s", thisNumber, get(node,Id, symbol)->name);
fprintf(file," }};\n");
return thisNumber;
}
}
// Unknown node type.
abort();
return 0;
}
void printSymbolTable(FILE *file) {
for (int k= 0; k<symbolTable.length; k++) {
int i=printCode(file,symbolTable.elements[k]->rule);
fprintf(file,"Node *%s= &node%i;\n",symbolTable.elements[k]->name,i);
symbolTable.elements[k]->number=i;
}
}
void declareSymbols(FILE *file) {
for (int k= 0; k<symbolTable.length; k++) {
fprintf(file,"Symbol symbol_%s ;\n",symbolTable.elements[k]->name);
}
}
/*
 * Write the initialised definition of every symbol to `file`, in table order:
 * its name as a string, the address of the node variable numbered by the
 * symbol's number field, and that number itself.
 */
void defineSymbols(FILE *file) {
    int index = 0;

    while (index < symbolTable.length) {
        Symbol *symbol = symbolTable.elements[index];
        const char *name = symbol->name;
        int number = symbol->number;

        fprintf(file, "Symbol symbol_%s = { \"%s\", &node%i, %i};\n", name, name, number, number);
        index++;
    }
}
/*
 * Read the whole of the file at `path` into a newly allocated buffer and
 * return it.  The buffer is NUL-terminated because mkInputBuffer() is given
 * nothing but the pointer.  An empty file yields an empty string.  Any
 * failure is reported and ends the process with status 1.
 */
static char *readGrammarFile(const char *path)
{
    FILE *fp = fopen(path, "r");
    if (!fp) {
        perror(path);
        exit(1);
    }
    struct stat sb;
    if (fstat(fileno(fp), &sb)) {
        perror(path);
        exit(1);
    }
    char *text = malloc((size_t)sb.st_size + 1);
    if (!text) {
        fatal("out of memory");
    }
    /* Read byte-wise so that a short or empty read is not an error in itself. */
    size_t length = fread(text, 1, (size_t)sb.st_size, fp);
    if (ferror(fp)) {
        perror(path);
        exit(1);
    }
    text[length] = 0;
    fclose(fp);
    return text;
}

/*
 * Command line:
 *   <expression>   grammar text given directly as an argument
 *   -f <file>      grammar text read from a file
 *   -c <file>      after parsing, write the grammar as C source to <file>
 *
 * Exactly one of <expression> and -f must be supplied.  Without -c, each
 * line of standard input is matched against the rule named "start" and the
 * result is printed.  Returns 0 on success and 1 when the grammar cannot be
 * parsed; other errors end the process with status 1.
 */
int main(int argc, char **argv)
{
    char *opt_f = 0;
    char *opt_c = 0;
    char *opt__ = 0;

    for (int i = 1; i < argc; i++) {
        char *arg = argv[i];
        if (!strcmp(arg, "-f") || !strcmp(arg, "-c")) {
            if (i == argc - 1) {
                fatal("option %s requires an argument", arg);
            }
            i++;
            if (arg[1] == 'f') {
                opt_f = argv[i];
            }
            else {
                opt_c = argv[i];
            }
            continue;
        }
        opt__ = arg;
    }
    if (opt_f && opt__) fatal("file and command line expressions cannot both be supplied");
    if (!opt_f && !opt__) fatal("no expression specified");

    /* Use the parsed options, not fixed argv positions, so the arguments may
     * come in any order. */
    if (opt__) {
        inputBuffer = mkInputBuffer(opt__);
    }
    else {
        inputBuffer = mkInputBuffer(readGrammarFile(opt_f));
    }

    if (!yyparse()) {
        printf("Error\n");
        return 1;
    }

    if (opt_c) {
        FILE *outputFile = fopen(opt_c, "w");
        if (!outputFile) {
            perror(opt_c);
            exit(1);
        }
        declareSymbols(outputFile);
        printSymbolTable(outputFile);
        defineSymbols(outputFile);
        /* fclose flushes; a failure here means the output is incomplete. */
        if (fclose(outputFile)) {
            perror(opt_c);
            exit(1);
        }
        return 0;
    }

    Node *startRule = intern("start")->rule;
    if (!startRule) {
        fatal("no start rule");
    }

    char *line = 0;
    size_t line_max = 0;
    ssize_t line_len = 0;
    while ((line_len = getline(&line, &line_max, stdin)) >= 0) {
        /* Drop the trailing newline so it is not part of the text to match. */
        if (line_len > 0 && line[line_len - 1] == '\n') {
            line[line_len - 1] = 0;
        }
        initInputBuffer(inputBuffer, line);
        /* A match counts only when the whole line was consumed. */
        if (!execute(startRule, inputBuffer) || !atEnd(inputBuffer)) {
            printf("no match, current position : %i\n", getPosition(inputBuffer));
        }
        else {
            printf("match, current position : %i\n", getPosition(inputBuffer));
        }
    }
    free(line);
    return 0;

    /* Never executed.  NOTE(review): these names are presumably helpers
     * emitted by the parser generator, referenced here only to silence
     * unused-function warnings - confirm. */
    (void)yySet;
    (void)yyPop;
    (void)yyPush;
    (void)yyAccept;
    (void)yymatchDot;
    (void)yymatchString;
    (void)yymatchChar;
}