Dynamic PEG for interpreted languages.
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

421 строки
8.7 KiB

%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "inputBuffer.c"
;
enum op { String, Query, Star, Plus, Or, And, Class, Dot, Exc, Id } ;
typedef union Node Node;
typedef struct Symbol Symbol;
typedef struct Array Array;
typedef struct SymbolTable SymbolTable;
struct Array {
Node **elements;
int length;
};
struct String { enum op type; char *string; int len; };
struct Query { enum op type; Node *children[1]; };
struct Star { enum op type; Node *children[1]; };
struct Plus { enum op type; Node *children[1]; };
struct Or { enum op type; Node *children[2]; };
struct And { enum op type; Node *children[2]; };
struct Class { enum op type; char *stringValue; int len; };
struct Dot { enum op type; };
struct Exc { enum op type; Node *children[1]; };
struct Id { enum op type; Symbol *symbol; };
union Node {
enum op type;
struct String String;
struct Query Query;
struct Star Star;
struct Plus Plus;
struct Or Or;
struct And And;
struct Class Class;
struct Dot Dot;
struct Exc Exc;
struct Id Id;
};
struct Symbol{
char* name;
Node *rule;
};
struct SymbolTable {
Symbol **elements;
int length;
};
#define SymbolTable_initialiser {0,0}
SymbolTable symbolTable= SymbolTable_initialiser;
Symbol *createSymbol(char *name) {
Symbol *symbol= calloc(1, sizeof(Symbol));
symbol->name= strdup(name);
return symbol;
}
#define new(type) mkNode(sizeof(struct type),type)
Node *mkNode(size_t size,enum op type)
{
Node *node= calloc(1, size);
node->type= type;
return node;
}
Node *mkString(char *s)
{
Node *node= new(String);
node->String.string= strdup(s);
node->String.len=strlen(s);
return node;
}
Node *mkId(Symbol *s){
Node *node= new(Id);
node->Id.symbol=s;
return node;
}
Node *mkQuery(Node *n)
{
Node *node= new(Query);
node->Query.children[0]= n;
return node;
}
Node *mkOr(Node *node1, Node *node2)
{
Node *node= new(Or);
node->Or.children[0]= node1;
node->Or.children[1]= node2;
return node;
}
Node *mkAnd(Node *node1, Node *node2)
{
Node *node= new(And);
node->And.children[0]= node1;
node->And.children[1]= node2;
return node;
}
Node *mkStar(Node *n)
{
Node *node= new(Star);
node->Star.children[0]= n;
return node;
}
Node *mkClass(char* str)
{
Node *node= new(Class);
node->Class.stringValue= str;
node->Class.len=strlen(str);
return node;
}
Node *mkPlus(Node *n)
{
Node *node= new(Plus);
node->Plus.children[0]= n;
return node;
}
Node *mkDot()
{
Node *node= new(Dot);
return node;
}
Node *mkExc(Node *n)
{
Node *node= new(Exc);
node->Exc.children[0]= n;
return node;
}
Node *_checktype(Node *object, enum op type)
{
if (object->type == type) return object;
fprintf(stderr, "\naccesing type %i as if it were a %i\n", object->type, type);
exit(1);
return 0;
}
#define get(object, type, member) (_checktype(object, type)->type.member)
void print(Node *node)
{
switch (node->type) {
case String:
printf("\"%s\"", node->String.string);
return;
case Query:
print(node->Query.children[0]);
printf("?");
return;
case Star:
print(node->Query.children[0]);
printf("*");
return;
case Plus:
print(node->Query.children[0]);
return;
case Or:
print(node->Or.children[0]);
printf("Or");
print(node->Or.children[1]);
return;
case And:
print(node->And.children[0]);
printf("And");
print(node->And.children[1]);
return;
case Class:
printf("Class");
printf("\"%s\"", node->Class.stringValue);
return;
case Dot:
printf("Dot");
return;
case Exc:
printf("!");
print(node->Exc.children[0]);
return;
case Id:
printf("%s\n",get(node,Id,symbol)->name);
return;
}
abort();
}
void println(Node *node)
{
print(node);
printf("\n");
}
InputBuffer *inputBuffer=0;
#define YY_INPUT(buff,result,maxSize) \
{if (atEnd(inputBuffer)){ \
result=0; \
} \
else { \
*buff=currentChar(inputBuffer); \
advance(inputBuffer,1); \
result=1; \
}}
Symbol *intern(char *name){
int left=0,right=symbolTable.length-1;
while(left<=right){
int middle=(left+right)/2;
int comp=strcmp(name,symbolTable.elements[middle]->name);
if(comp<0){
right=middle-1;
}
else if(comp>0){
left=middle+1;
}
else{
return symbolTable.elements[middle];
}
}
symbolTable.elements= realloc(symbolTable.elements,sizeof(symbolTable.elements[0]) * (symbolTable.length+1));
memmove(symbolTable.elements+left+1,symbolTable.elements+left,(symbolTable.length-left)*sizeof(symbolTable.elements[0]));
symbolTable.length++;
return symbolTable.elements[left]=createSymbol(name);
}
void setRule(char *name, Node *rule){
intern(name)->rule=rule;
}
#define YYSTYPE Node *
YYSTYPE yylval = 0;
%}
start = declaration+
declaration = i:id '=' - e:expression { setRule(get(i, Id, symbol)->name, e) }
expression = or
or = a:and - "|" - o:or {$$ = mkOr(o, a) }
| a:and { $$ = a }
and = p:prefix a:and { $$ = mkAnd(p, a); }
| p:prefix { $$ = p }
prefix = "!"- p : postfix {$$ = mkExc(p)}
| p: postfix {$$ = p}
postfix = s:atom ( "?" - { s = mkQuery(s) }
| "*" - { s = mkStar(s) }
| "+" - { s = mkPlus(s) }
)? { $$ = s}
atom = string | class | dot | rule
rule = i:id !'=' { $$ = mkId(intern(yytext)) }
id = < [a-zA-z_][a-zA-z_0-9]* > - { $$ = mkId(intern(yytext)) }
string = '"' < [^"]* > '"' { $$ = mkString(yytext) } -
class = '['-<(!']'string)*> ']' { $$=mkClass(yytext) } -
dot = -'.'- {$$=mkDot()} -
- = space*
space = [ \t] | '\n' '\r'* | '\r' '\n'*
%%
int execute(Node *node, InputBuffer *in)
{
switch (node->type) {
case String: {
if (strncmp(currentText(in), get(node,String,string), get(node,String,len))) {
return 0;
}
advance(in, get(node,String,len));
return 1;
}
case And: {
int pos= getPosition(in);
if (!execute(get(node,And,children[0]), in)) //si il y a eu une erreur
{
return 0;
} //si ça s'est bien passé
if (!execute(get(node,And,children[1]), in)) {
setPosition(in, pos);
return 0;
}
return 1;
}
case Or: {
if (execute(get(node,Or,children[0]), in)) {
return 1;
}
return execute(get(node,Or,children[1]), in);
}
case Star: {
while (execute(get(node,Star,children[0]), in));
return 1;
}
case Plus: {
if (!execute(get(node,Plus,children[0]), in)) {
return 0;
}
while (execute(get(node,Plus,children[0]), in));
return 1;
}
case Class: {
if (!currentChar(in)) {
return 0;
}
if (strchr(get(node,Class,stringValue), currentChar(in))) {
advance(in, 1);
return 1;
}
return 0;
}
case Query: {
execute(get(node,Query,children[0]), in);
return 1;
}
case Exc: {
int pos= getPosition(in);
if (!execute(get(node,Exc,children[0]), in)) {
return 1;
}
setPosition(in, pos);
return 0;
}
case Dot: {
if (atEnd(in)) {
return 0;
}
advance(in, 1);
return 1;
}
case Id: {
Symbol *symbol= get(node, Id, symbol);
if (0 == symbol->rule) { printf("undefined rule: %s\n", symbol->name); }
return execute(symbol->rule, in);
}
}
printf("this cannot happen\n");
abort();
}
int main(int argc, char **argv)
{
if(argc<2) return 0;
inputBuffer = mkInputBuffer(argv[1]);
if (!yyparse()) {
printf("Error\n");
return 1;
}
println(yylval);
char *line=0;
size_t line_max=0;
ssize_t line_len=0;
Symbol *start= intern("start");
if (!start->rule) perror("no start rule");
while((line_len=getline(&line,&line_max,stdin))>=0){
if(line_len>0 && line[line_len-1]=='\n'){
line[line_len-1]=0;
}
initInputBuffer(inputBuffer,line);
if (!execute(yylval, inputBuffer) || !atEnd(inputBuffer)){
printf("no match, current position : %i\n", getPosition(inputBuffer));
}
else{
printf("match, current position : %i\n", getPosition(inputBuffer));
}// 0 => no match, 1 => match
}
return 0;
(void)yySet;
(void)yyPop;
(void)yyPush;
(void)yyAccept;
(void)yymatchDot;
(void)yymatchString;
(void)yymatchChar;
}