Dynamic PEG for interpreted languages.
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

625 rindas
13 KiB

%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <math.h>
#include "inputBuffer.c"
#include "peg-nodes.h"
#include "peg-engine.c"
#include "class.c"
;
/*
 * Print "\nError: " followed by the printf-style message and a newline
 * on stderr, then terminate the process with exit status 1.
 * Never returns.
 */
void fatal(char *fmt, ...)
{
    va_list args;

    fputs("\nError: ", stderr);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fputc('\n', stderr);
    exit(1);
}
/*
 * Global table of interned symbols.
 *   elements - heap array of Symbol pointers, grown by one slot per new
 *              symbol in intern(); intern() keeps it ordered by strcmp()
 *              on the symbol name so it can binary-search it.
 *   length   - number of valid entries in elements.
 * NOTE(review): the bare type name `SymbolTable` is used below, so a
 * typedef for it is presumably provided by one of the included headers.
 */
struct SymbolTable {
Symbol **elements;
int length;
};
/* Empty table: no storage allocated yet, zero entries. */
#define SymbolTable_initialiser {0,0}
SymbolTable symbolTable= SymbolTable_initialiser;
/*
 * Allocate a new Symbol whose name is a private copy of `name`.
 * All other fields start zeroed (calloc), so `number` is 0 and no rule
 * is attached yet.
 * Calls fatal() (which does not return) if memory cannot be obtained.
 * The caller keeps ownership of `name`.
 */
Symbol *createSymbol(char *name) {
    Symbol *symbol = calloc(1, sizeof *symbol);
    if (!symbol) {
        fatal("out of memory");
    }
    symbol->name = strdup(name);
    if (!symbol->name) {
        fatal("out of memory");
    }
    return symbol;
}
/* new(T): allocate a zeroed node big enough for struct T, tagged with op T. */
#define new(type) mkNode(sizeof(struct type),type)
/*
 * Allocate `size` zeroed bytes and record `type` in the node's type tag.
 * Calls fatal() (which does not return) when the allocation fails, so
 * every mk* constructor can rely on a non-NULL result.
 */
Node *mkNode(size_t size,enum op type)
{
    Node *node = calloc(1, size);
    if (!node) {
        fatal("out of memory");
    }
    node->type = type;
    return node;
}
/*
 * Build a String node holding a private copy of `s` together with its
 * length as reported by strlen().
 */
Node *mkString(char *s)
{
    Node *literal = new(String);

    literal->String.len = strlen(s);
    literal->String.string = strdup(s);
    return literal;
}
/* Build an Id node that refers to symbol `s` (the symbol is not copied). */
Node *mkId(Symbol *s)
{
    Node *ref = new(Id);

    ref->Id.symbol = s;
    return ref;
}
/* Build a Query node (printed as `n?`) with `n` as its only child. */
Node *mkQuery(Node *n)
{
    Node *optional = new(Query);

    optional->Query.children[0] = n;
    return optional;
}
/* Build an Or node whose children are node1 then node2, in that order. */
Node *mkOr(Node *node1, Node *node2)
{
    Node *choice = new(Or);

    choice->Or.children[1] = node2;
    choice->Or.children[0] = node1;
    return choice;
}
/* Build an And node whose children are node1 then node2, in that order. */
Node *mkAnd(Node *node1, Node *node2)
{
    Node *sequence = new(And);

    sequence->And.children[1] = node2;
    sequence->And.children[0] = node1;
    return sequence;
}
/* Build a Star node (printed as `n*`) with `n` as its only child. */
Node *mkStar(Node *n)
{
    Node *repeat = new(Star);

    repeat->Star.children[0] = n;
    return repeat;
}
/*
 * Build a Class node around `str`. The pointer is stored as-is, not
 * copied; the printing code in this file reads 32 bytes from it.
 */
Node *mkClass(char* str)
{
    Node *charset = new(Class);

    charset->Class.array = str;
    return charset;
}
/* Build a Plus node with `n` as its only child. */
Node *mkPlus(Node *n)
{
    Node *repeat = new(Plus);

    repeat->Plus.children[0] = n;
    return repeat;
}
/* Build a Dot node; it carries no payload beyond its type tag. */
Node *mkDot()
{
    return new(Dot);
}
/* Build an Exc node (printed as `!n`) with `n` as its only child. */
Node *mkExc(Node *n)
{
    Node *negated = new(Exc);

    negated->Exc.children[0] = n;
    return negated;
}
/* Build an Et node (printed as `&n`) with `n` as its only child. */
Node *mkEt(Node *n)
{
    Node *lookahead = new(Et);

    lookahead->Et.children[0] = n;
    return lookahead;
}
/*
 * Write a textual rendering of the expression tree rooted at `node` to
 * stdout, recursing into children. No trailing newline is added except
 * by the Id case (see note there). Aborts on an unknown node type.
 */
void print(Node *node)
{
    switch (node->type) {
    case String:
        printf("\"%s\"", node->String.string);
        return;
    case Query:
        print(node->Query.children[0]);
        printf("?");
        return;
    case Star:
        /* read through the Star member rather than aliasing Query */
        print(node->Star.children[0]);
        printf("*");
        return;
    case Plus:
        print(node->Plus.children[0]);
        /* the suffix was previously missing, making `x+` print as `x` */
        printf("+");
        return;
    case Or:
        print(node->Or.children[0]);
        printf("Or");
        print(node->Or.children[1]);
        return;
    case And:
        print(node->And.children[0]);
        printf("And");
        print(node->And.children[1]);
        return;
    case Class:
        printf("Class");
        /* dump the 32-byte class array as octal escapes */
        for (int i = 0; i < 32; i++) {
            printf("\\%03o", (unsigned char)node->Class.array[i]);
        }
        return;
    case Dot:
        printf("Dot");
        return;
    case Exc:
        printf("!");
        print(node->Exc.children[0]);
        return;
    case Et:
        printf("&");
        print(node->Et.children[0]);
        return;
    case Id:
        /* NOTE(review): this is the only case that emits a newline itself;
           kept as-is, but it splits nested output across lines - confirm
           whether that is intended. */
        printf("%s\n", get(node,Id,symbol)->name);
        return;
    }
    /* every known node type returns above */
    abort();
}
/* Print `node` with print() and finish the line with a newline. */
void println(Node *node)
{
    print(node);
    putchar('\n');
}
/* Buffer the generated parser reads from; assigned in main(). */
InputBuffer *inputBuffer=0;
/*
 * Input hook for the generated parser: delivers at most one character
 * per invocation from inputBuffer.
 *   - when atEnd(inputBuffer) is true, result is set to 0;
 *   - otherwise the current character is stored in buff[0], the buffer
 *     is advanced by 1 and result is set to 1.
 * maxSize is not used.
 */
#define YY_INPUT(buff,result,maxSize) \
{if (atEnd(inputBuffer)){ \
result=0; \
} \
else { \
*buff=currentChar(inputBuffer); \
advance(inputBuffer,1); \
result=1; \
}}
/*
 * Return the unique Symbol for `string`, creating it on first use.
 *
 * The lookup key is a copy of `string` with every '-' replaced by '_'.
 * symbolTable.elements is kept ordered by strcmp() on that key, so the
 * lookup is a binary search and a new symbol is inserted at the position
 * where the search ended.
 *
 * `string` is not modified and remains owned by the caller.
 * Calls fatal() (which does not return) if memory cannot be obtained.
 */
Symbol *intern(char *string){
    char *name = strdup(string);
    if (!name) {
        fatal("out of memory");
    }
    for (char *p = name; *p; p++) {
        if (*p == '-') {
            *p = '_';
        }
    }

    int left = 0, right = symbolTable.length - 1;
    while (left <= right) {
        int middle = left + (right - left) / 2;
        int comp = strcmp(name, symbolTable.elements[middle]->name);
        if (comp < 0) {
            right = middle - 1;
        }
        else if (comp > 0) {
            left = middle + 1;
        }
        else {
            free(name);
            return symbolTable.elements[middle];
        }
    }

    /* Grow through a temporary so the table is not lost if realloc fails. */
    Symbol **grown = realloc(symbolTable.elements,
                             sizeof *grown * (symbolTable.length + 1));
    if (!grown) {
        fatal("out of memory");
    }
    symbolTable.elements = grown;

    /* Open a gap at index `left`, the sorted position for the new name. */
    memmove(symbolTable.elements + left + 1,
            symbolTable.elements + left,
            (symbolTable.length - left) * sizeof symbolTable.elements[0]);
    symbolTable.length++;

    Symbol *s = createSymbol(name);   /* createSymbol copies the name */
    symbolTable.elements[left] = s;
    free(name);
    return s;
}
/*
 * Announce on stdout which rule is being defined (name, then the printed
 * expression), then attach `rule` to the symbol interned for `name`.
 */
void setRule(char *name, Node *rule)
{
    Symbol *target;

    printf("Setting rule %s to ", name);
    println(rule);
    target = intern(name);
    target->rule = rule;
}
#define YYSTYPE Node *
%}
# A grammar is optional leading whitespace followed by one or more rule
# declarations of the form  name = expression.
start = - declaration+
declaration = i:id '=' - e:expression { setRule(get(i, Id, symbol)->name, e) }
expression = or
# Alternation and sequencing both nest to the right.
or = a:and "|" - o:or { $$ = mkOr(a, o) }
| a:and { $$ = a }
and = p:prefix a:and { $$ = mkAnd(p, a) }
| p:prefix { $$ = p }
# Prefix predicates: !e and &e.
prefix = "!" - p : postfix { $$ = mkExc(p) }
| "&" - p : postfix { $$ = mkEt(p) }
| p: postfix { $$ = p}
# Postfix operators: at most one of ? * + after an atom.
postfix = s:atom ( "?" - { s = mkQuery(s) }
| "*" - { s = mkStar(s) }
| "+" - { s = mkPlus(s) }
)? { $$ = s }
atom = string | class | dot | rule | '(' - expression ')' -
# A rule reference is an identifier that is NOT followed by '=' (that
# would be the start of the next declaration).
rule = i:id !'=' { $$ = mkId(intern(yytext)) }
# Identifiers: the first character may also be '-' so that the rule named
# "-" can be declared and referenced. The ranges are A-Z (not A-z, which
# would additionally accept [ \ ] ^ and backtick and so swallow a
# following character class such as foo[abc]).
id = < [a-zA-Z_\-][a-zA-Z_0-9]* > - { $$ = mkId(intern(yytext)) }
string = '"' < [^\"]* > '"' - { $$ = mkString(unescapedString(yytext)) }
| "'" < [^\']* > "'" - { $$ = mkString(unescapedString(yytext)) }
class = '['-<(!']'.)*> ']' - { $$=mkClass(classify(yytext)) }
dot = '.' - { $$=mkDot() }
# Optional whitespace, including any style of line ending.
- = space*
space = [ \t] | '\n' '\r'* | '\r' '\n'*
%%
/*
 * One integer counter per node type, in this order: String, Query, Star,
 * Plus, Or, And, Class, Dot, Exc, Et, Id.
 */
struct NodeCount {
    int String, Query, Star, Plus, Or, And, Class, Dot, Exc, Et, Id;
};
/* All counters start at zero. */
NodeCount nodeCount = {0};
/* Number handed to the most recently emitted node; incremented per node. */
int nodeNumber=0;
/*
 * Emit C source to `file` that defines the tree rooted at `node` as a
 * set of `Node nodeN = { ... };` initialisers.
 *
 * A node takes its number before its children are visited, but the
 * children's definitions are written first so that the parent's
 * initialiser can take their addresses.
 *
 * Returns the number N assigned to `node`. Aborts on an unknown type.
 */
int printCode(FILE *file,Node *node)
{
    int self = ++nodeNumber;

    switch (node->type) {
    case String: {
        fprintf(file, "Node node%i = { .String = { String, \"", self);
        for (int i = 0; i < node->String.len; i++) {
            int c = (unsigned char) node->String.string[i];
            if (c == '"' || c == '\\') {
                /* must be backslash-escaped inside a C string literal */
                fprintf(file, "\\%c", c);
            }
            else if (c >= ' ' && c <= '~') {
                fprintf(file, "%c", c);
            }
            else {
                /* everything else is written as a 3-digit octal escape */
                fprintf(file, "\\%03o", c);
            }
        }
        fprintf(file, "\" , %i }}; \n", node->String.len);
        return self;
    }
    case Query: {
        int child = printCode(file, node->Query.children[0]);
        fprintf(file, "Node node%i= { .Query = { Query, {&node%i} }};\n", self, child);
        return self;
    }
    case Star: {
        int child = printCode(file, node->Star.children[0]);
        fprintf(file, "Node node%i= { .Star = { Star, {&node%i} }};\n", self, child);
        return self;
    }
    case Plus: {
        int child = printCode(file, node->Plus.children[0]);
        fprintf(file, "Node node%i = { .Plus = { Plus, {&node%i} }};\n", self, child);
        return self;
    }
    case Or: {
        int first  = printCode(file, node->Or.children[0]);
        int second = printCode(file, node->Or.children[1]);
        fprintf(file, "Node node%i= { .Or = { Or, {&node%i , &node%i} }};\n", self, first, second);
        return self;
    }
    case And: {
        int first  = printCode(file, node->And.children[0]);
        int second = printCode(file, node->And.children[1]);
        fprintf(file, "Node node%i= { .And = { And, {&node%i , &node%i} }};\n", self, first, second);
        return self;
    }
    case Class: {
        fprintf(file, "Node node%i= { .Class = { Class, \"", self);
        /* the 32-byte class array, each byte as an octal escape */
        for (int i = 0; i < 32; i++) {
            fprintf(file, "\\%03o", (unsigned char)node->Class.array[i]);
        }
        fprintf(file, "\" }};\n");
        return self;
    }
    case Dot: {
        fprintf(file, "Node node%i= { .Dot = { Dot}};\n", self);
        return self;
    }
    case Exc: {
        int child = printCode(file, node->Exc.children[0]);
        fprintf(file, "Node node%i = { .Exc = { Exc, {&node%i} }};\n", self, child);
        return self;
    }
    case Et: {
        int child = printCode(file, node->Et.children[0]);
        fprintf(file, "Node node%i = { .Et = { Et, {&node%i} }};\n", self, child);
        return self;
    }
    case Id: {
        /* refers to the `symbol_<name>` object emitted for the symbol */
        fprintf(file, "Node node%i = { .Id = { Id, &symbol_%s }};\n",
                self, get(node,Id, symbol)->name);
        return self;
    }
    }
    abort();
    return 0;
}
void printSymbolTable(FILE *file) {
for (int k= 0; k<symbolTable.length; k++) {
int i=printCode(file,symbolTable.elements[k]->rule);
fprintf(file,"Node *%s= &node%i;\n",symbolTable.elements[k]->name,i);
symbolTable.elements[k]->number=i;
}
}
void declareSymbols(FILE *file) {
for (int k= 0; k<symbolTable.length; k++) {
fprintf(file,"Symbol symbol_%s ;\n",symbolTable.elements[k]->name);
}
}
/*
 * Emit the full definition of each interned symbol to `file`: its name
 * as a string, the address of node<number>, and the number itself.
 */
void defineSymbols(FILE *file) {
    int count = symbolTable.length;

    for (int index = 0; index != count; ++index) {
        Symbol *sym = symbolTable.elements[index];
        char *label = sym->name;
        int id = sym->number;

        fprintf(file, "Symbol symbol_%s = { \"%s\", &node%i, %i};\n",
                label, label, id, id);
    }
}
/*
 * Read the whole of `path` into a freshly allocated, NUL-terminated
 * buffer and return it. On any I/O error the failing path is reported
 * with perror() and the process exits with status 1.
 * The terminator is added because the same buffer constructor is also
 * handed plain argv strings; presumably it expects a C string.
 */
static char *readGrammarFile(const char *path)
{
    FILE *fp = fopen(path, "r");
    if (!fp) {
        perror(path);
        exit(1);
    }
    struct stat sb;
    if (fstat(fileno(fp), &sb)) {
        perror(path);
        exit(1);
    }
    size_t size = (size_t) sb.st_size;
    char *text = malloc(size + 1);
    if (!text) {
        fatal("out of memory");
    }
    /* Count bytes rather than whole-file items so an empty file is not
       mistaken for a read error. */
    size_t got = fread(text, 1, size, fp);
    if (ferror(fp)) {
        perror(path);
        exit(1);
    }
    text[got] = 0;
    fclose(fp);
    return text;
}

/*
 * Usage: prog [-c output.c] ( -f grammar-file | grammar-expression )
 *
 * Parses the grammar (from the file given with -f or from the single
 * command-line expression). With -c, writes C source for the parsed
 * rules to the named file and exits. Otherwise reads lines from stdin
 * and reports for each whether the rule named "start" matches the whole
 * line.
 *
 * Returns 0 on success, 1 when the grammar cannot be parsed; other
 * errors terminate through fatal() or exit(1).
 */
int main(int argc, char **argv)
{
    char *opt_f = 0;   /* grammar file name (-f) */
    char *opt_c = 0;   /* C output file name (-c) */
    char *opt__ = 0;   /* grammar given directly on the command line */

    for (int i = 1; i < argc; i++) {
        char *arg = argv[i];
        if (!strcmp(arg, "-f") && i < argc - 1) {
            opt_f = argv[++i];
            continue;
        }
        if (!strcmp(arg, "-c") && i < argc - 1) {
            opt_c = argv[++i];
            continue;
        }
        opt__ = arg;
    }
    if (opt_f && opt__) fatal("file and command line expressions cannot both be supplied");
    if (!opt_f && !opt__) fatal("no expression specified");

    /* Use the parsed options, not fixed argv slots: with "-c out.c expr"
       argv[1] is "-c", not the expression. */
    if (opt__) {
        inputBuffer = mkInputBuffer(opt__);
    }
    if (opt_f) {
        inputBuffer = mkInputBuffer(readGrammarFile(opt_f));
    }

    /* the generated yyparse() returns non-zero on success */
    if (!yyparse()) {
        printf("Error\n");
        return 1;
    }

    if (opt_c) {
        FILE *outputFile = fopen(opt_c, "w");
        if (!outputFile) {
            perror(opt_c);
            exit(1);
        }
        declareSymbols(outputFile);
        printSymbolTable(outputFile);
        defineSymbols(outputFile);
        /* fclose flushes; a failure here means the output is incomplete */
        if (fclose(outputFile)) {
            perror(opt_c);
            exit(1);
        }
        return 0;
    }

    Node *startRule = intern("start")->rule;
    if (!startRule) {
        fatal("no start rule");
    }

    char *line = 0;
    size_t line_max = 0;
    ssize_t line_len = 0;
    while ((line_len = getline(&line, &line_max, stdin)) >= 0) {
        /* drop the trailing newline so it is not part of the match */
        if (line_len > 0 && line[line_len - 1] == '\n') {
            line[line_len - 1] = 0;
        }
        initInputBuffer(inputBuffer, line);
        /* a match must also consume the whole line */
        if (!execute(startRule, inputBuffer) || !atEnd(inputBuffer)) {
            printf("no match, current position : %i\n", getPosition(inputBuffer));
        }
        else {
            printf("match, current position : %i\n", getPosition(inputBuffer));
        }
    }
    free(line);

    /* Mention the generated helpers this file does not call, so the
       compiler does not warn about unused static functions. */
    (void)yySet;
    (void)yyPop;
    (void)yyPush;
    (void)yyAccept;
    (void)yymatchDot;
    (void)yymatchString;
    (void)yymatchChar;
    return 0;
}