Dynamic PEG for interpreted languages.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

215 lines
6.0 KiB

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/*
 * Mark bit number n in the bitmap `bits`.
 *
 * Bit n lives in byte n/8, at position n%8 counted from the least
 * significant bit. The caller must supply a buffer large enough to
 * contain that byte.
 */
void setBit(char *bits, int n)
{
    const int byteIndex = n / 8;
    const int mask = 1 << (n % 8);

    bits[byteIndex] = (char)(bits[byteIndex] | mask);
}
/*
 * Query bit number n of the bitmap `bits`.
 *
 * Returns zero when the bit is clear. When the bit is set the result is
 * the bit's mask value (1 << n%8), i.e. non-zero but not necessarily 1.
 */
int testBit(char *bits, int n)
{
    const int mask = 1 << (n % 8);
    const int byte = bits[n / 8];

    return byte & mask;
}
/*
 * Map a special character to the character code it stands for.
 *
 * char1 is compared against the control characters themselves (bell,
 * backspace, tab, newline, vertical tab, form feed, carriage return,
 * escape) and against a backslash:
 *   - a control character yields its own code (7..13, 27);
 *   - a backslash followed by '-' (char2) yields 45, the code of '-';
 *   - any other backslash yields 92, the code of '\'.
 *
 * Returns 0 when char1 is none of the above, so callers can use the
 * result as a truth value.
 */
int isEscapedChar(char char1, char char2)
{
    switch (char1) {
    case '\a':
        return 7;
    case '\b':
        return 8;
    case '\t':
        return 9;
    case '\n':
        return 10;
    case '\v':
        return 11;
    case '\f':
        return 12;
    case '\r':
        return 13;
    case 27: // ESC, written numerically because '\e' is not standard C
        return 27;
    case '\\':
        return (char2 == '-') ? 45 : 92;
    default:
        return 0; // not a special character
    }
}
/* Value (0..15) of one hexadecimal digit, or -1 if c is not a hex digit. */
static int hexDigitValue(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    return -1;
}

/*
 * Convert a two-digit hexadecimal number to its integer value.
 *
 * hex1 is the most significant digit and hex2 the least significant one;
 * upper- and lowercase letters are both accepted. A character that is not
 * a hexadecimal digit is skipped: it contributes nothing and does not
 * shift the other digit.
 *
 * Returns a value in 0..255.
 */
int hexaToDecimal(char hex1, char hex2)
{
    const char digits[2] = { hex1, hex2 };
    int decimal = 0;

    // Exactly two digits are examined, most significant first.
    for (int i = 0; i < 2; i++) {
        const int value = hexDigitValue(digits[i]);
        if (value >= 0) {
            decimal = decimal * 16 + value;
        }
    }
    return decimal;
}
/*
 * Reinterpret an integer whose decimal digits spell an octal number.
 *
 * For example 101 (read as octal 101) yields 65. The digits are not
 * validated: each decimal digit is simply weighted by the matching
 * power of eight.
 *
 * Returns the resulting value.
 */
int octalToDecimal(int octalNumber)
{
    int decimalNumber = 0;
    int placeValue = 1; // 8^k for the digit currently being consumed

    // Integer arithmetic only: no floating-point rounding, no libm needed.
    while (octalNumber != 0) {
        decimalNumber += (octalNumber % 10) * placeValue;
        placeValue *= 8;
        octalNumber /= 10;
    }
    return decimalNumber;
}
/* Non-zero when c is a hexadecimal digit (0-9, A-F, a-f). */
static int isByteHexDigit(char c)
{
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
}

/* Non-zero when c is an octal digit (0-7). */
static int isByteOctalDigit(char c)
{
    return c >= '0' && c <= '7';
}

/*
 * Decode a four-character numeric byte escape.
 *
 * Two spellings are recognized:
 *   - backslash, 'x', two hexadecimal digits   (e.g. \x41)
 *   - backslash, three octal digits            (e.g. \101)
 *
 * Returns the decoded byte value, or 0 when the four characters are not
 * such an escape. Octal escapes above \377 are rejected because they do
 * not fit in a byte. Since 0 doubles as the "no match" result, an escape
 * for byte 0 cannot be told apart from a non-match.
 */
int isByte(char char1, char char2, char char3, char char4)
{
    if (char1 != '\\') {
        return 0;
    }

    if (char2 == 'x' && isByteHexDigit(char3) && isByteHexDigit(char4)) {
        return hexaToDecimal(char3, char4);
    }

    if (isByteOctalDigit(char2) && isByteOctalDigit(char3) && isByteOctalDigit(char4)) {
        // octalToDecimal() expects the octal digits packed as a decimal number.
        const int octal = (char2 - '0') * 100 + (char3 - '0') * 10 + (char4 - '0');
        const int value = octalToDecimal(octal);
        return (value <= 255) ? value : 0;
    }

    return 0;
}
/* Size of a class bitmap in bytes: one bit for each of the 256 byte values. */
enum { CLASS_BYTES = 32 };

/* Kinds of range endpoint; both ends of "X-Y" must be of the same kind. */
enum { RANGE_OTHER, RANGE_DIGIT, RANGE_UPPER, RANGE_LOWER };

/* Tell whether a character code is a digit, an uppercase or a lowercase letter. */
static int rangeCategory(int code)
{
    if (code >= '0' && code <= '9') {
        return RANGE_DIGIT;
    }
    if (code >= 'A' && code <= 'Z') {
        return RANGE_UPPER;
    }
    if (code >= 'a' && code <= 'z') {
        return RANGE_LOWER;
    }
    return RANGE_OTHER;
}

/* Report a malformed range on stderr and terminate the program. */
static void rangeError(const char *message)
{
    fputs(message, stderr);
    exit(1);
}

/*
 * Build the bitmap of a character class from its textual specification.
 *
 * The specification is read left to right:
 *   - a leading '^' negates the whole class;
 *   - "X-Y" adds every character from X to Y; both ends must be digits,
 *     both uppercase letters or both lowercase letters, and Y must be a
 *     plain character;
 *   - a four-character byte escape accepted by isByte() adds that byte;
 *   - backslash followed by '-' adds a literal '-';
 *   - any other backslash adds a literal backslash;
 *   - every other character, including a '-' with no usable left or right
 *     neighbour, adds itself.
 *
 * Returns a freshly allocated bitmap of CLASS_BYTES bytes in which bit N
 * is set when character N belongs to the class; the caller must free() it.
 * The program is terminated with exit status 1 on a malformed range or
 * when the bitmap cannot be allocated.
 */
char *classify(char *spec)
{
    // calloc: every bit must start cleared before members are added.
    char *bitmap = calloc(CLASS_BYTES, 1);
    if (bitmap == NULL) {
        perror("classify");
        exit(1);
    }

    const int negated = (spec[0] == '^');
    int index = negated ? 1 : 0; // the negation marker is not a member
    int previous = -1;           // code of the last member added, -1 if none yet

    while (spec[index] != '\0') {
        const char current = spec[index];
        const char next = spec[index + 1];

        // "X-Y": a dash is a range operator only between two members.
        if (current == '-' && previous >= 0 && next != '\0') {
            const int first = previous;
            const int last = (unsigned char)next;
            const int firstKind = rangeCategory(first);
            const int lastKind = rangeCategory(last);

            if (firstKind == RANGE_OTHER || firstKind != lastKind) {
                const int firstIsLetter = (firstKind == RANGE_UPPER || firstKind == RANGE_LOWER);
                const int lastIsLetter = (lastKind == RANGE_UPPER || lastKind == RANGE_LOWER);
                rangeError((firstIsLetter && lastIsLetter) ? "Bad use of char range"
                                                           : "Error, bad use of range");
            }
            if (first > last) {
                rangeError((firstKind == RANGE_DIGIT)
                               ? "Error, first digit greater than the second one\n"
                               : "Error, first char greater than the second one\n");
            }
            for (int code = first; code <= last; code++) {
                setBit(bitmap, code);
            }
            previous = last;
            index += 2; // the dash and the upper bound
            continue;
        }

        if (current == '\\') {
            // Only probe for a byte escape when all four characters exist,
            // so nothing past the terminator is ever read.
            if (next != '\0' && spec[index + 2] != '\0' && spec[index + 3] != '\0') {
                const int byte = isByte(current, next, spec[index + 2], spec[index + 3]);
                if (byte > 0 && byte < CLASS_BYTES * 8) {
                    setBit(bitmap, byte);
                    previous = byte;
                    index += 4;
                    continue;
                }
            }

            // For a backslash isEscapedChar() yields '-' for "\-" and '\' otherwise.
            const int code = isEscapedChar(current, next);
            setBit(bitmap, code);
            previous = code;
            index += (code == '-') ? 2 : 1; // an escaped dash consumes two characters
            continue;
        }

        // Plain character: it is a member of the class as-is.
        previous = (unsigned char)current;
        setBit(bitmap, previous);
        index++;
    }

    if (negated) {
        // Invert every byte of the bitmap, not just the first few.
        for (int i = 0; i < CLASS_BYTES; ++i) {
            bitmap[i] = (char)~bitmap[i];
        }
    }
    return bitmap;
}
/*
 * Read character-class specifications from standard input, one per line,
 * and for each of them print the hexadecimal code of every character that
 * belongs to the class.
 *
 * Returns 0 at end of input, 1 if a class could not be built.
 */
int main(void)
{
    char *line = NULL;   // buffer owned by getline(), released at the end
    size_t line_max = 0;
    ssize_t line_len;

    while ((line_len = getline(&line, &line_max, stdin)) >= 0) {
        // Drop the trailing newline so it is not taken as part of the class.
        if (line_len > 0 && line[line_len - 1] == '\n') {
            line[line_len - 1] = '\0';
        }

        char *bitmap = classify(line);
        if (bitmap == NULL) {
            fputs("classify failed\n", stderr);
            free(line);
            return 1;
        }

        for (int i = 0; i < 256; ++i) {
            if (testBit(bitmap, i)) {
                printf("%02x is set\n", i);
            }
        }
        free(bitmap); // one bitmap is allocated per input line
    }

    free(line);
    return 0;
}