diff --git a/class.c b/class.c
index aa170b5..af2b41d 100644
--- a/class.c
+++ b/class.c
@@ -12,186 +12,120 @@ int testBit(char *bits, int n)
     return bits[n/8] & (1 << n%8);
 }
 
-int isEscapedChar(char char1, char char2){
-    if( char1=='\n'){
-        return 10;
-    }
-    if( char1=='\a'){
-        return 7;
-    }
-    if( char1=='\b'){
-        return 8;
-    }
-    if( char1=='\e'){
-        return 27;
-    }
-    if( char1=='\f'){
-        return 12;
-    }
-    if( char1=='\r'){
-        return 13;
-    }
-    else if( char1=='\t'){
-        return 9;
-    }
-    if( char1=='\v'){
-        return 1;
-    }
-    else if( char1=='\\' && char2=='-'){
-        return 45;
-    }
-    else if ( char1=='\\'){
-        return 92;
-    }
-    else return ; //returning nothing for the moment, returning 0 printed weird character, no idea what to do
-}
-
-int hexaToDecimal(char hex1,char hex2){
-    char hex[2];
-    hex[0]=hex1;hex[1]=hex2;
-    long long decimal = 0, base = 1;
-    int i = 0, value, length;
-
-    length = 2;
-    for(i = length--; i >= 0; i--)
-    {
-        if(hex[i] >= '0' && hex[i] <= '9')
-        {
-            decimal += (hex[i] - 48) * base;
-            base *= 16;
-        }
-        else if(hex[i] >= 'A' && hex[i] <= 'F')
-        {
-            decimal += (hex[i] - 55) * base;
-            base *= 16;
-        }
-        else if(hex[i] >= 'a' && hex[i] <= 'f')
-        {
-            decimal += (hex[i] - 87) * base;
-            base *= 16;
-        }
+/* Value of C read as a digit: '0'-'9' give 0-9 and letters of either case
+ * give 10-35.  Anything else gives 666, which is larger than every radix
+ * isRadix is asked about, so non-digits never pass its test. */
+int digitValue(int c)
+{
+    switch (c) {
+        case '0'...'9':  return c - '0';
+        case 'A'...'Z':  return c - 'A' + 10;
+        case 'a'...'z':  return c - 'a' + 10;
     }
-    return decimal;
+    return 666;
 }
 
 
-int octalToDecimal(int octalNumber)
+/* Non-zero if C is a valid digit in radix R. */
+int isRadix(int r, int c)
 {
-    int decimalNumber = 0, i = 0;
-
-    while(octalNumber != 0)
-    {
-        decimalNumber += (octalNumber%10) * pow(8,i);
-        ++i;
-        octalNumber/=10;
-    }
-
-    i = 1;
-
-    return decimalNumber;
+    return digitValue(c) < r;
 }
 
 
-int isByte(char char1, char char2, char char3, char char4){
-    if( char1=='\\' && char2=='x' && (char3<=55 && char3>=48) && ((char4<=57 && char4>=48) || (char4<=70 && char4>=65))){
-        printf("ouaiiiiis");
-        return hexaToDecimal(char3,char4);
-    }
-    else if(char1=='\\' && (char2<=55 && char2>=48) && (char3<=55 && char3>=48) && (char4<=55 && char4>=48)){
-        int octal=((int)char2-48)*100+((int)char3-48)*10+(int)char4-48;
-        return octalToDecimal(octal);
-    }
-    else return 0 ;
-}
+/* Decode one character of a class specification starting at CP.  A backslash
+ * introduces an escape: \a \b \e \f \n \r \t \v, a literal \\ \] or \-, one to
+ * three octal digits, or \x followed by two hexadecimal digits.  A malformed
+ * \x escape and any other escaped character are reported on stderr; the latter
+ * stands for itself.  Returns the character's value (0..255) and, if EP is not
+ * null, stores the address of the first unconsumed byte in *EP.  CP must not
+ * point at the terminating NUL. */
+int unescapedCharacter(char *cp, char **ep)
+{
+    int c= (unsigned char)*cp++;  // bytes above 127 must not turn negative
+    if ('\\' == c && *cp) {  // a backslash at the very end stands for itself
+        c= (unsigned char)*cp++;
+        switch (c) {
+            case 'n':   c= 10;   break;
+            case 'a':   c= 7;    break;
+            case 'b':   c= 8;    break;
+            case 'e':   c= 27;   break;
+            case 'f':   c= 12;   break;
+            case 'r':   c= 13;   break;
+            case 't':   c= 9;    break;
+            case 'v':   c= 11;   break;  // vertical tab is ASCII 11
+            case '\\':  c= '\\'; break;
+            case ']':   c= ']';  break;
+            case '-':   c= '-';  break;
+            case '0'...'7': {
+                c -= '0';  // c is now the value of the digit that represented it
+                if (isRadix(8, *cp)) c= c * 8 + *cp++ - '0';
+                if (isRadix(8, *cp) && c < 32) c= c * 8 + *cp++ - '0';  // a third digit only while the value still fits in a byte
+                break;
+            }
+            case 'x': {
+                char *op= cp;
+                c= 0;
+                if (isRadix(16, *cp)) {
+                    c= c * 16 + digitValue(*cp++);
+                    if (isRadix(16, *cp)) {
+                        c= c * 16 + digitValue(*cp++);
+                        break;
+                    }
+                }
+                fprintf(stderr, "illegal hexadecimal escape: \\x%.2s\n", op);
+                break;
+            }
+            default:
+                fprintf(stderr, "unusual escape: \\%c\n", c);
+                break;
+        }
+    }
+    if (ep) *ep= cp;
+    return c;
+}
 
 
+/* Build the set of characters described by SPEC, the body of a character
+ * class: single characters (optionally escaped, see unescapedCharacter),
+ * ranges written X-Y, and a leading ^ that negates the whole set.  The result
+ * is a calloc'd 32-byte (256-bit) bitmap with one bit per character value,
+ * filled in through setBit, or 0 if the allocation fails. */
 char *classify(char *spec)
 {
-    char *class= malloc(32);    // 256 bits indicating if character N (bit position N) is in the class or not
-    int index=0;
-    while (spec[index] != '\0' ) { //*spec
-        // go through spec converting each entry in the spec into a set of bits in the class
-        // test if the next thing is \ .
-        // or test if you have X-Y (where X or Y might be a character or escaped character)
-        // \n-\r
-        if (spec[index]=='-' && index!=0 && !(index==1 && spec[0]=='^') && spec[index+1]!='\0'){
-
-            if(spec[index-1]<65 || spec[index+1]>122 || (spec[index-1]>90 && spec[index-1]<97) || (spec[index+1]>90 && spec[index+1]<97)){
-                //= if(spec[index-1] or spec[index+1] are not letters)
-
-                if((spec[index-1]>47 && spec[index-1]<57) || (spec[index+1]>47 && spec[index+1]<57) ){ //if it's a digit range
-                    if(spec[index-1]>spec[index+1]){
-                        printf("Error, first digit greater than the second one\n");
-                    }
-                    else{
-                        for(int j=spec[index-1]; j<=spec[index+1] ; j++){
-                            setBit(class,j);
-                            printf("added digit : %c\n",j);
-
-                        }
-                        index++;
-                    }
-                }
-                else{
-                    printf("Error, bad use of range");
-                    exit(1);
-                }
-            }
-            else{
-                if(spec[index-1]>spec[index+1]){
-                    printf("Error, first char greater than the second one\n");
-                    exit(1);
-                }
-                else if(spec[index-1]>=65 && spec[index-1]<=90 && spec[index+1]>=65 && spec[index+1]<=90 ){
-                    for(int j=spec[index-1]; j<=spec[index+1] ; j++){
-                        setBit(class,j);
-                        printf("added char : %c\n",j);
-                    }
-                    index++;
-                }
-                else if( spec[index-1]>=97 && spec[index-1]<=122 && spec[index+1]>=97 && spec[index+1]<=122 ){
-                    for(int j=spec[index-1]; j<=spec[index+1] ; j++){
-                        setBit(class,j);
-                        printf("added char : %c\n",j);
-
-                    }
-                    index++;
-                }
-                else{
-                    printf("Bad use of char range");
-                    exit(1);
-                }
-            }
-
-        }
-        else if((spec[index]>=65 && spec[index]<=90) || (spec[index]>=97 && spec[index]<=122) || (spec[index]<=57 && spec[index]>=48) ){
-            setBit(class,spec[index]);
-            printf("added char : %c\n",spec[index]);
-        }
-
-        else if(isByte(spec[index],spec[index+1],spec[index+2],spec[index+3])){
-            setBit(class,isByte(spec[index],spec[index+1],spec[index+2],spec[index+3]));
-            index=index+4;
-        }
-        else if(isEscapedChar(spec[index],spec[index+1])!=0){
-            if(isEscapedChar(spec[index],spec[index+1])==45){
-                index++;
-            }
-            setBit(class,isEscapedChar(spec[index],spec[index+1]));
-        }
-
-
-        printf("%c\n",spec[index]);
-        printf("%i\n",index);
-        index++;
-    }
-
-    if (spec[0]==94) {
-        for (int i= 0; i < 8; ++i) class[i] ^= 255; // invert all bits in the class
-    }
+    char *class= calloc(1, 32);
+    if (!class) return 0;
+
+    int negated= 0;
+    if ('^' == *spec) ++negated, ++spec;
+
+    int prevChar= -1;  // value of the last single character added, or -1 when a '-' cannot form a range
+    while (*spec) {
+        if ('-' == *spec && prevChar >= 0 && spec[1]) {
+            ++spec;
+            int thisChar= unescapedCharacter(spec, &spec);
+            if (thisChar < prevChar) {
+                fprintf(stderr, "range is backwards: \\x%x-\\x%x\n", prevChar, thisChar);
+                int tmp= prevChar;
+                prevChar= thisChar;
+                thisChar= tmp;
+            }
+            for (int i= prevChar;  i <= thisChar;  ++i)
+                setBit(class, i);
+            prevChar= -1;
+            continue;
+        }
+        prevChar= unescapedCharacter(spec, &spec);  // remember the decoded value, not the raw byte, so escapes can start a range
+        setBit(class, prevChar);
+    }
+
+    if (negated)
+        for (int i= 0;  i < 32;  ++i)
+            class[i] ^= 255;  // invert all bits in the class
 
     return class;
 }
+
+#ifdef TESTING_CLASS_C
 int main()
 {
     char *line=0;
@@ -212,4 +146,5 @@ int main()
 
     return 0;
 }
+#endif
 