Pārlūkot izejas kodu

Add emitByteCode functions to support using the peg VM, add support for @ actions, automatically generate the minproto.grammar file from minproto.leg and use that in the parser generator. Fix issue with backslashes being stupid.

master
MaximeBarniaudy pirms 10 mēnešiem
vecāks
revīzija
168631e246
6 mainītis faili ar 1051 papildinājumiem un 111 dzēšanām
  1. +4
    -1
      Makefile
  2. +682
    -79
      grammar_parser.meta
  3. +316
    -0
      minproto.grammar
  4. +27
    -18
      minproto.leg
  5. +6
    -5
      rawgrammar.leg
  6. +16
    -8
      rawminproto.leg

+ 4
- 1
Makefile Parādīt failu

@ -15,7 +15,7 @@ MAIN = minproto
all : $(MAIN)
% : %.c
% : %.c %.grammar
$(CC) $(GFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDLIBS)
%-opt : %.c
@ -27,6 +27,9 @@ all : $(MAIN)
%.c : %.leg
leg -o $@ $<
%.grammar : %.leg
( sed "/%{/,/%}/d" $< | sed "/%%/,\$$d" ) > $@
tests : .FORCE
$(MAKE) clean all GFLAGS="-Wno-unused -g -DTYPECODES=1 -DELOPT=1"
-./$(MAIN) test.txt > test.out 2>&1

+ 682
- 79
grammar_parser.meta
Failā izmaiņas netiks attēlotas, jo tās ir par lielu
Parādīt failu


+ 316
- 0
minproto.grammar Parādīt failu

@ -0,0 +1,316 @@
# minproto.leg -- minimal prototype langauge for semantic experiments
#
# last edited: 2024-07-04 10:07:00 by piumarta on zora
start = - ( s:stmt { global yysval = s }
| !. { global yysval = 0 }
| < (!EOL .)* > { syntaxError(yytext) }
)
stmt = WHILE LPAREN c:expr RPAREN s:stmt { $$ = newWhile(c, s) }
| IF LPAREN c:expr RPAREN s:stmt
( ELSE t:stmt { $$ = newIf(c, s, t ) }
| { $$ = newIf(c, s, nil) }
)
| CONT EOS { $$ = newContinue() }
| BREAK e:expr EOS { $$ = newBreak(e) }
| BREAK EOS { $$ = newBreak(nil) }
| RETURN e:expr EOS { $$ = newReturn(e) }
| RETURN EOS { $$ = newReturn(nil) }
| FOR LPAREN i:id IN e:expr RPAREN
s:stmt { $$ = newForIn(i, e, s) }
| FOR LPAREN i:id FROM a:expr
TO b:expr RPAREN s:stmt { $$ = newForFromTo(i, a, b, s) }
| FOR LPAREN i:expr SEMI c:expr SEMI
u:expr RPAREN s:stmt { $$ = newFor(i, c, u, s) }
| TRY t:stmt
( CATCH LPAREN i:id RPAREN c:stmt { $$ = newTryCatch(t, i, c) }
| ENSURE e:stmt { $$ = newTryEnsure(t, e) }
)
| RAISE e:expr EOS { $$ = newRaise(e) }
| LOCAL i:id p:params b:block { $$ = newSetLocal (i, newLambda(p, b, nil, i)) }
| GLOBAL i:id p:params b:block { $$ = newSetGlobal(i, newLambda(p, b, nil, i)) }
| i:id p:params b:block { $$ = newSetVar (i, newLambda(p, b, nil, i)) }
| v:proto DOT i:id p:params b:block { $$ = newSetProp(v, i, newLambda(p, b, v, i)) }
| b:block { $$ = newBlock(b) }
| e:expr EOS { $$ = e }
proto = v:var ( DOT j:id !LPAREN { v = newGetProp(v, j) }
)* { $$ = v }
EOS = SEMI+ | &RBRACE | &ELSE | &CATCH
expr = LOCAL i:id ASSIGN e:expr { $$ = newSetLocal (i, e) }
| GLOBAL i:id ASSIGN e:expr { $$ = newSetGlobal(i, e) }
| i:id ASSIGN e:expr { $$ = newSetVar (i, e) }
| l:logor ( ASSIGN r:expr { l = assign(l, r) }
| PLUSEQ r:expr { l = newBinop(opPreAdd, lvalue(l), r) }
| MINUSEQ r:expr { l = newBinop(opPreSub, lvalue(l), r) }
| STAREQ r:expr { l = newBinop(opPreMul, lvalue(l), r) }
| SLASHEQ r:expr { l = newBinop(opPreDiv, lvalue(l), r) }
| PCENTEQ r:expr { l = newBinop(opPreMod, lvalue(l), r) }
| SHLEQ r:expr { l = newBinop(opPreShl, lvalue(l), r) }
| SHREQ r:expr { l = newBinop(opPreShr, lvalue(l), r) }
| ANDEQ r:expr { l = newBinop(opPreAnd, lvalue(l), r) }
| XOREQ r:expr { l = newBinop(opPreXor, lvalue(l), r) }
| OREQ r:expr { l = newBinop(opPreOr, lvalue(l), r) }
)? { $$ = l }
logor = l:logand ( BARBAR r:logand { l = newBinop(opLogOr, l, r) }
)* { $$ = l }
logand = l:bitor ( ANDAND r:bitor { l = newBinop(opLogAnd, l, r) }
)* { $$ = l }
bitor = l:bitxor ( OR r:bitxor { l = newBinop(opBitOr, l, r) }
)* { $$ = l }
bitxor = l:bitand ( XOR r:bitand { l = newBinop(opBitXor, l, r) }
)* { $$ = l }
bitand = l:eq ( AND r:eq { l = newBinop(opBitAnd, l, r) }
)* { $$ = l }
eq = l:ineq ( EQ r:ineq { l = newBinop(opEq, l, r) }
| NOTEQ r:ineq { l = newBinop(opNotEq, l, r) }
)* { $$ = l }
ineq = l:shift ( LESS r:shift { l = newBinop(opLess, l, r) }
| LESSEQ r:shift { l = newBinop(opLessEq, l, r) }
| GRTREQ r:shift { l = newBinop(opGrtrEq, l, r) }
| GRTR r:shift { l = newBinop(opGrtr, l, r) }
)* { $$ = l }
shift = l:sum ( SHL r:sum { l = newBinop(opShl, l, r) }
| SHR r:sum { l = newBinop(opShr, l, r) }
)* { $$ = l }
sum = l:prod ( PLUS r:prod { l = newBinop(opAdd, l, r) }
| MINUS r:prod { l = newBinop(opSub, l, r) }
)* { $$ = l }
prod = l:prefix ( STAR r:prefix { l = newBinop(opMul, l, r) }
| SLASH r:prefix { l = newBinop(opDiv, l, r) }
| PCENT r:prefix { l = newBinop(opMod, l, r) }
) * { $$ = l }
prefix = PPLUS p:prefix { $$ = newBinop(opPreAdd, lvalue(p), newInteger(1)) }
| MMINUS p:prefix { $$ = newBinop(opPreSub, lvalue(p), newInteger(1)) }
| PLING p:prefix { $$ = newUnyop(opNot, p) }
| MINUS p:prefix { $$ = newUnyop(opNeg, p) }
| TILDE p:prefix { $$ = newUnyop(opCom, p) }
| BQUOTE s:stmt { $$ = newUnyop(opQuasiquote, s) }
| COMMAT e:expr { $$ = newUnyop(opUnquote, e) }
| postfix
postfix = SUPER DOT i:id a:args { $$ = newSuper(i, a) }
| p:primary
( LBRAK
( COLON ( RBRAK { p = newGetSlice(p, nil, nil) }
| e:xexpr RBRAK { p = newGetSlice(p, nil, e) }
)
| s:xexpr ( COLON ( RBRAK { p = newGetSlice(p, s, nil) }
| e:xexpr RBRAK { p = newGetSlice(p, s, e) }
)
| RBRAK { p = newGetArray(p, s) }
)
)
| DOT i:id ( a:args !LBRACE { p = newInvoke(p, i, a) }
| { p = newGetProp(p, i) }
)
| a:args !LBRACE { p = newApply(p, a) }
)*
( PPLUS { p = newBinop(opPostAdd, lvalue(p), newInteger( 1)) }
| MMINUS { p = newBinop(opPostAdd, lvalue(p), newInteger(-1)) }
)? { $$ = p }
args = LPAREN a:mkobj
( RPAREN
| ( k:id COLON e:xexpr { Object_put(a, k, e) }
| e:xexpr { Object_push(a, e) }
)
( COMMA ( k:id COLON e:xexpr { Object_put(a, k, e) }
| e:xexpr { Object_push(a, e) }
) )* RPAREN ) { $$ = a }
params = LPAREN p:mkobj
( RPAREN
| i:id ( COLON e:expr { Object_put(p, i, e) }
| { Object_push(p, i) }
)
( COMMA i:id ( COLON e:expr { Object_put(p, i, e) }
| { Object_push(p, i) }
)
)* RPAREN ) { $$ = p }
mkobj = { $$ = (global new)(pObject) }
primary = nil | number | string | symbol | var | lambda | subexpr | literal # | regex
lambda = p:params b:block { $$ = newLambda(p, b, nil, nil) }
subexpr = LPAREN e:expr RPAREN { $$ = e }
| b:block { $$ = newBlock(b) }
literal = LBRAK o:mkobj
( RBRAK
| ( ( i:id COLON e:expr { Object_put(o, i, e) }
| e:expr { Object_push(o, e) }
) ( COMMA ( i:id COLON e:expr { Object_put(o, i, e) }
| e:expr { Object_push(o, e) }
) )* )? RBRAK ) { $$ = newLiteral(o) }
block = LBRACE b:mkobj
( e:stmt { Object_push(b, e) }
)* ( RBRACE { $$ = b }
| error @{ expected("statement or \x7D", yytext) }
)
nil = NIL { $$ = nil }
number = "-" n:unsign { $$ = neg(n) }
| "+" n:number { $$ = n }
| n:unsign { $$ = n }
unsign = < DIGIT* '.' DIGIT+ EXP? > - { $$ = newFloat(strtod(yytext, 0)) }
| "0" [bB] < BIGIT+ > - { $$ = newInteger(strtol(yytext, 0, 2)) }
| "0" [xX] < HIGIT+ > - { $$ = newInteger(strtol(yytext, 0, 16)) }
| "0" < OIGIT* > - { $$ = newInteger(strtol(yytext, 0, 8)) }
| < DIGIT+ > - { $$ = newInteger(strtol(yytext, 0, 10)) }
| "'" < char > "'" - { $$ = newInteger(_get(newStringUnescaped(yytext), String,value)[0]) }
string = '"' < ( !'"' char )* > '"' - { $$ = newStringUnescaped(yytext) }
char = '\\' [abefnrtv'"\[\]\\]
| '\\' [0-3][0-7][0-7]
| '\\' [xX] HIGIT*
| '\\' [0-7][0-7]?
| !'\\' .
# char = "\\" ( ["'\\abfnrtv]
# | [xX] HIGIT*
# | [0-7][0-7]?[0-7]?
# )
# | .
symbol = HASH i:id { $$ = i }
var = LOCAL i:id { $$ = newGetLocal (i) }
| GLOBAL i:id { $$ = newGetGlobal(i) }
| i:id { $$ = newGetVar (i) }
id = < LETTER ALNUM* > - { $$ = intern(yytext) }
# regex = SLASH a:alts SLASH { $$ = a }
# alts = s:seq ( OR t:seq { s = Alt_append(t) }
# )* { $$ = s }
# seq = p:pre ( q:pre { s = Seq_append(t) }
# )* { $$ = s }
# elt = action | pre
# action = b:block { $$ = newAction(b) }
# pre = PLING p:pre { $$ = newNot(p) }
# | AND p:pre { $$ = newAnd(p) }
# | post
# post = a:atom ( STAR { a = newMany(a) }
# | PLUS { a = newMore(a) }
# | QUERY { a = newMore(a) }
# )? { $$ = a }
# atom = DOT { $$ = newDot() }
# | "[" ( !"]" "\\"? . )* "]" - { $$ = newClass(yytext) }
# | '"' xxxxxx
# class = LBRAK
BIGIT = [0-1]
OIGIT = [0-7]
DIGIT = [0-9]
HIGIT = [0-9A-Fa-f]
LETTER = [A-Za-z_$?]
ALNUM = LETTER | DIGIT
SIGN = [-+]
EXP = [eE] SIGN DIGIT+
- = SPACE*
SPACE = [ \t] | EOL | SLC | MLC
EOL = [\n\r] { ++lineno }
SLC = "//" (!EOL .)*
MLC = "/*" ( MLC | !"*/" (EOL | .))* "*/" -
NIL = "nil" !ALNUM -
WHILE = "while" !ALNUM -
IF = "if" !ALNUM -
ELSE = "else" !ALNUM -
FOR = "for" !ALNUM -
IN = "in" !ALNUM -
FROM = "from" !ALNUM -
TO = "to" !ALNUM -
CONT = "continue" !ALNUM -
BREAK = "break" !ALNUM -
RETURN = "return" !ALNUM -
TRY = "try" !ALNUM -
CATCH = "catch" !ALNUM -
ENSURE = "ensure" !ALNUM -
RAISE = "raise" !ALNUM -
GLOBAL = "global" !ALNUM -
LOCAL = "local" !ALNUM -
SUPER = "super" !ALNUM -
BQUOTE = "`" -
COMMAT = "@" -
HASH = "#" -
SEMI = ";" -
ASSIGN = "=" ![=] -
COMMA = "," -
COLON = ":" ![:] -
LPAREN = "(" -
RPAREN = ")" -
LBRAK = "[" -
RBRAK = "]" -
LBRACE = "{" -
RBRACE = "}" -
BARBAR = "||" ![=] -
ANDAND = "&&" ![=] -
OR = "|" ![|=] -
OREQ = "|=" -
XOR = "^" ![=] -
XOREQ = "^=" -
AND = "&" ![&=] -
ANDEQ = "&=" -
EQ = "==" -
NOTEQ = "!=" -
LESS = "<" ![<=] -
LESSEQ = "<=" -
GRTREQ = ">=" -
GRTR = ">" ![=] -
SHL = "<<" ![=] -
SHLEQ = "<<=" -
SHR = ">>" ![=] -
SHREQ = ">>=" -
PLUS = "+" ![+=] -
PLUSEQ = "+=" -
PPLUS = "++" -
MINUS = "-" ![-=] -
MINUSEQ = "-=" -
MMINUS = "--" -
STAR = "*" ![=] -
STAREQ = "*=" -
SLASH = "/" ![/=] -
SLASHEQ = "/=" -
PCENT = "%" ![=] -
PCENTEQ = "%=" -
DOT = "." ![.] -
PLING = "!" ![=] -
TILDE = "~" -
error = - < (!EOL .)* >
xexpr = expr | error @{ expected("expression", yytext) }

+ 27
- 18
minproto.leg Parādīt failu

@ -3394,10 +3394,12 @@ void expected(char *what, char *where)
fatal("syntax error: %s expected near: %s", what, where);
}
#define global
%}
start = - ( s:stmt { yysval = s }
| !. { yysval = 0 }
start = - ( s:stmt { global yysval = s }
| !. { global yysval = 0 }
| < (!EOL .)* > { syntaxError(yytext) }
)
@ -3537,7 +3539,7 @@ params = LPAREN p:mkobj
)
)* RPAREN ) { $$ = p }
mkobj = { $$ = new(pObject) }
mkobj = { $$ = (global new)(pObject) }
primary = nil | number | string | symbol | var | lambda | subexpr | literal # | regex
@ -3575,11 +3577,16 @@ unsign = < DIGIT* '.' DIGIT+ EXP? > - { $$ = newFloat(strtod(yytext, 0
string = '"' < ( !'"' char )* > '"' - { $$ = newStringUnescaped(yytext) }
char = "\\" ( ["'\\abfnrtv]
| [xX] HIGIT*
| [0-7][0-7]?[0-7]?
)
| .
char = '\\' [abefnrtv'"\[\]\\]
| '\\' [0-3][0-7][0-7]
| '\\' [xX] HIGIT*
| '\\' [0-7][0-7]?
| !'\\' .
# char = "\\" ( ["'\\abfnrtv]
# | [xX] HIGIT*
# | [0-7][0-7]?[0-7]?
# )
# | .
symbol = HASH i:id { $$ = i }
@ -3695,7 +3702,6 @@ SLASHEQ = "/=" -
PCENT = "%" ![=] -
PCENTEQ = "%=" -
DOT = "." ![.] -
DOTDOT = ".." -
PLING = "!" ![=] -
TILDE = "~" -
@ -3705,6 +3711,7 @@ xexpr = expr | error @{ expected("expression", yytext) }
%%
;
#undef global
#if PROFILE
@ -4389,7 +4396,8 @@ oop prim_eval(oop func, oop self, oop args, oop env)
oop *indexed = _get(args, Object,indexed);
oop result = nil;
if (nil != Object_getLocal(args, sym_env)) {
//if (nil != Object_getLocal(args, sym_env)) {
if (Object_find(args, sym_env) >= 0) {
env = Object_getLocal(args, sym_env);
}
@ -4908,7 +4916,7 @@ typedef struct vmState
oop variables;
} vmState;
#define VM_STATE_INITIALISER { nil, nil }
#define VM_STATE_INITIALISER { nil, new(pObject) }
void vmEnter(vmState *state, oop obj, char *yytext, int yyleng)
{
@ -4923,7 +4931,7 @@ void vmSet(vmState *state, oop obj, char *yytext, int yyleng)
void vmAction(vmState *state, oop obj, char *yytext, int yyleng)
{
oop text = yyleng ? newStringLen(yytext, yyleng) : nil;
oop text = yyleng >= 0 ? newStringLen(yytext, yyleng) : nil;
Object_put(state->variables, sym_yytext, text);
Object_put(state->variables, sym_yyleng, newInteger(yyleng));
applyThunkIn(obj, state->variables);
@ -4933,6 +4941,7 @@ void vmLeave(vmState *state, oop obj, char *yytext, int yyleng)
{
state->result = Object_getLocal(state->variables, sym_$$);
state->variables = _getDelegate(state->variables);
Object_put(state->variables, sym_$$, state->result);
}
void vmDisassemble(vmInsn *code, int pc)
@ -5130,6 +5139,8 @@ int vmRun(oop grammar0, oop symbol, char *text, int start, int length)
C##stack[--C##sp]; \
})
saveAction(vmEnter, nil, 0, 0);
for (;;) {
if (opt_d) vmDisassemble(frame.code, frame.pc);
vmInsn *i = frame.code + frame.pc++;
@ -5278,8 +5289,6 @@ int vmRun(oop grammar0, oop symbol, char *text, int start, int length)
break;
}
saveAction(vmLeave, nil, 0, 0);
#undef pop
#undef drop
#undef push
@ -5457,16 +5466,16 @@ int main(int argc, char **argv)
#define stringify(x) #x
#define declareOp(NAME, OP) _set(intern(stringify(__op##NAME)), Symbol,value, newInteger(op##NAME));
#define declareOp(NAME, OP) _set(intern(stringify(op##NAME)), Symbol,value, newInteger(op##NAME));
doBinops(declareOp)
#undef declareOp
#define declareOp(NAME, OP) _set(intern(stringify(__##NAME)), Symbol,value, newInteger(NAME));
#undef stringify
#define declareOp(NAME, OP) _set(intern(#NAME), Symbol,value, newInteger(NAME));
doUnyops(declareOp)
#undef declareOp
#undef stringify
#if TYPECODES
# define defineEvaluator(NAME) \

+ 6
- 5
rawgrammar.leg Parādīt failu

@ -9,11 +9,12 @@ expression = s:sequence !BAR { s; }
( BAR s2:sequence { s1.push(s2); }
) * { s1; }
sequence = p:prefix { p = Sequence.new().push(p); }
sequence = p:prefix ( q:prefix { p = Sequence.new().push(p).push(q); }
( q:prefix { p.push(q); }
) * { p; }
) * ) ? { p; }
prefix = AND a:action { ParseTimeAction.new(action: a); }
| AT a:action { ExecuteAction.new(action: a); }
|
( AND s:suffix { And.new(expression: s); }
| NOT s:suffix { Not.new(expression: s); }
@ -33,8 +34,7 @@ primary = i1:identifier COLON i2:ruleCall !ASSIGN { Assignment.new(name:
| c:class { c; }
| DOT { Dot.new(); }
| a:action { a; }
| BEGIN { Begin.new(); }
| END { End.new(); }
| BEGIN e:expression END { Capture.new(expression: e); }
identifier = < [-a-zA-Z_][-a-zA-Z_0-9]* > - { intern(yytext); }
@ -53,7 +53,7 @@ char = '\\' [abefnrtv'"\[\]\\]
| '\\' [0-7][0-7]?
| !'\\' .
action = m:metaLanguage::block - { Action.new(parseTree: m); }
action = m:metaLanguage::block - { Action.new(parseTree: Block.new(body: m)); }
- = ( space | comment )*
space = ' ' | '\t' | end-of-line
@ -76,3 +76,4 @@ PLUS = "+" ![+=] -
STAR = "*" ![=] -
DOT = "." ![.] -
COLON = ":" ![:] -
AT = "@" -

+ 16
- 8
rawminproto.leg Parādīt failu

@ -152,7 +152,7 @@ literal = LBRAK o:mkobj
block = LBRACE b:mkobj
( e:stmt { b.push(e) }
)* RBRACE { Block.new(body: b) }
)* RBRACE { b }
nil = NIL { nil }
@ -167,13 +167,21 @@ unsign = < DIGIT* '.' DIGIT+ EXP? > - { yytext.asFloat() }
| < DIGIT+ > - { yytext.asInteger() }
| "'" < char > "'" - { ord(yytext.unescaped()) }
string = '"' < ( !'"' char )* > '"' - { yytext.unescaped() }
string = '"' < ( !'"' char )* > '"' - { yytext }
char = "\\" ( ["'\\abfnrtv]
| [xX] HIGIT*
| [0-7][0-7]?[0-7]?
)
| .
# Version originale, qui ne parse pas deux antislash pour une raison obscure
# char = "\\" ( ["'\\abfnrtv]
# | [xX] HIGIT*
# | [0-7][0-7]?[0-7]?
# )
# | .
# Version de rawgrammar.leg, qui fonctionne sans problème
char = '\\' [abefnrtv'"\[\]\\]
| '\\' [0-3][0-7][0-7]
| '\\' [0-7][0-7]?
| '\\' [xX] HIGIT+
| !'\\' .
symbol = HASH i:id { i }
@ -187,7 +195,7 @@ BIGIT = [0-1]
OIGIT = [0-7]
DIGIT = [0-9]
HIGIT = [0-9A-Fa-f]
LETTER = [A-Za-z_]
LETTER = [A-Za-z_$?]
ALNUM = LETTER | DIGIT
SIGN = [-+]
EXP = [eE] SIGN DIGIT+

Notiek ielāde…
Atcelt
Saglabāt