diff --git a/Makefile b/Makefile index 868efdd..94629a2 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ MAIN = minproto all : $(MAIN) -% : %.c +% : %.c %.grammar $(CC) $(GFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDLIBS) %-opt : %.c @@ -27,6 +27,9 @@ all : $(MAIN) %.c : %.leg leg -o $@ $< +%.grammar : %.leg + ( sed "/%{/,/%}/d" $< | sed "/%%/,\$$d" ) > $@ + tests : .FORCE $(MAKE) clean all GFLAGS="-Wno-unused -g -DTYPECODES=1 -DELOPT=1" -./$(MAIN) test.txt > test.out 2>&1 diff --git a/grammar_parser.meta b/grammar_parser.meta index faed0e7..42437d2 100644 --- a/grammar_parser.meta +++ b/grammar_parser.meta @@ -1,3 +1,5 @@ +global then = cputime(); + // ----- Utils ----- true = (1 == 1); @@ -139,6 +141,65 @@ Context.declareVariable(var) { // ----- Grammar Constructs ----- +// Parser VM op-codes + +// opcodes opcode argument arglength next-pc notes +OpCodes = [ + PUSH: 0, // PUSH nil 0 ok ok both pcs should be set the same + DROP: 1, // DROP nil 0 ok ok + POP: 2, // POP nil 0 ok ok + DOT: 3, // DOT nil 0 ok ko ok/ko next pc are absolute and are + CLASS: 4, // CLASS bitarray len(bitarray) ok ko instruction indexes (0, 1, 2, ...) + STRING: 5, // STRING string len(string) ok ko and not array indexes (0, 5, 10, ...) 
+ TEST: 6, // TEST function 0 ok ko emitted for & and @ actions (@ passes ok for both pcs) + RULE2: 7, // RULE2 NOTE(review): not emitted by any emitter visible in this patch; confirm operands against the VM + RULE: 8, // RULE symbol 0 ok ko + CALL: 9, // CALL nil 0 ok ko do not use this; VM internal use only + SUCCEED: 10, // SUCCEED nil 0 0 0 next pcs are ignored + FAIL: 11, // FAIL nil 0 0 0 + ACTION: 12, // ACTION function 0 ok ok both pcs should be the same + BEGIN: 13, // BEGIN nil 0 ok ok + END: 14, // END nil 0 ok ok + UNEND: 15, // UNEND nil 0 ok ok + SET: 16 // SET symbol 0 ok ok +]; + +OpCodeNames = [ // indexed by opcode value in Instruction.printline; keep in the same order as OpCodes + "PUSH", + "DROP", + "POP", + "DOT", + "CLASS", + "STRING", + "TEST", + "RULE2", + "RULE", + "CALL", + "SUCCEED", + "FAIL", + "ACTION", + "BEGIN", + "END", + "UNEND", + "SET" +]; + +Instruction = Object.subtype(#Instruction); + +Instruction.new(op, arg, arglen, ok, ko) { + self = super.new(); + self.op = op; + self.arg = arg; + self.arglen = arglen; + self.ok = ok; + self.ko = ko; + self; +} + +Instruction.printline() { + print(OpCodeNames[self.op], " ", self.arg, " ", self.arglen, " ", self.ok, " ", self.ko, "\n"); +} + // String Literal StringLiteral = Object.subtype(#StringLiteral); @@ -148,11 +209,18 @@ StringLiteral.getMatchingExpression() { ((stream.content.compareFrom(stream.position, @self.string) == 0) && (stream.position += len(@self.string)) && @true) - || + || @false }; } +StringLiteral.emitByteCode(instructions, ok, ko) { + //emit(STRING, (byte *)node->String.string, strlen(node->String.string), ok, ko); + instructions.push(Instruction.new(OpCodes.STRING, self.string, len(self.string), ok, ko)); + instructions.length() - 1; +} + + // Character Class CharacterClass = Object.subtype(#CharacterClass); @@ -161,6 +229,12 @@ CharacterClass.getMatchingExpression() { return `{ !stream.atEnd() && (@self.value.charClass()).bitTest(stream.peek()) && stream.inc() && @true }; } +CharacterClass.emitByteCode(instructions, ok, ko) { + //emit(CLASS, node->Class.bits, 32, ok, ko); + instructions.push(Instruction.new(OpCodes.CLASS, self.value.charClass(), 32, ok, ko)); + instructions.length() - 1; +} + // Dot Dot = Object.subtype(#Dot); @@ 
-169,21 +243,39 @@ Dot.getMatchingExpression() { return `{ !stream.atEnd() && stream.inc() && @true }; } -// Begin - -Begin = Object.subtype(#Begin); - -Begin.getMatchingExpression() { - return `{ stream.setLastBegin() && @true }; +Dot.emitByteCode(instructions, ok, ko) { + //emit(DOT, 0, 0, ok, ko); + instructions.push(Instruction.new(OpCodes.DOT, 0, 0, ok, ko)); + instructions.length() - 1; } -// End +// Capture -End = Object.subtype(#End); +Capture = Object.subtype(#Capture); -End.getMatchingExpression() { - return `{ (context.variables.yytext = stream.content[stream.lastBegin..stream.position].unescaped()) - && @true }; +Capture.getMatchingExpression() { + return `{ stream.setLastBegin() + && (@self.expression.getMatchingExpression()) + && (context.variables.yytext = stream.content[stream.lastBegin:stream.position].unescaped()) + && @true + }; +} + +Capture.emitByteCode(instructions, ok, ko) { + //ok = emit(END, 0, 0, ok, ok); + //ko = emit(UNEND, 0, 0, ko, ko); + //ok = generateNode(node->Capture.exp, ok, ko); + //emit(BEGIN, 0, 0, ok, ok); + // DO WHAT THE CODE SAYS, DAMMIT (mirror the commented emit() calls above exactly) + + instructions.push(Instruction.new(OpCodes.END, 0, 0, ok, ok)); + ok = instructions.length() - 1; + instructions.push(Instruction.new(OpCodes.UNEND, 0, 0, ko, ko)); + ko = instructions.length() - 1; + + ok = self.expression.emitByteCode(instructions, ok, ko); + instructions.push(Instruction.new(OpCodes.BEGIN, 0, 0, ok, ok)); + instructions.length() - 1; } // Optional (? 
postfix operator) @@ -194,6 +286,11 @@ Optional.getMatchingExpression() { return `{ (@self.expression.getMatchingExpression()); @true }; } +Optional.emitByteCode(instructions, ok, ko) { + self.expression.emitByteCode(instructions, ok, ok); + instructions.length() - 1; +} + // Star Star = Object.subtype(#Star); @@ -202,6 +299,13 @@ Star.getMatchingExpression() { return `{ while (@self.expression.getMatchingExpression()) @true; @true }; } +Star.emitByteCode(instructions, ok, ko) { + local last = instructions.length(); + self.expression.emitByteCode(instructions, ok, ok); + instructions[last].ok = instructions.length() - 1; + instructions.length() - 1; +} + // Plus Plus = Object.subtype(#Plus); @@ -217,6 +321,14 @@ Plus.getMatchingExpression() { }; } +Plus.emitByteCode(instructions, ok, ko) { + local last = instructions.length(); + local next = self.expression.emitByteCode(instructions, ok, ok); + instructions[last].ok = instructions.length() - 1; + self.expression.emitByteCode(instructions, next, ko); + instructions.length() - 1; +} + // And And = Object.subtype(#And); @@ -233,6 +345,16 @@ And.getMatchingExpression() { }; } +And.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new(OpCodes.POP, 0, 0, ok, ok)); + ok = instructions.length() - 1; + instructions.push(Instruction.new(OpCodes.DROP, 0, 0, ko, ko)); + ko = instructions.length() - 1; + local here = self.expression.emitByteCode(instructions, ok, ko); + instructions.push(Instruction.new(OpCodes.PUSH, 0, 0, here, here)); + instructions.length() - 1; +} + // Not Not = Object.subtype(#Not); @@ -249,6 +371,17 @@ Not.getMatchingExpression() { }; } +Not.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new(OpCodes.POP, 0, 0, ko, ko)); + local nok = instructions.length() - 1; + instructions.push(Instruction.new(OpCodes.DROP, 0, 0, ok, ok)); + local nko = instructions.length() - 1; + + local here = self.expression.emitByteCode(instructions, nok, nko); + 
instructions.push(Instruction.new(OpCodes.PUSH, 0, 0, here, here)); + instructions.length() - 1; +} + // Sequence Sequence = Object.subtype(#Sequence); @@ -276,6 +409,20 @@ Sequence.getMatchingExpression() { } } +Sequence.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new(OpCodes.DROP, 0, 0, ok, ok)); + ok = instructions.length() - 1; + instructions.push(Instruction.new(OpCodes.POP, 0, 0, ko, ko)); + ko = instructions.length() - 1; + + for (i from self.length() - 1 to 0) { + ok = self[i].emitByteCode(instructions, ok, ko); + } + + instructions.push(Instruction.new(OpCodes.PUSH, 0, 0, ok, ok)); + instructions.length() - 1; +} + // Alternation Alternation = Object.subtype(#Alternation); @@ -292,6 +439,51 @@ Alternation.getMatchingExpression() { return `{ @self._getMatchingExpression(0) }; } +Alternation.prepend(pegString) { + local stream = newStream(pegString); + local context = Context.new(outerContext: nil).init(); + local actions = []; + + peg.expression(stream, context, actions); + + local expression = { for (actionAndContext in actions) { + actionAndContext.action.execute(actionAndContext.context); + } }; + self.push(self[len(self) - 1]); + for (i from len(self) - 2 to 0) { + self[i + 1] = self[i]; + } + self[0] = expression; +} + +parseDefinition(pegString) { + local stream = newStream(pegString); + local context = Context.new(outerContext: nil).init(); + local actions = []; + + peg.definition(stream, context, actions); + + local definition = { for (actionAndContext in actions) { + actionAndContext.action.execute(actionAndContext.context); + } }; + + definition; +} + +Alternation.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new(OpCodes.DROP, 0, 0, ok, ok)); + ok = instructions.length() - 1; + instructions.push(Instruction.new(OpCodes.POP, 0, 0, ko, ko)); + ko = instructions.length() - 1; + + for (i from self.length() - 1 to 0) { + ko = self[i].emitByteCode(instructions, ok, ko); + } + + 
instructions.push(Instruction.new(OpCodes.PUSH, 0, 0, ko, ko)); + instructions.length() - 1; +} + // Action Action = Object.subtype(#Action); @@ -318,7 +510,24 @@ Action.execute(context) { returnValue; } -// Parse-time Action +Action.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new( + OpCodes.ACTION, + Closure.new( + environment: nil, + function: Lambda.new( + parameters: [], + body: self.parseTree.body + ) + ), + 0, + ok, + ok + )); + instructions.length() - 1; +} + +// Parse-time Action (&) ParseTimeAction = Object.subtype(#ParseTimeAction); @@ -326,6 +535,48 @@ ParseTimeAction.getMatchingExpression() { return `{ (@self.action).execute(context) }; } +ParseTimeAction.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new( + OpCodes.TEST, + Closure.new( + environment: nil, + function: Lambda.new( + parameters: [], + body: self.action.parseTree.body + ) + ), + 0, + ok, + ko + )); + instructions.length() - 1; +} + +// Execute Action (@) + +ExecuteAction = Object.subtype(#ExecuteAction); + +ExecuteAction.getMatchingExpression() { + return `{ (@self.action).execute(context); @true; }; +} + +ExecuteAction.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new( + OpCodes.TEST, + Closure.new( + environment: nil, + function: Lambda.new( + parameters: [], + body: self.action.parseTree.body + ) + ), + 0, + ok, + ok + )); + instructions.length() - 1; +} + // Assignment Assignment = Object.subtype(#Assignment); @@ -338,6 +589,13 @@ Assignment.getMatchingExpression() { } } +Assignment.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new(OpCodes.SET, self.name, 0, ok, ok)); + ok = instructions.length() - 1; + self.rule.emitByteCode(instructions, ok, ko); + instructions.length() - 1; +} + // RuleCall RuleCall = Object.subtype(#RuleCall); @@ -346,6 +604,11 @@ RuleCall.getMatchingExpression() { `{ /*increaseCount(@self.name);*/ Invoke.new(self: self, method: @self.name, arguments: [stream, context, 
actions]).__eval__() }; } +RuleCall.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new(OpCodes.RULE, self.name, 0, ok, ko)); + instructions.length() - 1; +} + // NamespacedRuleCall NamespacedRuleCall = Object.subtype(#NamespacedRuleCall); @@ -354,6 +617,11 @@ NamespacedRuleCall.getMatchingExpression(innerContext) { `{ Invoke.new(self: @get(self.namespace), method: @self.name, arguments: [stream, context, actions]).__eval__() }; } +NamespacedRuleCall.emitByteCode(instructions, ok, ko) { + instructions.push(Instruction.new(OpCodes.RULE, self.name, self.namespace, ok, ko)); + instructions.length() - 1; +} + Definition = Object.subtype(#Definition); Grammar = Object.subtype(#Grammar); @@ -374,7 +642,7 @@ Grammar.addRule(ruleName, rule) { name: ruleName, parent: self ) - ) + ); } //global functionCounts = []; @@ -427,6 +695,14 @@ metaExpression = ) ) ) + .push(Sequence.new() + .push(StringLiteral.new(string: "global")) + .push(RuleCall.new(name: #ws)) + .push(Assignment.new(name: #i, rule: RuleCall.new(name: #metaId))) + .push(RuleCall.new(name: #assign)) + .push(Assignment.new(name: #e, rule: RuleCall.new(name: #metaExpression))) + .push(Action.new(parseTree: `{ SetGlobal.new(name: i, value: e); })) + ) .push(Sequence.new() .push(Assignment.new(name: #i, rule: RuleCall.new(name: #metaId))) .push(RuleCall.new(name: #assign)) @@ -434,14 +710,30 @@ metaExpression = .push(Action.new(parseTree: `{ SetVar.new(name: i, value: e); })) ) .push(Sequence.new() - .push(Assignment.new(name: #pf, rule: RuleCall.new(name: #metaPostfix))) + .push(Assignment.new(name: #pf, rule: RuleCall.new(name: #metaPrefix))) .push(Action.new(parseTree: `{ pf; })) ); +metaPrefix = + Alternation.new() + .push(Sequence.new() + .push(RuleCall.new(name: #pplus)) + .push(Assignment.new(name: #p, rule: RuleCall.new(name: #metaPrefix))) + .push(Action.new(parseTree: `{ $$ = newBinop(opPreAdd, lvalue(p), newInteger(1)) })) + ) + .push(RuleCall.new(name: #metaPostfix)); + metaPostfix = 
Sequence.new() .push(Assignment.new(name: #p, rule: RuleCall.new(name: #metaPrimary))) .push(Star.new(expression: Alternation.new() + .push(Sequence.new() + .push(RuleCall.new(name: #lbrak)) + .push(Assignment.new(name: #s, rule: RuleCall.new(name: #metaExpression))) + .push(RuleCall.new(name: #rbrak)) + .push(Not.new(expression: RuleCall.new(name: #assign))) + .push(Action.new(parseTree: `{ p = newGetArray(p, s) })) + ) .push(Sequence.new() .push(RuleCall.new(name: #dot)) .push(Assignment.new(name: #i, rule: RuleCall.new(name: #metaId))) @@ -508,6 +800,7 @@ metaPrimary = Alternation.new() .push(RuleCall.new(name: #nil)) .push(RuleCall.new(name: #number)) + .push(RuleCall.new(name: #string)) .push(RuleCall.new(name: #metaVar)) .push(RuleCall.new(name: #metaSubExpr)); @@ -530,14 +823,52 @@ block = ) ) .push(RuleCall.new(name: #rbrace)) - .push(Action.new(parseTree: `{ Block.new(body: b); })); + .push(Action.new(parseTree: `{ b; })); + +string = + Sequence.new() + .push(StringLiteral.new(string: "\"")) + .push(Capture.new(expression: Star.new(expression: Sequence.new() + .push(Not.new(expression: StringLiteral.new(string: "\""))) + .push(RuleCall.new(name: #char)) + ))) + .push(StringLiteral.new(string: "\"")) + .push(RuleCall.new(name: #ws)) + .push(Action.new(parseTree: `{ $$ = newStringUnescaped(yytext) })); + +char = + Alternation.new() + .push(Sequence.new() + .push(StringLiteral.new(string: "\\")) + .push(CharacterClass.new(value: "abefnrtv\'\"[]\\")) + ) + .push(Sequence.new() + .push(StringLiteral.new(string: "\\")) + .push(CharacterClass.new(value: "0-3")) + .push(CharacterClass.new(value: "0-7")) + .push(CharacterClass.new(value: "0-7")) + ) + .push(Sequence.new() + .push(StringLiteral.new(string: "\\")) + .push(CharacterClass.new(value: "0-7")) + .push(Optional.new(expression: CharacterClass.new(value: "0-7"))) + ) + .push(Sequence.new() + .push(StringLiteral.new(string: "\\")) + .push(CharacterClass.new(value: "xX")) + .push(Star.new(expression: 
RuleCall.new(name: #higit))) + ) + .push(Sequence.new() + .push(Not.new(expression: StringLiteral.new(string: "\\"))) + .push(Dot.new()) + ); number = Alternation.new() .push(Sequence.new() .push(StringLiteral.new(string: "-")) .push(Assignment.new(name: #n, rule: RuleCall.new(name: #unsign))) - .push(Action.new(parseTree: `{ Unyop.new(operation: __opNeg).push(n) })) + .push(Action.new(parseTree: `{ Unyop.new(operation: opNeg).push(n) })) ) .push(Sequence.new() .push(StringLiteral.new(string: "+")) @@ -551,28 +882,39 @@ number = unsign = Sequence.new() - .push(Begin.new()) - .push(Plus.new(expression: RuleCall.new(name: #digit))) - .push(End.new()) + .push(Capture.new(expression: + Plus.new(expression: RuleCall.new(name: #digit))) + ) .push(RuleCall.new(name: #ws)) .push(Action.new(parseTree: `{ yytext.asInteger(10); })); metaVar = - Sequence.new() - .push(Assignment.new(name: #i, rule: RuleCall.new(name: #metaId))) - .push(Action.new(parseTree: `{ GetVar.new(name: i); })); + Alternation.new() + .push( + Sequence.new() + .push(StringLiteral.new(string: "global")) + .push(RuleCall.new(name: #ws)) + .push(Assignment.new(name: #i, rule: RuleCall.new(name: #metaId))) + .push(Action.new(parseTree: `{ GetGlobal.new(name: i); })) + ) + .push( + Sequence.new() + .push(Assignment.new(name: #i, rule: RuleCall.new(name: #metaId))) + .push(Action.new(parseTree: `{ GetVar.new(name: i); })) + ); metaId = Sequence.new() - .push(Begin.new()) - .push(RuleCall.new(name: #letter)) - .push(Star.new(expression: RuleCall.new(name: #alnum))) - .push(End.new()) + .push(Capture.new(expression: Sequence.new() + .push(RuleCall.new(name: #letter)) + .push(Star.new(expression: RuleCall.new(name: #alnum))) + )) .push(RuleCall.new(name: #ws)) .push(Action.new(parseTree: `{ intern(yytext); })); digit = CharacterClass.new(value: "0-9"); -letter = CharacterClass.new(value: "A-Za-z_"); +higit = CharacterClass.new(value: "0-9A-Fa-f"); +letter = CharacterClass.new(value: "A-Za-z_$"); alnum = 
Alternation.new() .push(RuleCall.new(name: #letter)) @@ -591,6 +933,7 @@ lbrak = Sequence.new().push(StringLiteral.new(string: "[" )).push(RuleCall.new( rbrak = Sequence.new().push(StringLiteral.new(string: "]" )).push(RuleCall.new(name: #ws)); lbrace = Sequence.new().push(StringLiteral.new(string: "{" )).push(RuleCall.new(name: #ws)); rbrace = Sequence.new().push(StringLiteral.new(string: "}" )).push(RuleCall.new(name: #ws)); +pplus = Sequence.new().push(StringLiteral.new(string: "++")).push(RuleCall.new(name: #ws)); assign = Sequence.new() @@ -685,10 +1028,13 @@ expression = sequence = Sequence.new() .push(Assignment.new(name: #p, rule: RuleCall.new(name: #prefix))) - .push(Action.new(parseTree: `{ p = Sequence.new().push(p); })) - .push(Star.new(expression: Sequence.new() + .push(Optional.new(expression: Sequence.new() .push(Assignment.new(name: #q, rule: RuleCall.new(name: #prefix))) - .push(Action.new(parseTree: `{ p.push(q); })) + .push(Action.new(parseTree: `{ p = Sequence.new().push(p).push(q); })) + .push(Star.new(expression: Sequence.new() + .push(Assignment.new(name: #q, rule: RuleCall.new(name: #prefix))) + .push(Action.new(parseTree: `{ p.push(q); })) + )) )) .push(Action.new(parseTree: `{ p; })); @@ -698,6 +1044,11 @@ prefix = .push(RuleCall.new(name: #and)) .push(RuleCall.new(name: #action)) ) + .push(Sequence.new() + .push(RuleCall.new(name: #at)) + .push(Assignment.new(name: #a, rule: RuleCall.new(name: #action))) + .push(Action.new(parseTree: `{ ExecuteAction.new(action: a); })) + ) .push(Alternation.new() .push(Sequence.new() .push(RuleCall.new(name: #and)) @@ -772,19 +1123,17 @@ primary = ) .push(Sequence.new() .push(RuleCall.new(name: #begin)) - .push(Action.new(parseTree: `{ Begin.new(); })) - ) - .push(Sequence.new() + .push(Assignment.new(name: #e, rule: RuleCall.new(name: #expression))) .push(RuleCall.new(name: #end)) - .push(Action.new(parseTree: `{ End.new(); })) + .push(Action.new(parseTree: `{ Capture.new(expression: e); })) ); 
identifier = Sequence.new() - .push(Begin.new()) - .push(CharacterClass.new(value: "-a-zA-Z_")) - .push(Star.new(expression: CharacterClass.new(value: "-a-zA-Z_0-9"))) - .push(End.new()) + .push(Capture.new(expression: Sequence.new() + .push(CharacterClass.new(value: "-a-zA-Z_")) + .push(Star.new(expression: CharacterClass.new(value: "-a-zA-Z_0-9"))) + )) .push(RuleCall.new(name: #ws)) .push(Action.new(parseTree: `{ intern(yytext); })); @@ -805,24 +1154,22 @@ literal = Alternation.new() .push(Sequence.new() .push(CharacterClass.new(value: "\'")) - .push(Begin.new()) - .push(Star.new(expression: Sequence.new() - .push(Not.new(expression: CharacterClass.new(value: "\'"))) - .push(RuleCall.new(name: #char)) + .push(Capture.new(expression: + Star.new(expression: Sequence.new() + .push(Not.new(expression: CharacterClass.new(value: "\'"))) + .push(RuleCall.new(name: #char))) )) - .push(End.new()) .push(CharacterClass.new(value: "\'")) .push(RuleCall.new(name: #ws)) .push(Action.new(parseTree: `{ StringLiteral.new(string: yytext); })) ) .push(Sequence.new() .push(CharacterClass.new(value: "\"")) - .push(Begin.new()) - .push(Star.new(expression: Sequence.new() - .push(Not.new(expression: CharacterClass.new(value: "\""))) - .push(RuleCall.new(name: #char)) + .push(Capture.new(expression: + Star.new(expression: Sequence.new() + .push(Not.new(expression: CharacterClass.new(value: "\""))) + .push(RuleCall.new(name: #char))) )) - .push(End.new()) .push(CharacterClass.new(value: "\"")) .push(RuleCall.new(name: #ws)) .push(Action.new(parseTree: `{ StringLiteral.new(string: yytext); })) @@ -831,15 +1178,14 @@ literal = class = Sequence.new() .push(StringLiteral.new(string: "[")) - .push(Begin.new()) - .push(Star.new(expression: Sequence.new() - .push(Not.new(expression: StringLiteral.new(string: "]"))) - .push(RuleCall.new(name: #range)) + .push(Capture.new(expression: + Star.new(expression: Sequence.new() + .push(Not.new(expression: StringLiteral.new(string: "]"))) + 
.push(RuleCall.new(name: #range))) )) - .push(End.new()) .push(StringLiteral.new(string: "]")) .push(RuleCall.new(name: #ws)) - .push(Action.new(parseTree: `{ CharacterClass.new(value: yytext); })); + .push(Action.new(parseTree: `{ CharacterClass.new(value: yytext.unescaped()); })); range = Alternation.new() @@ -878,7 +1224,7 @@ action = Sequence.new() .push(Assignment.new(name: #m, rule: NamespacedRuleCall.new(namespace: #HardCodedMeta, name: #block))) .push(RuleCall.new(name: #ws)) - .push(Action.new(parseTree: `{ Action.new(parseTree: m); })); + .push(Action.new(parseTree: `{ Action.new(parseTree: Block.new(body: m)); })); bar = Sequence.new().push(StringLiteral.new(string: "|")).push(RuleCall.new(name: #ws)); not = Sequence.new().push(StringLiteral.new(string: "!")).push(RuleCall.new(name: #ws)); @@ -886,6 +1232,7 @@ query = Sequence.new().push(StringLiteral.new(string: "?")).push(RuleCall.new begin = Sequence.new().push(StringLiteral.new(string: "<")).push(RuleCall.new(name: #ws)); end = Sequence.new().push(StringLiteral.new(string: ">")).push(RuleCall.new(name: #ws)); tilde = Sequence.new().push(StringLiteral.new(string: "~")).push(RuleCall.new(name: #ws)); +at = Sequence.new().push(StringLiteral.new(string: "@")).push(RuleCall.new(name: #ws)); rpercent = Sequence.new().push(StringLiteral.new(string: "%}")).push(RuleCall.new(name: #ws)); ccolon = Sequence.new().push(StringLiteral.new(string: "::")).push(RuleCall.new(name: #ws)); @@ -936,10 +1283,171 @@ ws = .push(RuleCall.new(name: #comment)) ); -// ----- Main ----- - without(all: 1); +// Functions for minproto parser + +Object_push(o, v) { + o.push(v); + o; +} + +Object_put(o, k, v) { + o[k] = v; + o; +} + +global pObject = Object; + +new(_) { + []; +} + +_get(obj, _, __) { + obj; +} + +expected(what, where) +{ + error("syntax error: " + what + " expected near: " + where); +} + +syntaxError(str) { + error("syntax error: " + str); +} + +newWhile(cond, stmt) { + While.new(condition: cond, body: stmt); +} + 
+newIf(c, s, t) { + If.new(condition: c, consequent: s, alternate: t); +} + +newContinue() { + Continue.new(); +} + +newBreak(e) { + Break.new(value: e); +} + +newReturn(e) { + Return.new(value: e); +} + +newForIn(i, e, s) { + ForIn.new(identifier: i, expression: e, body: s); +} + +newForFromTo(i, a, b, s) { + ForFromTo.new(identifier: i, first: a, last: b, body: s); +} + +newFor(i, c, u, s) { + For.new(initialise: i, condition: c, update: u, body: s); +} + +newTryCatch(t, i, c) { + TryCatch.new(statement: t, identifier: i, handler: c); +} + +newTryEnsure(t, e) { + TryEnsure.new(statement: t, handler: e); +} + +newRaise(e) { + Raise.new(value: e); +} + +newLambda(p, b, par, n) { + Lambda.new(parameters: p, body: b, parent: par, name: n); +} + +newGetLocal(i) { + GetLocal.new(name: i); +} + +newSetLocal(i, v) { + SetLocal.new(name: i, value: v); +} + +newGetGlobal(i) { + GetGlobal.new(name: i); +} + +newSetGlobal(i, v) { + SetGlobal.new(name: i, value: v); +} + +newGetVar(i) { + GetVar.new(name: i); +} + +newSetVar(i, v) { + SetVar.new(name: i, value: v); +} + +newGetProp(v, j) { + GetProp.new(object: v, key: j); +} + +newSetProp(o, k, v) { + SetProp.new(object: o, key: k, value: v); +} + +newBlock(b) { + Block.new(body: b); +} + +newUnyop(o, p) { + Unyop.new(operation: o).push(p); +} + +neg(n) { + Unyop.new(operation: opNeg).push(n); +} + +newSuper(i, a) { + Super.new(method: i, arguments: a); +} + +newGetArray(p, e) { + GetArray.new(object: p, index: e); +} + +newGetSlice(o, s, e) { + GetSlice.new(object: o, start: s, stop: e); +} + +newInvoke(p, i, a) { + Invoke.new(self: p, method: i, arguments: a); +} + +newLiteral(o) { + Literal.new(object: o); +} + +newFloat(f) { + f; +} + +strtod(text, _) { + text.asFloat(); +} + +newInteger(i) { + i; +} + +strtol(text, _, base) { + text.asInteger(base) +} + +newStringUnescaped(text) { + text; +} + HardCodedPeg = Grammar.new(); HardCodedMeta = Grammar.new(); @@ -948,9 +1456,10 @@ 
HardCodedMeta.addRulesFromNamespace(#HardCodedMeta); HardCodedPeg.__delegate__ = HardCodedMeta; -global stream = newStream(readfile("rawminproto.leg")); +global stream = newStream(readfile("minproto.grammar")); global context = Context.new(outerContext: nil).init(); global actions = []; +global lineno = 0; while(!stream.atEnd()) { HardCodedPeg.grammar(stream, context, actions); @@ -966,8 +1475,8 @@ while(!stream.atEnd()) { global actions = []; } -println("\n--------- META ---------\n\n"); -println(__namespaces__.metaLanguage); +//println("\n--------- META ---------\n\n"); +//println(__namespaces__.metaLanguage); global stream = newStream(readfile("rawgrammar.leg")); global context = Context.new(outerContext: nil).init(); @@ -986,20 +1495,26 @@ while(!stream.atEnd()) { global actions = []; } -println("\n--------- PEG ---------\n\n"); -println(__namespaces__.peg); - -// Circularity test +//println("\n--------- PEG ---------\n\n"); +//println(__namespaces__.peg); -peg = Grammar.new(); -metaLanguage = Grammar.new(); +global peg = Grammar.new(); +global metaLanguage = Grammar.new(); peg.addRulesFromNamespace(#peg); metaLanguage.addRulesFromNamespace(#metaLanguage); peg.__delegate__ = metaLanguage; -global stream2 = newStream(readfile("rawminproto.leg")); +global now = cputime(); +print("Parser Generation Time : ", now - then, "s\n"); + +// Circularity test +if (nil) { + +println("circularity test\n"); + +global stream2 = newStream(readfile("minproto.grammar")); global context2 = Context.new(outerContext: nil).init(); global actions2 = []; @@ -1017,8 +1532,8 @@ while(!stream2.atEnd()) { global actions2 = []; } -println("\n--------- CIRCULAR META ---------\n\n"); -println(__namespaces__.metaLanguageCircular); +//println("\n--------- CIRCULAR META ---------\n\n"); +//println(__namespaces__.metaLanguageCircular); global stream2 = newStream(readfile("rawgrammar.leg")); global context2 = Context.new(outerContext: nil).init(); @@ -1038,8 +1553,8 @@ while(!stream2.atEnd()) { 
global actions2 = []; } -println("\n--------- CIRCULAR PEG ---------\n\n"); -println(__namespaces__.pegCircular); +//println("\n--------- CIRCULAR PEG ---------\n\n"); +//println(__namespaces__.pegCircular); compareGrammars(grammar1, grammar2) { local success = 1; @@ -1061,25 +1576,113 @@ compareGrammars(grammar1, grammar2) { compareGrammars(#peg, #pegCircular); compareGrammars(#metaLanguage, #metaLanguageCircular); +} + //global functionCounts = []; global then = cputime(); -global stream3 = newStream(readfile("grammar_parser.meta")); +global stream3 = newStream(readfile("dowhile.meta")); global context3 = Context.new(outerContext: nil).init(); global actions3 = []; while(!stream3.atEnd()) { metaLanguage.start(stream3, context3, actions3); - res = { for (actionAndContext in actions3) { + local res = { for (actionAndContext in actions3) { actionAndContext.action.execute(actionAndContext.context); } }; - //print(res, "\n"); - //println(len(actions3)); - //println(context3.variables.s); - //eval(context3.variables.s, nil); + eval(res, env: nil); global actions3 = []; } global now = cputime(); -print("Parse time : ", now - then, "s\n"); +print("---\nFile Execution time : ", now - then, "s\n---\n"); //println(functionCounts); + +global then = cputime(); + +global streamFString = newStream(readfile("fstring.meta")); +global contextFString = Context.new(outerContext: nil).init(); +global actionsFString = []; + +while(!streamFString.atEnd()) { + metaLanguage.start(streamFString, contextFString, actionsFString); + local res = { for (actionAndContext in actionsFString) { + actionAndContext.action.execute(actionAndContext.context); + } }; + eval(res, env: nil); + global actionsFString = []; +} + +global now = cputime(); + +print("---\nFile Execution time : ", now - then, "s\n---\n"); + +global stream4 = newStream(readfile("minproto.grammar")); +global context4 = Context.new(outerContext: nil).init(); +global actions4 = []; + +addRule(grammar, definition) { + + local 
instructions = []; + + instructions.push(Instruction.new(OpCodes.FAIL, nil, 0, 0, 0)); + local ko = instructions.length() - 1; + + instructions.push(Instruction.new(OpCodes.SUCCEED, nil, 0, 0, 0)); + local ok = instructions.length() - 1; + + definition.expression.emitByteCode(instructions, ok, ko); + + for (n from 0 to (instructions.length() - 1 - ((instructions.length() - 1) % 2)) / 2) { + local i = instructions[n]; + instructions[n] = instructions[instructions.length() - 1 - n]; + instructions[instructions.length() - 1 - n] = i; + } + + //println(definition.name); + + for (n in instructions.length()) { + i = instructions[n]; + i.ok = instructions.length() - 1 - i.ok; + i.ko = instructions.length() - 1 - i.ko; + //print(" 0", { if (n < 10) { "0" } else { "" } }, n, " "); + //i.printline(); + } + + grammar[definition.name] = []; + for (instruction in instructions) { + grammar[definition.name].push(instruction.op); + grammar[definition.name].push(instruction.arg); + grammar[definition.name].push(instruction.arglen); + grammar[definition.name].push(instruction.ok); + grammar[definition.name].push(instruction.ko); + } +} + +minprotoVMGrammar = []; + +while(!stream4.atEnd()) { + peg.grammar(stream4, context4, actions4); + + local definition = { for (actionAndContext in actions4) { + actionAndContext.action.execute(actionAndContext.context); + } }; + if (definition == nil) continue; // end of file case + + addRule(minprotoVMGrammar, definition); + + global actions4 = []; +} + +global then = cputime(); + +input = readfile("grammar_parser.meta"); +cursor = 0; +while (yysval != 0) { + matchedCharacters = __match__(minprotoVMGrammar, #start, input, cursor); + //print(yysval, "\n"); + cursor += matchedCharacters; +} + +global now = cputime(); +print("---\nFile Parsing time : ", now - then, "s\nSpeed: ", 1688/(now-then), " lines per second\n---\n"); diff --git a/minproto.grammar b/minproto.grammar new file mode 100644 index 0000000..ac1ff8f --- /dev/null +++ 
b/minproto.grammar @@ -0,0 +1,316 @@ +# minproto.leg -- minimal prototype language for semantic experiments +# +# last edited: 2024-07-04 10:07:00 by piumarta on zora + + +start = - ( s:stmt { global yysval = s } + | !. { global yysval = 0 } + | < (!EOL .)* > { syntaxError(yytext) } + ) + +stmt = WHILE LPAREN c:expr RPAREN s:stmt { $$ = newWhile(c, s) } + | IF LPAREN c:expr RPAREN s:stmt + ( ELSE t:stmt { $$ = newIf(c, s, t ) } + | { $$ = newIf(c, s, nil) } + ) + | CONT EOS { $$ = newContinue() } + | BREAK e:expr EOS { $$ = newBreak(e) } + | BREAK EOS { $$ = newBreak(nil) } + | RETURN e:expr EOS { $$ = newReturn(e) } + | RETURN EOS { $$ = newReturn(nil) } + | FOR LPAREN i:id IN e:expr RPAREN + s:stmt { $$ = newForIn(i, e, s) } + | FOR LPAREN i:id FROM a:expr + TO b:expr RPAREN s:stmt { $$ = newForFromTo(i, a, b, s) } + | FOR LPAREN i:expr SEMI c:expr SEMI + u:expr RPAREN s:stmt { $$ = newFor(i, c, u, s) } + | TRY t:stmt + ( CATCH LPAREN i:id RPAREN c:stmt { $$ = newTryCatch(t, i, c) } + | ENSURE e:stmt { $$ = newTryEnsure(t, e) } + ) + | RAISE e:expr EOS { $$ = newRaise(e) } + | LOCAL i:id p:params b:block { $$ = newSetLocal (i, newLambda(p, b, nil, i)) } + | GLOBAL i:id p:params b:block { $$ = newSetGlobal(i, newLambda(p, b, nil, i)) } + | i:id p:params b:block { $$ = newSetVar (i, newLambda(p, b, nil, i)) } + | v:proto DOT i:id p:params b:block { $$ = newSetProp(v, i, newLambda(p, b, v, i)) } + | b:block { $$ = newBlock(b) } + | e:expr EOS { $$ = e } + +proto = v:var ( DOT j:id !LPAREN { v = newGetProp(v, j) } + )* { $$ = v } + +EOS = SEMI+ | &RBRACE | &ELSE | &CATCH + +expr = LOCAL i:id ASSIGN e:expr { $$ = newSetLocal (i, e) } + | GLOBAL i:id ASSIGN e:expr { $$ = newSetGlobal(i, e) } + | i:id ASSIGN e:expr { $$ = newSetVar (i, e) } + | l:logor ( ASSIGN r:expr { l = assign(l, r) } + | PLUSEQ r:expr { l = newBinop(opPreAdd, lvalue(l), r) } + | MINUSEQ r:expr { l = newBinop(opPreSub, lvalue(l), r) } + | STAREQ r:expr { l = newBinop(opPreMul, lvalue(l), r) } + | 
SLASHEQ r:expr { l = newBinop(opPreDiv, lvalue(l), r) }
+               | PCENTEQ r:expr { l = newBinop(opPreMod, lvalue(l), r) }
+               | SHLEQ r:expr { l = newBinop(opPreShl, lvalue(l), r) }
+               | SHREQ r:expr { l = newBinop(opPreShr, lvalue(l), r) }
+               | ANDEQ r:expr { l = newBinop(opPreAnd, lvalue(l), r) }
+               | XOREQ r:expr { l = newBinop(opPreXor, lvalue(l), r) }
+               | OREQ r:expr { l = newBinop(opPreOr, lvalue(l), r) }
+               )? { $$ = l }
+
+logor = l:logand ( BARBAR r:logand { l = newBinop(opLogOr, l, r) }    # each level from logor down to prod folds its operands left to right into l
+                 )* { $$ = l }
+
+logand = l:bitor ( ANDAND r:bitor { l = newBinop(opLogAnd, l, r) }
+                 )* { $$ = l }
+
+bitor = l:bitxor ( OR r:bitxor { l = newBinop(opBitOr, l, r) }
+                 )* { $$ = l }
+
+bitxor = l:bitand ( XOR r:bitand { l = newBinop(opBitXor, l, r) }
+                  )* { $$ = l }
+
+bitand = l:eq ( AND r:eq { l = newBinop(opBitAnd, l, r) }
+              )* { $$ = l }
+
+eq = l:ineq ( EQ r:ineq { l = newBinop(opEq, l, r) }
+            | NOTEQ r:ineq { l = newBinop(opNotEq, l, r) }
+            )* { $$ = l }
+
+ineq = l:shift ( LESS r:shift { l = newBinop(opLess, l, r) }
+               | LESSEQ r:shift { l = newBinop(opLessEq, l, r) }
+               | GRTREQ r:shift { l = newBinop(opGrtrEq, l, r) }
+               | GRTR r:shift { l = newBinop(opGrtr, l, r) }
+               )* { $$ = l }
+
+shift = l:sum ( SHL r:sum { l = newBinop(opShl, l, r) }
+              | SHR r:sum { l = newBinop(opShr, l, r) }
+              )* { $$ = l }
+
+sum = l:prod ( PLUS r:prod { l = newBinop(opAdd, l, r) }
+             | MINUS r:prod { l = newBinop(opSub, l, r) }
+             )* { $$ = l }
+
+prod = l:prefix ( STAR r:prefix { l = newBinop(opMul, l, r) }
+                | SLASH r:prefix { l = newBinop(opDiv, l, r) }
+                | PCENT r:prefix { l = newBinop(opMod, l, r) }
+                ) * { $$ = l }
+
+prefix = PPLUS p:prefix { $$ = newBinop(opPreAdd, lvalue(p), newInteger(1)) }    # ++x and --x are built as pre-add / pre-sub of 1
+       | MMINUS p:prefix { $$ = newBinop(opPreSub, lvalue(p), newInteger(1)) }
+       | PLING p:prefix { $$ = newUnyop(opNot, p) }
+       | MINUS p:prefix { $$ = newUnyop(opNeg, p) }
+       | TILDE p:prefix { $$ = newUnyop(opCom, p) }
+       | BQUOTE s:stmt { $$ = newUnyop(opQuasiquote, s) }
+       | COMMAT e:expr { $$ = 
newUnyop(opUnquote, e) }
+       | postfix
+
+postfix = SUPER DOT i:id a:args { $$ = newSuper(i, a) }
+        | p:primary
+          ( LBRAK
+            ( COLON ( RBRAK { p = newGetSlice(p, nil, nil) }
+                    | e:xexpr RBRAK { p = newGetSlice(p, nil, e) }
+                    )
+            | s:xexpr ( COLON ( RBRAK { p = newGetSlice(p, s, nil) }
+                              | e:xexpr RBRAK { p = newGetSlice(p, s, e) }
+                              )
+                      | RBRAK { p = newGetArray(p, s) }
+                      )
+            )
+          | DOT i:id ( a:args !LBRACE { p = newInvoke(p, i, a) }
+                     | { p = newGetProp(p, i) }
+                     )
+          | a:args !LBRACE { p = newApply(p, a) }
+          )*
+          ( PPLUS { p = newBinop(opPostAdd, lvalue(p), newInteger( 1)) }
+          | MMINUS { p = newBinop(opPostAdd, lvalue(p), newInteger(-1)) }    # post-decrement is built as opPostAdd of -1
+          )? { $$ = p }
+
+args = LPAREN a:mkobj
+       ( RPAREN
+       | ( k:id COLON e:xexpr { Object_put(a, k, e) }
+         | e:xexpr { Object_push(a, e) }
+         )
+         ( COMMA ( k:id COLON e:xexpr { Object_put(a, k, e) }
+                 | e:xexpr { Object_push(a, e) }
+                 ) )* RPAREN ) { $$ = a }
+
+params = LPAREN p:mkobj
+         ( RPAREN
+         | i:id ( COLON e:expr { Object_put(p, i, e) }
+                | { Object_push(p, i) }
+                )
+           ( COMMA i:id ( COLON e:expr { Object_put(p, i, e) }
+                        | { Object_push(p, i) }
+                        )
+           )* RPAREN ) { $$ = p }
+
+mkobj = { $$ = (global new)(pObject) }
+
+primary = nil | number | string | symbol | var | lambda | subexpr | literal # | regex
+
+lambda = p:params b:block { $$ = newLambda(p, b, nil, nil) }
+
+subexpr = LPAREN e:expr RPAREN { $$ = e }
+        | b:block { $$ = newBlock(b) }
+
+literal = LBRAK o:mkobj
+          ( RBRAK
+          | ( ( i:id COLON e:expr { Object_put(o, i, e) }
+              | e:expr { Object_push(o, e) }
+              ) ( COMMA ( i:id COLON e:expr { Object_put(o, i, e) }
+                        | e:expr { Object_push(o, e) }
+                        ) )* )? RBRAK ) { $$ = newLiteral(o) }
+
+block = LBRACE b:mkobj
+        ( e:stmt { Object_push(b, e) }
+        )* ( RBRACE { $$ = b }
+           | error @{ expected("statement or \x7D", yytext) }
+           )
+
+nil = NIL { $$ = nil }
+
+number = "-" n:unsign { $$ = neg(n) }
+       | "+" n:number { $$ = n }
+       | n:unsign { $$ = n }
+
+unsign = < DIGIT* '.' DIGIT+ EXP? 
> - { $$ = newFloat(strtod(yytext, 0)) }
+       | "0" [bB] < BIGIT+ > - { $$ = newInteger(strtol(yytext, 0, 2)) }
+       | "0" [xX] < HIGIT+ > - { $$ = newInteger(strtol(yytext, 0, 16)) }
+       | "0" < OIGIT* > - { $$ = newInteger(strtol(yytext, 0, 8)) }
+       | < DIGIT+ > - { $$ = newInteger(strtol(yytext, 0, 10)) }
+       | "'" < char > "'" - { $$ = newInteger(_get(newStringUnescaped(yytext), String,value)[0]) }
+
+string = '"' < ( !'"' char )* > '"' - { $$ = newStringUnescaped(yytext) }
+
+char = '\\' [abefnrtv'"\[\]\\]
+     | '\\' [0-3][0-7][0-7]
+     | '\\' [xX] HIGIT*    # NOTE(review): HIGIT* also accepts a backslash-x escape with no hex digits; rawminproto.leg uses HIGIT+ -- confirm which is intended
+     | '\\' [0-7][0-7]?
+     | !'\\' .
+# char = "\\" ( ["'\\abfnrtv]
+# | [xX] HIGIT*
+# | [0-7][0-7]?[0-7]?
+# )
+# | .
+
+symbol = HASH i:id { $$ = i }
+
+var = LOCAL i:id { $$ = newGetLocal (i) }
+    | GLOBAL i:id { $$ = newGetGlobal(i) }
+    | i:id { $$ = newGetVar (i) }
+
+id = < LETTER ALNUM* > - { $$ = intern(yytext) }
+
+# regex = SLASH a:alts SLASH { $$ = a }
+
+# alts = s:seq ( OR t:seq { s = Alt_append(t) }
+# )* { $$ = s }
+
+# seq = p:pre ( q:pre { s = Seq_append(t) }
+# )* { $$ = s }
+
+# elt = action | pre
+
+# action = b:block { $$ = newAction(b) }
+
+# pre = PLING p:pre { $$ = newNot(p) }
+# | AND p:pre { $$ = newAnd(p) }
+# | post
+
+# post = a:atom ( STAR { a = newMany(a) }
+# | PLUS { a = newMore(a) }
+# | QUERY { a = newMore(a) }
+# )? { $$ = a }
+
+# atom = DOT { $$ = newDot() }
+# | "[" ( !"]" "\\"? . )* "]" - { $$ = newClass(yytext) }
+# | '"' xxxxxx
+
+# class = LBRAK
+
+BIGIT = [0-1]
+OIGIT = [0-7]
+DIGIT = [0-9]
+HIGIT = [0-9A-Fa-f]
+LETTER = [A-Za-z_$?]
+ALNUM = LETTER | DIGIT
+SIGN = [-+]
+EXP = [eE] SIGN DIGIT+    # NOTE(review): SIGN is not optional here, so an exponent must carry an explicit + or - -- confirm intended
+
+- = SPACE*
+
+SPACE = [ \t] | EOL | SLC | MLC
+EOL = [\n\r] { ++lineno }
+SLC = "//" (!EOL .)*
+MLC = "/*" ( MLC | !"*/" (EOL | .))* "*/" -
+
+NIL = "nil" !ALNUM -
+WHILE = "while" !ALNUM -
+IF = "if" !ALNUM -
+ELSE = "else" !ALNUM -
+FOR = "for" !ALNUM -
+IN = "in" !ALNUM -
+FROM = "from" !ALNUM -
+TO = "to" !ALNUM -
+CONT = "continue" !ALNUM -
+BREAK = "break" !ALNUM -
+RETURN = "return" !ALNUM -
+TRY = "try" !ALNUM -
+CATCH = "catch" !ALNUM -
+ENSURE = "ensure" !ALNUM -
+RAISE = "raise" !ALNUM -
+GLOBAL = "global" !ALNUM -
+LOCAL = "local" !ALNUM -
+SUPER = "super" !ALNUM -
+
+BQUOTE = "`" -
+COMMAT = "@" -
+HASH = "#" -
+SEMI = ";" -
+ASSIGN = "=" ![=] -
+COMMA = "," -
+COLON = ":" ![:] -
+LPAREN = "(" -
+RPAREN = ")" -
+LBRAK = "[" -
+RBRAK = "]" -
+LBRACE = "{" -
+RBRACE = "}" -
+BARBAR = "||" ![=] -
+ANDAND = "&&" ![=] -
+OR = "|" ![|=] -
+OREQ = "|=" -
+XOR = "^" ![=] -
+XOREQ = "^=" -
+AND = "&" ![&=] -
+ANDEQ = "&=" -
+EQ = "==" -
+NOTEQ = "!=" -
+LESS = "<" ![<=] -
+LESSEQ = "<=" -
+GRTREQ = ">=" -
+GRTR = ">" ![=] -
+SHL = "<<" ![=] -
+SHLEQ = "<<=" -
+SHR = ">>" ![=] -
+SHREQ = ">>=" -
+PLUS = "+" ![+=] -
+PLUSEQ = "+=" -
+PPLUS = "++" -
+MINUS = "-" ![-=] -
+MINUSEQ = "-=" -
+MMINUS = "--" -
+STAR = "*" ![=] -
+STAREQ = "*=" -
+SLASH = "/" ![/=] -
+SLASHEQ = "/=" -
+PCENT = "%" ![=] -
+PCENTEQ = "%=" -
+DOT = "." ![.] -
+PLING = "!" ![=] -
+TILDE = "~" -
+
+error = - < (!EOL .)* >
+
+xexpr = expr | error @{ expected("expression", yytext) }
+
diff --git a/minproto.leg b/minproto.leg
index d78dcc2..b3deecd 100644
--- a/minproto.leg
+++ b/minproto.leg
@@ -3394,10 +3394,12 @@ void expected(char *what, char *where)
     fatal("syntax error: %s expected near: %s", what, where);
 }
 
+#define global /* expands to nothing, so the `global` keyword written in the grammar actions below disappears when they are compiled as C */
+
 %}
 
-start = - ( s:stmt { yysval = s }
-          | !. { yysval = 0 }
+start = - ( s:stmt { global yysval = s }
+          | !. 
{ global yysval = 0 }
           | < (!EOL .)* > { syntaxError(yytext) }
           )
 
@@ -3537,7 +3539,7 @@ params = LPAREN p:mkobj
                         )
            )* RPAREN ) { $$ = p }
 
-mkobj = { $$ = new(pObject) }
+mkobj = { $$ = (global new)(pObject) }
 
 primary = nil | number | string | symbol | var | lambda | subexpr | literal # | regex
 
@@ -3575,11 +3577,16 @@ unsign = < DIGIT* '.' DIGIT+ EXP? > - { $$ = newFloat(strtod(yytext, 0
 
 string = '"' < ( !'"' char )* > '"' - { $$ = newStringUnescaped(yytext) }
 
-char = "\\" ( ["'\\abfnrtv]
-            | [xX] HIGIT*
-            | [0-7][0-7]?[0-7]?
-            )
-     | .
+char = '\\' [abefnrtv'"\[\]\\]
+     | '\\' [0-3][0-7][0-7]
+     | '\\' [xX] HIGIT*
+     | '\\' [0-7][0-7]?
+     | !'\\' .
+# char = "\\" ( ["'\\abfnrtv]
+# | [xX] HIGIT*
+# | [0-7][0-7]?[0-7]?
+# )
+# | .
 
 symbol = HASH i:id { $$ = i }
 
@@ -3695,7 +3702,6 @@ SLASHEQ = "/=" -
 PCENT = "%" ![=] -
 PCENTEQ = "%=" -
 DOT = "." ![.] -
-DOTDOT = ".." -
 PLING = "!" ![=] -
 TILDE = "~" -
 
@@ -3705,6 +3711,7 @@ xexpr = expr | error @{ expected("expression", yytext) }
 
 %%
 ;
+#undef global /* the empty `global` macro defined before the grammar is removed again once the grammar section is over */
 
 #if PROFILE
 
@@ -4389,7 +4396,8 @@ oop prim_eval(oop func, oop self, oop args, oop env)
     oop *indexed = _get(args, Object,indexed);
     oop result = nil;
 
-    if (nil != Object_getLocal(args, sym_env)) {
+    //if (nil != Object_getLocal(args, sym_env)) {
+    if (Object_find(args, sym_env) >= 0) { // presumably Object_find returns a negative value when the key is absent, so an env key that is present but nil is now honoured -- TODO confirm
         env = Object_getLocal(args, sym_env);
     }
 
@@ -4908,7 +4916,7 @@ typedef struct vmState
     oop variables;
 } vmState;
 
-#define VM_STATE_INITIALISER { nil, nil }
+#define VM_STATE_INITIALISER { nil, new(pObject) } /* second member now starts as a fresh object instead of nil; presumably that member is `variables` -- TODO confirm */
 
 void vmEnter(vmState *state, oop obj, char *yytext, int yyleng)
 {
@@ -4923,7 +4931,7 @@ void vmSet(vmState *state, oop obj, char *yytext, int yyleng)
 
 void vmAction(vmState *state, oop obj, char *yytext, int yyleng)
 {
-    oop text = yyleng ? newStringLen(yytext, yyleng) : nil;
+    oop text = yyleng >= 0 ? 
newStringLen(yytext, yyleng) : nil; // a zero-length match now goes through newStringLen (presumably giving an empty string) instead of nil
     Object_put(state->variables, sym_yytext, text);
     Object_put(state->variables, sym_yyleng, newInteger(yyleng));
     applyThunkIn(obj, state->variables);
@@ -4933,6 +4941,7 @@ void vmLeave(vmState *state, oop obj, char *yytext, int yyleng)
 {
     state->result = Object_getLocal(state->variables, sym_$$);
     state->variables = _getDelegate(state->variables);
+    Object_put(state->variables, sym_$$, state->result); // store the result as $$ in the delegate that has just become the current variables object
 }
 
 void vmDisassemble(vmInsn *code, int pc)
@@ -5130,6 +5139,8 @@ int vmRun(oop grammar0, oop symbol, char *text, int start, int length)
         C##stack[--C##sp]; \
     })
 
+    saveAction(vmEnter, nil, 0, 0); // queue a vmEnter action before the interpreter loop starts
+
     for (;;) {
         if (opt_d) vmDisassemble(frame.code, frame.pc);
         vmInsn *i = frame.code + frame.pc++;
@@ -5278,8 +5289,6 @@ int vmRun(oop grammar0, oop symbol, char *text, int start, int length)
         break;
     }
 
-    saveAction(vmLeave, nil, 0, 0);
-
 #undef pop
 #undef drop
 #undef push
@@ -5457,16 +5466,16 @@ int main(int argc, char **argv)
 
 #define stringify(x) #x
 
-#define declareOp(NAME, OP) _set(intern(stringify(__op##NAME)), Symbol,value, newInteger(op##NAME));
+#define declareOp(NAME, OP) _set(intern(stringify(op##NAME)), Symbol,value, newInteger(op##NAME)); /* the interned symbol is now named op<NAME>, without the former __ prefix */
     doBinops(declareOp)
 #undef declareOp
 
-#define declareOp(NAME, OP) _set(intern(stringify(__##NAME)), Symbol,value, newInteger(NAME));
+#undef stringify
+
+#define declareOp(NAME, OP) _set(intern(#NAME), Symbol,value, newInteger(NAME)); /* stringizes NAME directly, so stringify is no longer needed here and the __ prefix is dropped */
     doUnyops(declareOp)
 #undef declareOp
 
-#undef stringify
-
 #if TYPECODES
 
 # define defineEvaluator(NAME) \
diff --git a/rawgrammar.leg b/rawgrammar.leg
index d232fc9..5f338c8 100644
--- a/rawgrammar.leg
+++ b/rawgrammar.leg
@@ -9,11 +9,12 @@ expression = s:sequence !BAR { s; }
              ( BAR s2:sequence { s1.push(s2); }
              ) * { s1; }
 
-sequence = p:prefix { p = Sequence.new().push(p); }
+sequence = p:prefix ( q:prefix { p = Sequence.new().push(p).push(q); }
            ( q:prefix { p.push(q); }
-           ) * { p; }
+           ) * ) ? 
{ p; }
 
 prefix = AND a:action { ParseTimeAction.new(action: a); }
+       | AT a:action { ExecuteAction.new(action: a); }
        |
        ( AND s:suffix { And.new(expression: s); }
        | NOT s:suffix { Not.new(expression: s); }
@@ -33,8 +34,7 @@ primary = i1:identifier COLON i2:ruleCall !ASSIGN { Assignment.new(name:
         | c:class { c; }
         | DOT { Dot.new(); }
         | a:action { a; }
-        | BEGIN { Begin.new(); }
-        | END { End.new(); }
+        | BEGIN e:expression END { Capture.new(expression: e); }
 
 identifier = < [-a-zA-Z_][-a-zA-Z_0-9]* > - { intern(yytext); }
 
@@ -53,7 +53,7 @@ char = '\\' [abefnrtv'"\[\]\\]
      | '\\' [0-7][0-7]?
      | !'\\' .
 
-action = m:metaLanguage::block - { Action.new(parseTree: m); }
+action = m:metaLanguage::block - { Action.new(parseTree: Block.new(body: m)); }
 
 - = ( space | comment )*
 space = ' ' | '\t' | end-of-line
@@ -76,3 +76,4 @@ PLUS = "+" ![+=] -
 STAR = "*" ![=] -
 DOT = "." ![.] -
 COLON = ":" ![:] -
+AT = "@" -
diff --git a/rawminproto.leg b/rawminproto.leg
index 3c3021b..99c0254 100644
--- a/rawminproto.leg
+++ b/rawminproto.leg
@@ -152,7 +152,7 @@ literal = LBRAK o:mkobj
 
 block = LBRACE b:mkobj
         ( e:stmt { b.push(e) }
-        )* RBRACE { Block.new(body: b) }
+        )* RBRACE { b }
 
 nil = NIL { nil }
 
@@ -167,13 +167,21 @@ unsign = < DIGIT* '.' DIGIT+ EXP? > - { yytext.asFloat() }
        | < DIGIT+ > - { yytext.asInteger() }
        | "'" < char > "'" - { ord(yytext.unescaped()) }
 
-string = '"' < ( !'"' char )* > '"' - { yytext.unescaped() }
+string = '"' < ( !'"' char )* > '"' - { yytext }
 
-char = "\\" ( ["'\\abfnrtv]
-            | [xX] HIGIT*
-            | [0-7][0-7]?[0-7]?
-            )
-     | .
+# Original version, which fails to parse two consecutive backslashes for some unknown reason
+# char = "\\" ( ["'\\abfnrtv]
+# | [xX] HIGIT*
+# | [0-7][0-7]?[0-7]?
+# )
+# | .
+
+# Version taken from rawgrammar.leg, which works without problems
+char = '\\' [abefnrtv'"\[\]\\]
+     | '\\' [0-3][0-7][0-7]
+     | '\\' [0-7][0-7]?
+     | '\\' [xX] HIGIT+
+     | !'\\' .
 
symbol = HASH i:id { i }
 
@@ -187,7 +195,7 @@ BIGIT = [0-1]
 OIGIT = [0-7]
 DIGIT = [0-9]
 HIGIT = [0-9A-Fa-f]
-LETTER = [A-Za-z_]
+LETTER = [A-Za-z_$?]    # '$' and '?' are now accepted wherever LETTER is used (ALNUM below includes LETTER)
 ALNUM = LETTER | DIGIT
 SIGN = [-+]
 EXP = [eE] SIGN DIGIT+