From 1ec2c7d8ef17db1c5c7e66d97e48faaa7c96a517 Mon Sep 17 00:00:00 2001 From: MaximeBarniaudy Date: Fri, 17 May 2024 14:22:39 +0900 Subject: [PATCH] parser circularity --- grammar_parser.meta | 62 +++++++++++++------- minproto.leg | 4 +- rawgrammar.leg | 134 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 20 deletions(-) create mode 100644 rawgrammar.leg diff --git a/grammar_parser.meta b/grammar_parser.meta index 70df33b..996cd11 100644 --- a/grammar_parser.meta +++ b/grammar_parser.meta @@ -20,7 +20,6 @@ newStream(string) { position: 0, limit: len(string) ); - print("Created new stream object: { position: ", self.position, ", limit: ", self.limit, ", !atEnd(): ", !self.atEnd(), " }\n"); self; } @@ -174,7 +173,7 @@ Begin.match(stream, context, rules, actions) { End = Object.subtype(#End); End.match(stream, context, rules, actions) { - context.variables.yytext = stream.content[stream.lastBegin..stream.position]; + context.variables.yytext = stream.content[stream.lastBegin..stream.position].unescaped(); 1; } @@ -311,6 +310,19 @@ Action.execute(context) { context.outerContext.variables[context.returnValueName] = returnValue; } + returnValue; +} + +// Parse-time Action + +ParseTimeAction = Object.subtype(#ParseTimeAction); + +ParseTimeAction.match(stream, context, rules, actions) { + if(self.action.execute(context)) { + 1; + } else { + 0; + } } // Assignment @@ -328,6 +340,7 @@ Assignment.match(stream, context, rules, actions) { RuleCall = Object.subtype(#RuleCall); RuleCall.match(stream, context, rules, actions) { + if (rules[self.name] == nil) { print("Trying to call undefined rule: ", self.name, "\n"); exit(); } rules[self.name].match(stream, context, rules, actions); } @@ -348,7 +361,7 @@ rules.grammar = Sequence.new() .push(Action.new(parseTree: `{ g[d.name] = d.expression; })) )) .push(RuleCall.new(name: #end_of_file)) - .push(Action.new(parseTree: `{ print(g, full: Object.new()); })); + .push(Action.new(parseTree: `{ g; })); 
Definition = Object.subtype(#Definition); @@ -503,7 +516,7 @@ rules.identifier = Sequence.new() .push(Star.new(expression: CharacterClass.new(value: "-a-zA-Z_0-9"))) .push(End.new()) .push(RuleCall.new(name: #ws)) - .push(Action.new(parseTree: `{ yytext })); + .push(Action.new(parseTree: `{ intern(yytext); })); // ruleCallIdent = < [-a-zA-Z_][-a-zA-Z_0-9]* > - { newIdentifier(yytext); } @@ -529,7 +542,7 @@ rules.literal = Alternation.new() .push(End.new()) .push(CharacterClass.new(value: "\'")) .push(RuleCall.new(name: #ws)) - .push(Action.new(parseTree: `{ StringLiteral.new(value: yytext); })) + .push(Action.new(parseTree: `{ StringLiteral.new(string: yytext); })) ) .push(Sequence.new() .push(CharacterClass.new(value: '\"')) @@ -541,7 +554,7 @@ rules.literal = Alternation.new() .push(End.new()) .push(CharacterClass.new(value: '\"')) .push(RuleCall.new(name: #ws)) - .push(Action.new(parseTree: `{ StringLiteral.new(value: yytext); })) + .push(Action.new(parseTree: `{ StringLiteral.new(string: yytext); })) ); // class = '[' < ( !']' range )* > ']' - { newCharacterClass(yytext); } @@ -578,10 +591,7 @@ rules.range = Alternation.new() rules.char = Alternation.new() .push(Sequence.new() .push(StringLiteral.new(string: "\\")) - .push(Alternation.new() - .push(CharacterClass.new(value: "abefnrtv\'[]\\")) - .push(CharacterClass.new(value: '\"')) // I hate this - ) + .push(CharacterClass.new(value: "abefnrtv\'\"[]\\")) ) .push(Sequence.new() .push(StringLiteral.new(string: "\\")) @@ -606,13 +616,13 @@ rules.action = Sequence.new() .push(RuleCall.new(name: #ws)) .push(Action.new(parseTree: `{ Action.new(parseTree: m); })); -// metaStatement = b:metaBlock { newBlock(b); } +// metaStatement = b:metaBlock { b; } // | e:metaExpression SEMI { e; } rules.metaStatement = Alternation.new() .push(Sequence.new() .push(Assignment.new(name: #b, rule: RuleCall.new(name: #metaBlock))) - .push(Action.new(parseTree: `{ Block.new(b); })) + .push(Action.new(parseTree: `{ b; })) ) 
.push(Sequence.new() .push(Assignment.new(name: #e, rule: RuleCall.new(name: #metaExpression))) @@ -651,7 +661,7 @@ rules.metaExpression = Alternation.new(id: 1234) .push(Assignment.new(name: #i, rule: RuleCall.new(name: #metaId))) .push(RuleCall.new(name: #assign)) .push(Assignment.new(name: #e, rule: RuleCall.new(name: #metaExpression))) - .push(Action.new(parseTree: `{ SetVar.new(name: i, expr: e); })) + .push(Action.new(parseTree: `{ SetVar.new(name: i, value: e); })) ) .push(Sequence.new() .push(Assignment.new(name: #pf, rule: RuleCall.new(name: #metaPostfix))) @@ -775,7 +785,7 @@ rules.metaBlock = Sequence.new() ) ) .push(RuleCall.new(name: #rbrace)) - .push(Action.new(parseTree: `{ b; })); + .push(Action.new(parseTree: `{ Block.new(body: b); })); // metaVar = i:metaId { newGetVar(i); } // metaId = < LETTER ALNUM* > - { intern(yytext); } @@ -908,7 +918,6 @@ rules.ws = Star.new(expression: Alternation.new() // ----- Main ----- stream = newStream(readfile("rawgrammar.leg")); -//stream = newStream("rule1 = rule2* | b:rule3 {a = b;} rule4 = 'hello' "); context = Context.new(outerContext: nil).init(); actions = []; @@ -916,9 +925,24 @@ print("\nMatching : ", rules.grammar.match(stream, context, rules, actions), "\n // Execute all actions after all matching -for (actionAndContext in actions) { - //println(actionAndContext.action.parseTree); + +grammar = { for (actionAndContext in actions) { actionAndContext.action.execute(actionAndContext.context); -} -println(); +}}; + +println(grammar); + +stream2 = newStream(readfile("rawgrammar.leg")); +context2 = Context.new(outerContext: nil).init(); +actions2 = []; + +print("\nMatching : ", grammar.grammar.match(stream2, context2, grammar, actions2), "\n"); + +grammar2 = { + for (actionAndContext in actions2) { + grammar2 = actionAndContext.action.execute(actionAndContext.context); + } +}; + +println(grammar2); diff --git a/minproto.leg b/minproto.leg index 2ec032c..006c917 100644 --- a/minproto.leg +++ b/minproto.leg @@ 
-501,6 +501,8 @@ oop newStringUnescaped(char *string) case 'r' : c = '\r'; break; case 't' : c = '\t'; break; case 'v' : c = '\v'; break; + case '[' : c = '[' ; break; + case ']' : c = ']' ; break; case 'X' : case 'x' : c = readCharValue(&string, 16, -1); break; case '0'...'7': --string; c = readCharValue(&string, 8, 3); break; @@ -4325,7 +4327,7 @@ oop prim_intern(oop func, oop self, oop args, oop env) fatal("intern: argument is not of type String, got %s instead", getTypeName(indexed[0])); } - return intern(_get(indexed[0], String, value)); + return intern(String_content(indexed[0])); } oop prim_print(oop func, oop self, oop args, oop env) diff --git a/rawgrammar.leg b/rawgrammar.leg new file mode 100644 index 0000000..7a61225 --- /dev/null +++ b/rawgrammar.leg @@ -0,0 +1,134 @@ +grammar = - g:mklist + ( d:definition { g[d.name] = d.expression; } + ) + end-of-file { g; } + +definition = i:identifier ASSIGN e:expression SEMI? { Definition.new(name: i, expression: e); } + +expression = s1:sequence { s1 = Alternation.new().push(s1); } + ( BAR s2:sequence { s1.push(s2); } + ) * { s1; } + +sequence = p:prefix { p = Sequence.new().push(p); } + ( q:prefix { p.push(q); } + ) * { p; } + +prefix = AND a:action { ParseTimeAction.new(action: a); } + | + ( AND s:suffix { And.new(expression: s); } + | NOT s:suffix { Not.new(expression: s); } + | s:suffix { s; } + ) + +suffix = p:primary + ( QUERY { p = Optional.new(expression: p); } + | STAR { p = Star.new(expression: p); } + | PLUS { p = Plus.new(expression: p); } + ) ? 
{ p; } + +primary = i1:identifier COLON i2:ruleCallIdent !ASSIGN { Assignment.new(name: i1, rule: i2); } + | i:ruleCallIdent !ASSIGN { i; } + | LPAREN e:expression RPAREN { e; } + | l:literal { l; } + | c:class { c; } + | DOT { Dot.new(); } + | a:action { a; } + | BEGIN { Begin.new(); } + | END { End.new(); } + +identifier = < [-a-zA-Z_][-a-zA-Z_0-9]* > - { intern(yytext); } + +ruleCallIdent = < [-a-zA-Z_][-a-zA-Z_0-9]* > - { RuleCall.new(name: intern(yytext)); } + +literal = ['] < ( !['] char )* > ['] - { StringLiteral.new(string: yytext); } + | ["] < ( !["] char )* > ["] - { StringLiteral.new(string: yytext); } + +class = '[' < ( !']' range )* > ']' - { CharacterClass.new(value: yytext); } + +range = char '-' char | char + +char = '\\' [abefnrtv'"\[\]\\] + | '\\' [0-3][0-7][0-7] + | '\\' [0-7][0-7]? + | !'\\' . + +action = m:metaBlock - { Action.new(parseTree: m); } + +- = ( space | comment )* +space = ' ' | '\t' | end-of-line +comment = '#' ( !end-of-line . )* end-of-line +end-of-line = '\r\n' | '\n' | '\r' +end-of-file = !. + +metaStatement = b:metaBlock { b; } + | e:metaExpression SEMI { e; } + +metaExpression = p:metaPrimary + ( DOT i:metaId ASSIGN e:metaExpression # { $$ = newSetProp(p, i, e) } + | LBRAK i:metaExpression RBRAK ASSIGN e:metaExpression { SetArray.new(object: p, index: i, value: e); } + ) + | i:metaId ASSIGN e:metaExpression { SetVar.new(name: i, value: e); } + | pf:metaPostfix { pf; } + +metaPostfix = p:metaPrimary + ( DOT i:metaId a:args !ASSIGN !LBRACE { p = Invoke.new(self: p, method: i, arguments: a); } + | DOT i:metaId !ASSIGN { p = GetProp.new(object: p, key: i); } + | a:args !ASSIGN !LBRACE { p = Call.new(function: p, arguments: a); } + ) * { p; } + +args = LPAREN a:mklist + ( + ( k:metaId COLON e:metaExpression { a[k] = e; } + | e:metaExpression { a.push(e); } + ) + ( COMMA + ( k:metaId COLON e:metaExpression { a[k] = e; } + | e:metaExpression { a.push(e); } + ) + ) * + ) ? 
RPAREN { a; } + +mklist = { Object.new(); } + +metaPrimary = nil | metaVar | metaSubExpr + +metaSubExpr = LPAREN e:metaExpression RPAREN { e; } + +metaBlock = LBRACE b:mklist + ( e:metaStatement { b.push(e); } + ) * RBRACE { Block.new(body: b); } + +nil = NIL { nil; } + +metaVar = i:metaId { GetVar.new(name: i); } + +metaId = < LETTER ALNUM* > - { intern(yytext); } + +DIGIT = [0-9] +LETTER = [A-Za-z_] +ALNUM = LETTER | DIGIT + +BAR = '|' - +NOT = '!' - +QUERY = '?' - +BEGIN = '<' - +END = '>' - +TILDE = '~' - +RPERCENT = '%}' - + +NIL = "nil" !ALNUM - + +SEMI = ";" - +ASSIGN = "=" ![=] - +COMMA = "," - +COLON = ":" - +LPAREN = "(" - +RPAREN = ")" - +LBRAK = "[" - +RBRAK = "]" - +LBRACE = "{" - +RBRACE = "}" - +AND = "&" ![&=] - +PLUS = "+" ![+=] - +STAR = "*" ![=] - +DOT = "." ![.] - +