# Copyright 2005 Kevin Reid, under the terms of the MIT X license
# found at http://www.opensource.org/licenses/mit-license.html ................

pragma.enable("easy-return")
pragma.disable("explicit-result-guard")
pragma.enable("dot-props")
pragma.enable("verb-curry")
pragma.enable("accumulator")
pragma.enable("trinary-define")

def makeLALR1Parser :=
def makeParseError := makeLALR1Parser::makeParseError
def EOF := makeLALR1Parser::EOF
def mapStream :=
def EventuallyDeepFrozen := DeepFrozen."eventually"()

# return a list of f applied to each element of coll
def mapV implements DeepFrozen {
  to run(f, coll) {
    return accum [] for v in coll { _.with(f(v)) }
  }
}

# parse action: prepend an item to a list
def listAction implements DeepFrozen {
  to run(item, list) {
    return [item] + list
  }
}

# guard accepting broken references, coercing them to their problem
def ToProblem implements DeepFrozen {
  to coerce(specimen, optEjector) {
    if (Ref.optProblem(specimen) =~ p :notNull) {
      return p
    } else {
      throw.eject(optEjector, E.toQuote(specimen) + " is not broken")
    }
  }
}

# lexer for the grammar-description language used in lalr1`...` templates
def lex implements DeepFrozen {
  to run(textIn) {
    # XXX get a better lexer library/pattern
    var text := textIn.read(EIO::ALL, EIO::ALL)
    #traceln(`text is ->$text<-, match ${text =~ rx`(?s)"(@mid.*)`} ->$mid<-`)
    def tokens := [].diverge()
    while (!(text <=> "")) {
      switch (text) {
        match rx`(?s)\s+(@r.*)` {
          text := r
        }
        match rx`(?s)\.(@r.*)` {
          text := r
          tokens.push(["end", null])
        }
        match rx`(?s):=(@r.*)` {
          text := r
          tokens.push(["derives", null])
        }
        match rx`(?s)=>(@r.*)` {
          text := r
          tokens.push(["yields", null])
        }
        match rx`(?s)(@t[$$@@])\{(@n[0-9]+)\}(@r.*)` {
          text := r
          tokens.push(["Quasi" + t, __makeInt(n)])
        }
        match rx`(?s)(@t[a-zA-Z_][a-zA-Z_0-9-]*)(@r.*)` {
          # XXX Unicode character categories
          # XXX the character syntax here was arbitrarily copied from the TermLexer
          text := r
          tokens.push(["symbol", t])
        }
        match rx`(?s)"(@mid.*)` {
          def lexerRet := __return
          def rx`(?s)(@t(?:[^@@$$"]|@@@@|$$$$|\\[\\"])*)"(@r.*)` := \
            (mid, def fail(p) {
              lexerRet(Ref.broken(`unclosed/malformed string literal ($p): "$mid`))
            })
          # XXX doesn't handle \
          text := r
          def unquote := t.replaceAll("@@", "@").replaceAll("$$", "$").replaceAll("\\\\", "\\").replaceAll("\\\"", "\"")
          tokens.push(["string", unquote])
        }
        match _ {
          return Ref.broken(`unexpected grammar token: $text`)
        }
      }
    }
    return tokens.snapshot().asStream()
  }
}

# parser for the grammar-description language itself; parsing a template
# yields a ValueMaker whose substitute produces the quasi-defined parser
def metaparser := makeLALR1Parser(
  "grammar",
  "grammar",
  ["string", "derives", "yields", "symbol", "Quasi$", "end"],
  [["grammar", ["string", "derives", "item", "end", "productions"],
    def _(name, _, start, _, qproductions) {
      def terminalSymbolsFlex := [].diverge()
      if (start =~ startTerminal :int) {
        terminalSymbolsFlex.push(startTerminal)
      }
      def productions := accum [] for [left, right] + actionList in qproductions { _.with(
        # collect the indexes of values resulting from terminals,
        # which have no actions and therefore don't have nor need
        # our qpIntermediate gimmick
        var leaves := [].asSet()
        for argPosition => quasiSym :int in right {
          terminalSymbolsFlex.push(quasiSym)
          leaves with= argPosition
        }
        # cl-yacc doesn't allow parameterizing the actions after
        # the grammar has been built, so the quasiStubAction
        # generates closures which are supplied with the
        # ValueMaker#substitute args after the parse finishes
        def quasiStubAction {
          match [=="run", args] {
            def qpIntermediate(qSubArgs) {
              def userArgs := \
                accum [] for i => parg in args { _.with(
                  if (leaves.contains(i)) {
                    # a value from a terminal
                    parg
                  } else {
                    # parg is another qpIntermediate
                    parg(qSubArgs)
                  }
                )}
              return \
                if (actionList =~ [actionFnIndex]) {
                  E.call(qSubArgs[actionFnIndex], "run", userArgs)
                } else {
                  userArgs
                }
            }
          }
        }
        [left, right, quasiStubAction]
      )}
      def terminalSymbols := terminalSymbolsFlex.snapshot()
      def ePlainParser := makeLALR1Parser(name, start, terminalSymbols, productions,
                                          ["tokenFunction" => __identityFunc])
      def valueMaker {
        to substitute(args :List) {
          def terminalMap := accum [].asMap() for x in terminalSymbols { _.with(args[x], x) }
          require(terminalMap.size() == terminalSymbols.size(),
                  thunk { `duplicate terminals in $args` })
          # this could be done more simply with a strict 'with'
          def quasiResultParser implements EventuallyDeepFrozen {
            # must be E.D.F. since args can be anything
            method __optSealedDispatch(brand) :any {
              if (brand == EventuallyDeepFrozen.getPeekBrand()) {
                EventuallyDeepFrozen.getPeekSealer().seal(meta.getState())
              }
            }
            to __printOn(tw :TextWriter) {
              tw.write("<")
              tw.print(name)
              tw.write(" quasi-defined LALR(1) parser>")
            }
            to parse(stream) {
              def intermediate := ePlainParser.parse(mapStream(any, stream,
                def quasiParserSymbolizer(token) {
                  def [sym, val] := token
                  return [terminalMap.fetch(sym, thunk {
                    throw(`${E.toQuote(sym)} is not a terminal symbol in $quasiResultParser`)
                  }), val]
                }))
              return \
                switch (intermediate) {
                  match p :ToProblem {
                    def unmap(psym) {
                      return if (psym == EOF) { psym } else { args[psym] }
                    }
                    Ref.broken(makeParseError([
                      "found" => unmap(p::found),
                      "expected" => mapV(unmap, p::expected).asSet()]))
                  }
                  match good {
                    good.run(args)
                  }
                }
            }
          }
          return quasiResultParser
        }
      }
      return valueMaker
    }],
   ["productions", [], thunk { [] }],
   ["productions", ["production", "productions"], listAction],
   ["production", ["symbol", "derives", "seq", "end"],
    def _(symbol, _, seq, _) {
      return [symbol, seq]
    }],
   ["production", ["symbol", "derives", "seq", "yields", "Quasi$", "end"],
    def _(symbol, _, seq, _, actionIndex, _) {
      return [symbol, seq, actionIndex]
    }],
   ["seq", [], thunk { [] }],
   ["seq", ["item", "seq"], listAction],
   ["item", ["symbol"], __identityFunc],
   ["item", ["Quasi$"], __identityFunc]],
  ["tokenFunction" => __identityFunc])

(def lalr1__quasiParser implements DeepFrozen {
  to valueMaker(template :String) {
    return metaparser.parse(lex(template.asStream()))
  }

  to matchMaker(_) {
    throw(`${[meta.context().getFQNPrefix().split("$")[0]]} cannot be used in a pattern`)
  }
})
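
# Usage sketch (not part of the original source; the grammar, terminal tags,
# and action functions below are illustrative assumptions).  A parser is
# written as a lalr1`...` quasi-literal: a quoted name and the start item,
# then productions each ending in ".", with "=>" followed by a hole naming
# the production's action function.  $-holes inside production bodies supply
# terminal tags, and the stream handed to parse is expected to carry
# [tag, value] token pairs, matching quasiParserSymbolizer above.
#
#   def plus := "plus"
#   def number := "number"
#   def add(l, _, r) { return l + r }
#   def pass(x) { return x }
#   def arith := lalr1`
#     "arith" := expr.
#     expr := expr $plus term => $add.
#     expr := term => $pass.
#     term := $number => $pass.
#   `
#   # should yield 3, or a broken reference describing the parse error
#   arith.parse([["number", 1], ["plus", null], ["number", 2]].asStream())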