Ich benutze antlr4, um ein Python-Ziel mit der Python3.g4-Grammatikdatei aus dem Antlr-Grammatik-Repo zu generieren. Die generierte Python3Lexer.py-Datei enthielt Java-Code, den ich in Python übersetzen musste. Hier sind die beiden Java-Segmente es ausgegeben wird, können Sie sie finden sowohl in der python3 Grammatikdatei here alsoÜbersetzen von Java zu Python in antlr4 python3 target
// A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
// The stack that keeps track of the indentation level.
private java.util.Stack<Integer> indents = new java.util.Stack<>();
// The amount of opened braces, brackets and parenthesis.
private int opened = 0;
// The most recently produced token.
private Token lastToken = null;
@Override
public void emit(Token t) {
super.setToken(t);
tokens.offer(t);
}
@Override
public Token nextToken() {
// Check if the end-of-file is ahead and there are still some DEDENTS expected.
if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
// Remove any trailing EOF tokens from our buffer.
for (int i = tokens.size() - 1; i >= 0; i--) {
if (tokens.get(i).getType() == EOF) {
tokens.remove(i);
}
}
// First emit an extra line break that serves as the end of the statement.
this.emit(commonToken(Python3Parser.NEWLINE, "\n"));
// Now emit as much DEDENT tokens as needed.
while (!indents.isEmpty()) {
this.emit(createDedent());
indents.pop();
}
// Put the EOF back on the token stream.
this.emit(commonToken(Python3Parser.EOF, "<EOF>"));
}
Token next = super.nextToken();
if (next.getChannel() == Token.DEFAULT_CHANNEL) {
// Keep track of the last token on the default channel.
this.lastToken = next;
}
return tokens.isEmpty() ? next : tokens.poll();
}
private Token createDedent() {
CommonToken dedent = commonToken(Python3Parser.DEDENT, "");
dedent.setLine(this.lastToken.getLine());
return dedent;
}
private CommonToken commonToken(int type, String text) {
int stop = this.getCharIndex() - 1;
int start = text.isEmpty() ? stop : stop - text.length() + 1;
return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
}
static int getIndentationCount(String spaces) {
int count = 0;
for (char ch : spaces.toCharArray()) {
switch (ch) {
case '\t':
count += 8 - (count % 8);
break;
default:
// A normal space char.
count++;
}
}
return count;
}
boolean atStartOfInput() {
return super.getCharPositionInLine() == 0 && super.getLine() == 1;
}
und
String newLine = getText().replaceAll("[^\r\n\f]+", "");
String spaces = getText().replaceAll("[\r\n\f]+", "");
int next = _input.LA(1);
if (opened > 0 || next == '\r' || next == '\n' || next == '\f' || next == '#') {
// If we're inside a list or on a blank line, ignore all indents,
// dedents and line breaks.
skip();
}
else {
emit(commonToken(NEWLINE, newLine));
int indent = getIndentationCount(spaces);
int previous = indents.isEmpty() ? 0 : indents.peek();
if (indent == previous) {
// skip indents of the same size as the present indent-size
skip();
}
else if (indent > previous) {
indents.push(indent);
emit(commonToken(Python3Parser.INDENT, spaces));
}
else {
// Possibly emit more than 1 DEDENT token.
while(!indents.isEmpty() && indents.peek() > indent) {
this.emit(createDedent());
indents.pop();
}
}
}
übersetzte ich diese selbst zu:
# A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
tokens = deque()
# The stack that keeps track of the indentation level.
# https://docs.python.org/3/tutorial/datastructures.html#using-lists-as-stacks
indents = []
# The amount of opened braces, brackets and parenthesis.
opened = 0
# The most recently produced token.
lastToken = None
def emit(self, t):
self._token = t
self.tokens.append(t)
def nextToken(self):
# Check if the end-of-file is ahead and there are still some DEDENTS expected.
if self._input.LA(1) == Token.EOF and self.indents.size() != 0:
# Remove any trailing EOF tokens from our buffer.
for i in range(tokens.size() - 1, 0, -1):
if self.tokens[i].getType() == Token.EOF:
self.tokens.remove(i)
# First emit an extra line break that serves as the end of the statement.
self.emit(commonToken(Python3Parser.NEWLINE, "\n"))
# Now emit as much DEDENT tokens as needed.
while self.indents.size() != 0:
self.emit(createDedent())
self.indents.pop()
# Put the EOF back on the token stream.
self.emit(commonToken(Python3Parser.EOF, "<EOF>"))
next = self.nextToken()
if next.getChannel() == Token.DEFAULT_CHANNEL:
# Keep track of the last token on the default channel.
self.lastToken = next
return next if self.tokens.size() == 0 else self.tokens.popleft()
def createDedent():
dedent = commonToken(Python3Parser.DEDENT, "")
dedent.setLine(self.lastToken.getLine())
return dedent
def commonToken(self, type, text):
stop = self.getCharIndex() - 1
start = stop if text.size() == 0 else stop - text.size() + 1
return CommonToken(self._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop)
def getIndentationCount(spaces):
count = 0
for ch in spaces:
if ch == '\t':
count += 8 - (count % 8)
break
else:
# A normal space char.
count = count + 1
return count
def atStartOfInput(self):
return self.getCharPositionInLine() == 0 and self.getLine() == 1
und
newLine = getText().replaceAll("[^\r\n\f]+", "")
spaces = getText().replaceAll("[\r\n\f]+", "")
next = self._input.LA(1)
if opened > 0 or next == '\r' or next == '\n' or next == '\f' or next == '#':
# If we're inside a list or on a blank line, ignore all indents,
# dedents and line breaks.
skip()
else:
emit(commonToken(NEWLINE, newLine))
indent = getIndentationCount(spaces)
previous = 0 if indents.isEmpty() else indents.peek()
if indent == previous:
# skip indents of the same size as the present indent-size
skip()
elif indent > previous:
indents.push(indent)
emit(commonToken(Python3Parser.INDENT, spaces))
else:
# Possibly emit more than 1 DEDENT token.
while not indents.isEmpty() and indents.peek() > indent:
self.emit(createDedent())
indents.pop()
und das ist mein Python-Skript, die antlr Ausgabe mit dem Python innen anstelle des Java-Schnipsel laufen. mit dem Ran-Befehl python main.py test.py
import sys
from antlr4 import *
from Python3Lexer import Python3Lexer
from Python3Parser import Python3Parser
from Python3Listener import Python3Listener
class FuncPrinter(Python3Listener):
def enterFuncdef(self, ctx):
print("Oh, a func")
def main(argv):
input = FileStream(argv[1])
lexer = Python3Lexer(input)
stream = CommonTokenStream(lexer)
parser = Python3Parser(stream)
tree = parser.funcdef()
printer = KeyPrinter()
walker = ParseTreeWalker()
walker.walk(printer, tree)
if __name__ == '__main__':
main(sys.argv)
Es Fehler und drucken Sie die folgende Spur
Traceback (most recent call last):
File "main.py", line 24, in <module>
main(sys.argv)
File "main.py", line 17, in main
tree = parser.parameters()
File "...\antler-test\Python3Parser.py", line 1297, in parameters
self.enterRule(localctx, 14, self.RULE_parameters)
File "...\antler-test\antlr4\Parser.py", line 358, in enterRule
self._ctx.start = self._input.LT(1)
File "...\antler-test\antlr4\CommonTokenStream.py", line 61, in LT
self.lazyInit()
File "...\antler-test\antlr4\BufferedTokenStream.py", line 186, in lazyInit
self.setup()
File "...\antler-test\antlr4\BufferedTokenStream.py", line 189, in setup
self.sync(0)
File "...\antler-test\antlr4\BufferedTokenStream.py", line 111, in sync
fetched = self.fetch(n)
File "...\antler-test\antlr4\BufferedTokenStream.py", line 123, in fetch
t = self.tokenSource.nextToken()
File "...\antler-test\Python3Lexer.py", line 698, in nextToken
next = self.nextToken()
File "...\antler-test\Python3Lexer.py", line 698, in nextToken
next = self.nextToken()
File "...\antler-test\Python3Lexer.py", line 698, in nextToken
next = self.nextToken()
[Previous line repeated 985 more times]
File "...\antler-test\Python3Lexer.py", line 680, in nextToken
if self._input.LA(1) == Token.EOF and self.indents.size() != 0:
File "...\antler-test\antlr4\InputStream.py", line 49, in LA
if offset==0:
RecursionError: maximum recursion depth exceeded in comparison
die Eingabedatei wie folgt aussieht:
def fun1():
return None
def fun2():
return None
Ich bin nicht sicher, ob ich den Python falsch übersetzt oder der rekursive Algorithmus ist einfach zu viel für python, aber ich kann auch nicht herausfinden, wie der Algorithmus zu ändern, für die nexttoken Methode iterativ sein, da es nicht Schwanz recursiv ist e. Vielleicht könnte das jemand herausfinden? Oder gibt es ein anderes Problem mit dem, was ich mache?