From 03fa75efba1a494f2fd83c00d3685589156b4fe3 Mon Sep 17 00:00:00 2001 From: hanjoes Date: Sat, 4 Jun 2016 15:14:12 +0800 Subject: [PATCH] differentiate prefix/postfix type decl. --- tool/src/org/antlr/v4/parse/ScopeParser.java | 455 +++++++++++-------- 1 file changed, 264 insertions(+), 191 deletions(-) diff --git a/tool/src/org/antlr/v4/parse/ScopeParser.java b/tool/src/org/antlr/v4/parse/ScopeParser.java index 743fe980d..7599c9ffc 100644 --- a/tool/src/org/antlr/v4/parse/ScopeParser.java +++ b/tool/src/org/antlr/v4/parse/ScopeParser.java @@ -32,6 +32,7 @@ package org.antlr.v4.parse; import org.antlr.runtime.BaseRecognizer; import org.antlr.runtime.CommonToken; +import org.antlr.v4.runtime.misc.IntegerList; import org.antlr.v4.runtime.misc.Pair; import org.antlr.v4.tool.Attribute; import org.antlr.v4.tool.AttributeDict; @@ -42,110 +43,78 @@ import org.antlr.v4.tool.ast.ActionAST; import java.util.ArrayList; import java.util.List; -/** Parse args, return values, locals - * - * rule[arg1, arg2, ..., argN] returns [ret1, ..., retN] - * - * text is target language dependent. Java/C#/C/C++ would - * use "int i" but ruby/python would use "i". +/** + * Parse args, return values, locals + *

+ * rule[arg1, arg2, ..., argN] returns [ret1, ..., retN] + *

+ * text is target language dependent. Java/C#/C/C++ would + * use "int i" but ruby/python would use "i". */ public class ScopeParser { - /** Given an arg or retval scope definition list like - * - * - * Map<String, String>, int[] j3, char *foo32[3] - * - * - * or - * - * - * int i=3, j=a[34]+20 - * - * - * convert to an attribute scope. - */ + /** + * Given an arg or retval scope definition list like + *

+ * + * Map<String, String>, int[] j3, char *foo32[3] + * + *

+ * or + *

+ * + * int i=3, j=a[34]+20 + * + *

+ * convert to an attribute scope. + */ public static AttributeDict parseTypedArgList(ActionAST action, String s, Grammar g) { return parse(action, s, ',', g); } - public static AttributeDict parse(ActionAST action, String s, char separator, Grammar g) { - AttributeDict dict = new AttributeDict(); + public static AttributeDict parse(ActionAST action, String s, char separator, Grammar g) { + AttributeDict dict = new AttributeDict(); List> decls = splitDecls(s, separator); for (Pair decl : decls) { -// System.out.println("decl="+decl); - if ( decl.a.trim().length()>0 ) { - Attribute a = parseAttributeDef(action, decl, g); - dict.add(a); - } + if (decl.a.trim().length() > 0) { + Attribute a = parseAttributeDef(action, decl, g); + dict.add(a); + } } - return dict; - } + return dict; + } - /** For decls like "String foo" or "char *foo32[]" compute the ID - * and type declarations. Also handle "int x=3" and 'T t = new T("foo")' - * but if the separator is ',' you cannot use ',' in the initvalue - * unless you escape use "\," escape. - */ - public static Attribute parseAttributeDef(ActionAST action, Pair decl, Grammar g) { - if ( decl.a==null ) return null; - Attribute attr = new Attribute(); - boolean inID = false; - int start = -1; - int rightEdgeOfDeclarator = decl.a.length()-1; - int equalsIndex = decl.a.indexOf('='); - if ( equalsIndex>0 ) { - // everything after the '=' is the init value - attr.initValue = decl.a.substring(equalsIndex+1,decl.a.length()); - rightEdgeOfDeclarator = equalsIndex-1; - } - // walk backwards looking for start of an ID - for (int i=rightEdgeOfDeclarator; i>=0; i--) { - // if we haven't found the end yet, keep going - if ( !inID && Character.isLetterOrDigit(decl.a.charAt(i)) ) { - inID = true; - } - else if ( inID && - !(Character.isLetterOrDigit(decl.a.charAt(i))|| - decl.a.charAt(i)=='_') ) { - start = i+1; - break; - } - } - if ( start<0 && inID ) { - start = 0; - } - if ( start<0 ) { - g.tool.errMgr.grammarError(ErrorType.CANNOT_FIND_ATTRIBUTE_NAME_IN_DECL, g.fileName, action.token, decl); - } - // walk forwards looking for end of an ID - int stop=-1; - for (int i=start; i<=rightEdgeOfDeclarator; i++) { - // if we haven't found the end yet, keep going - if ( !(Character.isLetterOrDigit(decl.a.charAt(i))|| - decl.a.charAt(i)=='_') ) - { - stop = i; - break; - } - if ( i==rightEdgeOfDeclarator ) { - stop = i+1; - } - } + /** + * For decls like "String foo" or "char *foo32[]" compute the ID + * and type declarations. Also handle "int x=3" and 'T t = new T("foo")' + * but if the separator is ',' you cannot use ',' in the initvalue + * unless you escape use "\," escape. + */ + public static Attribute parseAttributeDef(ActionAST action, Pair decl, Grammar g) { + if (decl.a == null) return null; - // the name is the last ID - attr.name = decl.a.substring(start,stop); + Attribute attr = new Attribute(); + int rightEdgeOfDeclarator = decl.a.length() - 1; + int equalsIndex = decl.a.indexOf('='); + if (equalsIndex > 0) { + // everything after the '=' is the init value + attr.initValue = decl.a.substring(equalsIndex + 1, decl.a.length()); + rightEdgeOfDeclarator = equalsIndex - 1; + } - // the type is the decl minus the ID (could be empty) - attr.type = decl.a.substring(0,start); - if ( stop<=rightEdgeOfDeclarator ) { - attr.type += decl.a.substring(stop,rightEdgeOfDeclarator+1); - } - attr.type = attr.type.trim(); - if ( attr.type.length()==0 ) { - attr.type = null; - } + String declarator = decl.a.substring(0, rightEdgeOfDeclarator + 1); + Pair p; + if (decl.a.indexOf(':') != -1) { + // declarator has type appear after the name + p = _parsePostfixDecl(attr, declarator, action, g); + } + else { + // declarator has type appear before the name + p = _parsePrefixDecl(attr, declarator, action, g); + } + int idStart = p.a; + int idStop = p.b; - attr.decl = decl.a; + attr.decl = decl.a; if (action != null) { String actionText = action.getText(); @@ -163,6 +132,7 @@ public class ScopeParser { int[] charIndexes = new int[actionText.length()]; for (int i = 0, j = 0; i < actionText.length(); i++, j++) { charIndexes[j] = i; + // skip comments if (i < actionText.length() - 1 && actionText.charAt(i) == '/' && actionText.charAt(i + 1) == '/') { while (i < actionText.length() && actionText.charAt(i) != '\n') { i++; @@ -171,10 +141,10 @@ public class ScopeParser { } int declOffset = charIndexes[decl.b]; - int declLine = lines[declOffset + start]; + int declLine = lines[declOffset + idStart]; int line = action.getToken().getLine() + declLine; - int charPositionInLine = charPositionInLines[declOffset + start]; + int charPositionInLine = charPositionInLines[declOffset + idStart]; if (declLine == 0) { /* offset for the start position of the ARG_ACTION token, plus 1 * since the ARG_ACTION text had the leading '[' stripped before @@ -183,119 +153,222 @@ public class ScopeParser { charPositionInLine += action.getToken().getCharPositionInLine() + 1; } - int offset = ((CommonToken)action.getToken()).getStartIndex(); - attr.token = new CommonToken(action.getToken().getInputStream(), ANTLRParser.ID, BaseRecognizer.DEFAULT_TOKEN_CHANNEL, offset + declOffset + start + 1, offset + declOffset + stop); + int offset = ((CommonToken) action.getToken()).getStartIndex(); + attr.token = new CommonToken(action.getToken().getInputStream(), ANTLRParser.ID, BaseRecognizer.DEFAULT_TOKEN_CHANNEL, offset + declOffset + idStart + 1, offset + declOffset + idStop); attr.token.setLine(line); attr.token.setCharPositionInLine(charPositionInLine); assert attr.name.equals(attr.token.getText()) : "Attribute text should match the pseudo-token text at this point."; } - return attr; - } + return attr; + } - /** Given an argument list like - * - * x, (*a).foo(21,33), 3.2+1, '\n', - * "a,oo\nick", {bl, "fdkj"eck}, ["cat\n,", x, 43] - * - * convert to a list of attributes. Allow nested square brackets etc... - * Set separatorChar to ';' or ',' or whatever you want. - */ - public static List> splitDecls(String s, int separatorChar) { - List> args = new ArrayList>(); - _splitArgumentList(s, 0, -1, separatorChar, args); - return args; - } + public static Pair _parsePrefixDecl(Attribute attr, String decl, ActionAST a, Grammar g) { + // walk backwards looking for start of an ID + boolean inID = false; + int start = -1; + for (int i = decl.length() - 1; i >= 0; i--) { + char ch = decl.charAt(i); + // if we haven't found the end yet, keep going + if (!inID && Character.isLetterOrDigit(ch)) { + inID = true; + } + else if (inID && !(Character.isLetterOrDigit(ch) || ch == '_')) { + start = i + 1; + break; + } + } + if (start < 0 && inID) { + start = 0; + } + if (start < 0) { + g.tool.errMgr.grammarError(ErrorType.CANNOT_FIND_ATTRIBUTE_NAME_IN_DECL, g.fileName, a.token, decl); + } - public static int _splitArgumentList(String actionText, - int start, - int targetChar, - int separatorChar, - List> args) - { - if ( actionText==null ) { - return -1; - } + // walk forward looking for end of an ID + int stop = -1; + for (int i = start; i < decl.length(); i++) { + char ch = decl.charAt(i); + // if we haven't found the end yet, keep going + if (!(Character.isLetterOrDigit(ch) || ch == '_')) { + stop = i; + break; + } + if (i == decl.length() - 1) { + stop = i + 1; + } + } - actionText = actionText.replaceAll("//[^\\n]*", ""); - int n = actionText.length(); - //System.out.println("actionText@"+start+"->"+(char)targetChar+"="+actionText.substring(start,n)); - int p = start; - int last = p; - while ( p',p+1)>=p ) { - // do we see a matching '>' ahead? if so, hope it's a generic - // and not less followed by expr with greater than - p = _splitArgumentList(actionText,p+1,'>',separatorChar,args); - } - else { - p++; // treat as normal char - } - break; - case '[' : - p = _splitArgumentList(actionText,p+1,']',separatorChar,args); - break; - default : - if ( c==separatorChar && targetChar==-1 ) { - String arg = actionText.substring(last, p); + // the name is the last ID + attr.name = decl.substring(start, stop); + + // the type is the decl minus the ID (could be empty) + attr.type = decl.substring(0, start); + if (stop <= decl.length() - 1) { + attr.type += decl.substring(stop, decl.length()); + } + + attr.type = attr.type.trim(); + if (attr.type.length() == 0) { + attr.type = null; + } + return new Pair(start, stop); + } + + public static Pair _parsePostfixDecl(Attribute attr, String decl, ActionAST a, Grammar g) { + int start = -1; + int stop = -1; + int colon = decl.indexOf(':'); + int namePartEnd = colon == -1 ? decl.length() : colon; + + // look for start of name + for (int i = 0; i < namePartEnd; ++i) { + char ch = decl.charAt(i); + if (Character.isLetterOrDigit(ch) || ch == '_') { + start = i; + break; + } + } + + if (start == -1) { + start = 0; + g.tool.errMgr.grammarError(ErrorType.CANNOT_FIND_ATTRIBUTE_NAME_IN_DECL, g.fileName, a.token, decl); + } + + // look for stop of name + for (int i = start; i < namePartEnd; ++i) { + char ch = decl.charAt(i); + if (!(Character.isLetterOrDigit(ch) || ch == '_')) { + stop = i; + break; + } + if (i == namePartEnd - 1) { + stop = namePartEnd; + } + } + + if (stop == -1) { + stop = start; + } + + // extract name from decl + attr.name = decl.substring(start, stop); + + // extract type from decl (could be empty) + if (colon == -1) { + attr.type = ""; + } + else { + attr.type = decl.substring(colon + 1, decl.length()); + } + attr.type = attr.type.trim(); + + if (attr.type.length() == 0) { + attr.type = null; + } + return new Pair(start, stop); + } + + /** + * Given an argument list like + *

+ * x, (*a).foo(21,33), 3.2+1, '\n', + * "a,oo\nick", {bl, "fdkj"eck}, ["cat\n,", x, 43] + *

+ * convert to a list of attributes. Allow nested square brackets etc... + * Set separatorChar to ';' or ',' or whatever you want. + */ + public static List> splitDecls(String s, int separatorChar) { + List> args = new ArrayList>(); + _splitArgumentList(s, 0, -1, separatorChar, args); + return args; + } + + public static int _splitArgumentList(String actionText, + int start, + int targetChar, + int separatorChar, + List> args) { + if (actionText == null) { + return -1; + } + + actionText = actionText.replaceAll("//[^\\n]*", ""); + int n = actionText.length(); + //System.out.println("actionText@"+start+"->"+(char)targetChar+"="+actionText.substring(start,n)); + int p = start; + int last = p; + while (p < n && actionText.charAt(p) != targetChar) { + int c = actionText.charAt(p); + switch (c) { + case '\'': + p++; + while (p < n && actionText.charAt(p) != '\'') { + if (actionText.charAt(p) == '\\' && (p + 1) < n && + actionText.charAt(p + 1) == '\'') { + p++; // skip escaped quote + } + p++; + } + p++; + break; + case '"': + p++; + while (p < n && actionText.charAt(p) != '\"') { + if (actionText.charAt(p) == '\\' && (p + 1) < n && + actionText.charAt(p + 1) == '\"') { + p++; // skip escaped quote + } + p++; + } + p++; + break; + case '(': + p = _splitArgumentList(actionText, p + 1, ')', separatorChar, args); + break; + case '{': + p = _splitArgumentList(actionText, p + 1, '}', separatorChar, args); + break; + case '<': + if (actionText.indexOf('>', p + 1) >= p) { + // do we see a matching '>' ahead? if so, hope it's a generic + // and not less followed by expr with greater than + p = _splitArgumentList(actionText, p + 1, '>', separatorChar, args); + } else { + p++; // treat as normal char + } + break; + case '[': + p = _splitArgumentList(actionText, p + 1, ']', separatorChar, args); + break; + default: + if (c == separatorChar && targetChar == -1) { + String arg = actionText.substring(last, p); int index = last; while (index < p && Character.isWhitespace(actionText.charAt(index))) { index++; } - //System.out.println("arg="+arg); - args.add(new Pair(arg.trim(), index)); - last = p+1; - } - p++; - break; - } - } - if ( targetChar==-1 && p<=n ) { - String arg = actionText.substring(last, p).trim(); + //System.out.println("arg="+arg); + args.add(new Pair(arg.trim(), index)); + last = p + 1; + } + p++; + break; + } + } + if (targetChar == -1 && p <= n) { + String arg = actionText.substring(last, p).trim(); int index = last; while (index < p && Character.isWhitespace(actionText.charAt(index))) { index++; } - //System.out.println("arg="+arg); - if ( arg.length()>0 ) { - args.add(new Pair(arg.trim(), index)); - } - } - p++; - return p; - } + //System.out.println("arg="+arg); + if (arg.length() > 0) { + args.add(new Pair(arg.trim(), index)); + } + } + p++; + return p; + } }