// gengrammar.beh -- Grammar generation
// $Id: gengrammar.beh,v 1.8 2001/03/05 07:49:53 dougo Exp $

Program {
  /**
   * Mark classes which are not parsed.
   *
   * A DontParse/DoParse seen inside a class definition only affects that
   * class; one seen outside any class definition changes the default used
   * for every class definition visited afterwards.
   */
  private void markNotParsed() to { ClassDef, DoParse, DontParse } {
    // Default applied to each class definition as it is entered.
    {{ boolean classes_are_parsed = true; }}
    // Whether we are inside a class definition, and that class's own setting.
    {{ boolean local_to_class, this_class_is_parsed; }}
    before ClassDef {{
      local_to_class = true;
      this_class_is_parsed = classes_are_parsed;
    }}
    after ClassDef {{
      local_to_class = false;
      if (!this_class_is_parsed) host.markNotParsed();
    }}
    before DontParse {{
      if (local_to_class) this_class_is_parsed = false;
      else classes_are_parsed = false;
    }}
    before DoParse {{
      if (local_to_class) this_class_is_parsed = true;
      else classes_are_parsed = true;
    }}
  }
}

Program {
  /**
   * Generate grammar file.
   *
   * Opens the output file, writes the preamble, the per-class rules, the
   * rules for unknown terminal classes and the fixed trailing productions
   * and lexical specification, then closes the output file.
   */
  private void generateGrammar(File file) {{
    openOutputFile(file);
    try {
      generatePrecode();
      generateRules();
      generateTerminalRules();
      generatePostcode();
    } finally {
      // Close the output even when one of the generation steps throws.
      closeOutputFile();
    }
  }}

  /**
   * Print the grammar preamble: the JavaCC options and PARSER_BEGIN marker
   * (JAVACC only), the package and import declarations, and the Parser
   * class with its unescapifyChar/unescapify helpers.  For JAVACC the
   * class is closed here and followed by PARSER_END; otherwise it is left
   * open (generatePostcode prints the closing brace for MPARSE).
   */
  private void generatePrecode() {{
    if (grammar_type == JAVACC) {
      out.println("options {");
      out.println(" STATIC = false;");
      out.println(" JAVA_UNICODE_ESCAPE = true;");
      out.println("}");
      out.println();
      out.println("PARSER_BEGIN(Parser)");
    }
    generateParserPackageDecl();
    generateParserImports();
    // This should be a separate file in a JAR archive...
    out.println("public " + (grammar_type == MPARSE ? "grammar " : "") + "class Parser { ");
    out.println(" // oit is uugly. Why isn't there a Character.valueOf(String)? ");
    out.println(" static char unescapifyChar(String s) { ");
    out.println(" char c = s.charAt(0); ");
    out.println(" if (c == '\\\\') { ");
    out.println(" switch (s.charAt(1)) { ");
    out.println(" case 'n': c = '\\n'; break; ");
    out.println(" case 't': c = '\\t'; break; ");
    out.println(" case 'b': c = '\\b'; break; ");
    out.println(" case 'r': c = '\\r'; break; ");
    out.println(" case 'f': c = '\\f'; break; ");
    out.println(" case '\\\\': c = '\\\\'; break; ");
    out.println(" case '\\'': c = '\\''; break; ");
    out.println(" case '\\\"': c = '\\\"'; break; ");
    out.println(" default: ");
    out.println(" c = (char) Integer.parseInt(s.substring(1, s.length()), 8); ");
    out.println(" break; ");
    out.println(" } ");
    out.println(" } ");
    out.println(" return c; ");
    out.println(" } ");
    out.println(" // Even uglier... ");
    out.println(" static String unescapify(String s) { ");
    out.println(" char str[] = new char[s.length()]; ");
    out.println(" int i = 0, o = 0; ");
    out.println(" while (i < s.length()) { ");
    out.println(" char c = s.charAt(i++); ");
    out.println(" if (c == '\\\\') { ");
    out.println(" int j = i + 1; ");
    // Only an octal escape is longer than one character.  Scanning for
    // digits unconditionally (as this used to) swallowed literal digits
    // that follow an escape such as \n, and read more than three digits
    // into one octal value.  The limits mirror the STRING_LITERAL token
    // emitted by generatePostcode.
    out.println(" // Only an octal escape extends past one character: ");
    out.println(" // at most 3 digits, or 2 if the first digit is above 3. ");
    out.println(" if (Character.digit(s.charAt(i), 8) != -1) { ");
    out.println(" int max = i + (s.charAt(i) <= '3' ? 3 : 2); ");
    out.println(" while (j < s.length() && j < max && ");
    out.println(" Character.digit(s.charAt(j), 8) != -1) { ");
    out.println(" j++; ");
    out.println(" } ");
    out.println(" } ");
    out.println(" c = unescapifyChar(s.substring(i-1, j)); ");
    out.println(" i = j; ");
    out.println(" } ");
    out.println(" str[o++] = c; ");
    out.println(" } ");
    out.println(" return String.valueOf(str, 0, o); ");
    out.println(" } ");
    if (grammar_type == JAVACC) {
      out.println("} ");
      out.println();
      out.println("PARSER_END(Parser)");
    }
  }}

  /** Print the parser package declaration. */
  private void generateParserPackageDecl() to Package {
    // Just get the first package mentioned...
    // This should probably be specified by an option or something.
    {{ boolean got_it = false; }}
    before Package {{
      if (!got_it) {
        Program.out.println(host);
        got_it = true;
      }
    }}
  }

  /** Print the parser import declarations. */
  private void generateParserImports() bypassing Package to-stop { Import, PackageName } {
    // We need to import everything the user imports, plus all the
    // user packages, plus the demeter package.
    before Import {{ Program.out.println(host); }}
    before PackageName {{ Program.out.println("import " + host + ".*;"); }}
    after Program {{
      Program.out.println(Program.common_imports);
      if (Program.grammar_type == Program.MPARSE) {
        Program.out.println("import com.metamata.parse.*;");
      }
    }}
  }

  /**
   * Print the fixed tail of the grammar: productions for the primitive
   * types and their wrappers, Number, String, StringBuffer, Ident, Text,
   * Line and Word, followed by the lexical specification.  For MPARSE the
   * closing brace of the Parser class is printed last.
   */
  private void generatePostcode() {{
    // Pieces of syntax that differ between the two grammar dialects:
    //   c  follows a production's signature
    //   o  opens the expansion part of a production (JAVACC only)
    //   p  keyword placed before a production (non-JAVACC only)
    //   q  quote character used inside character lists
    //   m  package qualifier for the Token type (MPARSE only)
    String c = (grammar_type == JAVACC ? ":" : "{");
    String o = (grammar_type == JAVACC ? "{" : "");
    String p = (grammar_type == JAVACC ? "" : "production ");
    String q = (grammar_type == JAVACC ? "\"" : "\'");
    String m = (grammar_type == MPARSE ? "com.metamata.parse." : "");

    // Each production below names the token it consumes; the names are the
    // ones declared in the lexical specification further down.
    out.println();
    out.println(p+"boolean _boolean() "+c+" { "+m+"Token t; }"+o);
    out.println(" ( t=<TRUE> { return true; }");
    out.println(" | t=<FALSE> { return false; }");
    out.println(" )");
    out.println("}");
    out.println();
    out.println(p+"char _char() "+c+" { "+m+"Token t; } "+o);
    out.println(" t=<CHARACTER_LITERAL> { ");
    out.println(" String s = t.image; ");
    out.println(" return unescapifyChar(s.substring(1, s.length()-1)); ");
    out.println(" } ");
    out.println("} ");
    out.println();
    out.println(p+"byte _byte() "+c+" { int i; } ");
    out.println(o+" i=_int() { return (byte) i; } } ");
    out.println();
    out.println(p+"short _short() "+c+" { int i; } ");
    out.println(o+" i=_int() { return (short) i; } } ");
    out.println();
    out.println(p+"int _int() "+c+" { Number num; } ");
    out.println(o+" num=_Number() { return num.intValue(); } } ");
    out.println();
    out.println(p+"long _long() "+c+" { Number num; } ");
    out.println(o+" num=_Number() { return num.longValue(); } } ");
    out.println();
    out.println(p+"float _float() "+c+" { Number num; } ");
    out.println(o+" num=_Number() { return num.floatValue(); } } ");
    out.println();
    out.println(p+"double _double() "+c+" { Number num; } ");
    out.println(o+" num=_Number() { return num.doubleValue(); } } ");
    out.println();
    out.println(p+"Boolean _Boolean() "+c+" { "+m+"Token t; }"+o);
    out.println(" ( t=<TRUE> { return Boolean.TRUE; }");
    out.println(" | t=<FALSE> { return Boolean.FALSE; }");
    out.println(" )");
    out.println("}");
    out.println();
    out.println(p+"Character _Character() "+c+" { char c; } ");
    out.println(o+" c=_char() { return new Character(c); } }");
    out.println();
    out.println(p+"Integer _Integer() "+c+" { int i; } ");
    out.println(o+" i = _int() { return new Integer(i); } }");
    out.println();
    out.println(p+"Long _Long() "+c+" { long l; } ");
    out.println(o+" l=_long() { return new Long(l); } } ");
    out.println();
    out.println(p+"Float _Float() "+c+" { float f; } ");
    out.println(o+" f=_float() { return new Float(f); } } ");
    out.println();
    out.println(p+"Double _Double() "+c+" { double d; } ");
    out.println(o+" d=_double() { return new Double(d); } } ");
    out.println();
    out.println(p+"Number _Number() "+c+" ");
    out.println("{");
    out.println(" "+m+"Token t; ");
    out.println(" String s = null; ");
    out.println(" int radix = 0; ");
    out.println(" Number num = null; ");
    out.println("} "+o);
    out.println(" ( ");
    out.println(" ( t=<DECIMAL_LITERAL> { ");
    out.println(" s = t.image; ");
    out.println(" radix = 10; ");
    out.println(" } ");
    out.println(" | t=<HEX_LITERAL> { ");
    out.println(" // Strip off the \"0x\". ");
    out.println(" s = t.image.substring(2, t.image.length()); ");
    out.println(" radix = 16; ");
    out.println(" } ");
    out.println(" | t=<OCTAL_LITERAL> { ");
    out.println(" s = t.image; ");
    out.println(" radix = 8; ");
    out.println(" } ");
    out.println(" ) { ");
    out.println(" switch (s.charAt(s.length()-1)) { ");
    out.println(" case 'l': case 'L': ");
    out.println(" s = s.substring(0, s.length()-1);");
    // The emitted code goes through BigInteger instead of the valueOf
    // calls kept below for reference -- presumably so that literals which
    // do not fit the signed range are narrowed rather than rejected.
    // out.println(" num = Long.valueOf(s, radix);");
    out.println(" num = new Long(new java.math.BigInteger(s, radix).longValue());");
    out.println(" break; ");
    out.println(" default: ");
    // out.println(" num = Integer.valueOf(s, radix);");
    out.println(" num = new Integer(new java.math.BigInteger(s, radix).intValue());");
    out.println(" break; ");
    out.println(" } ");
    out.println(" } ");
    out.println(" | t=<FLOATING_POINT_LITERAL> { ");
    out.println(" s = t.image; ");
    out.println(" switch (s.charAt(s.length()-1)) { ");
    out.println(" case 'd': case 'D': ");
    out.println(" num = Double.valueOf(s.substring(0, s.length()-1)); ");
    out.println(" break; ");
    out.println(" case 'f': case 'F': ");
    out.println(" num = Float.valueOf(s.substring(0, s.length()-1)); ");
    out.println(" break; ");
    out.println(" default: ");
    // NOTE(review): an unsuffixed literal becomes a Float, so _double()
    // only gets float precision for it.  Left unchanged because switching
    // to Double would change the runtime type returned by _Number().
    out.println(" num = Float.valueOf(s); ");
    out.println(" break; ");
    out.println(" } ");
    out.println(" } ");
    out.println(" ) { return num; } ");
    out.println("} ");
    out.println();
    out.println(p+"String _String() "+c+" { "+m+"Token t; } "+o);
    out.println(" t=<STRING_LITERAL> { ");
    out.println(" String s = t.image; ");
    out.println(" return unescapify(s.substring(1, s.length()-1)); ");
    out.println(" } ");
    out.println("} ");
    out.println();
    out.println(p+"StringBuffer _StringBuffer() "+c+" { String s; } ");
    out.println(o+" s=_String() { return new StringBuffer(s); } }");
    out.println();
    out.println(p+"Ident _Ident() "+c+" { "+m+"Token t; } "+o);
    out.println(" t=<IDENTIFIER> { ");
    out.println(" return new Ident(t.image); ");
    out.println(" } ");
    out.println("} ");
    out.println();
    out.println(p+"Text _Text() "+c+" { "+m+"Token t; } "+o);
    out.println(" t=<TEXT_LITERAL> { ");
    out.println(" String s = t.image; ");
    out.println(" return new Text(s.substring(2, s.length()-2)); ");
    out.println(" } ");
    out.println("} ");
    out.println();
    out.println(p+"Line _Line() "+c+" { "+m+"Token t; } "+o);
    out.println(" { token_source.SwitchTo(1); } ");
    out.println(" t=<LINE> { ");
    out.println(" return new Line(t.image); ");
    out.println(" } ");
    out.println("} ");
    out.println();
    out.println(p+"Word _Word() "+c+" { "+m+"Token t; } "+o);
    out.println(" { token_source.SwitchTo(2); } ");
    out.println(" t=<WORD> { ");
    out.println(" return new Word(t.image); ");
    out.println(" } ");
    out.println("} ");
    out.println();
    out.println("// Lexical specification (largely taken from Java.jack): ");
    out.println();
    out.println(grammar_type == JAVACC ? "SKIP : {" : "skip {");
    out.println(" \" \" ");
    out.println("| \"\\t\" ");
    out.println("| \"\\n\" ");
    out.println("| \"\\r\" ");
    out.println("| <\"//\" (~["+q+"\\n"+q+","+q+"\\r"+q+"])* (\"\\n\"|\"\\r\\n\")> ");
    out.println("| <\"/*\" (~["+q+"*"+q+"])* \"*\" (~["+q+"/"+q+"] (~["+q+"*"+q+"])* \"*\")* \"/\"> ");
    out.println("} ");
    out.println(" ");
    out.println((grammar_type == JAVACC ? "TOKEN :" : "token")+ " { /* LITERALS */");
    out.println(" < DECIMAL_LITERAL: ["+q+"1"+q+"-"+q+"9"+q+"] (["+q+"0"+q+"-"+q+"9"+q+"])* (["+q+"l"+q+","+q+"L"+q+"])? > ");
    out.println("| ");
    out.println(" < HEX_LITERAL: \"0\" ["+q+"x"+q+","+q+"X"+q+"] (["+q+"0"+q+"-"+q+"9"+q+","+q+"a"+q+"-"+q+"f"+q+","+q+"A"+q+"-"+q+"F"+q+"])+ (["+q+"l"+q+","+q+"L"+q+"])? > ");
    out.println("| ");
    out.println(" < OCTAL_LITERAL: \"0\" (["+q+"0"+q+"-"+q+"7"+q+"])* (["+q+"l"+q+","+q+"L"+q+"])? > ");
    out.println("| ");
    // The exponent part refers to the private EXPONENT token declared
    // right after this one.  In the third alternative the exponent is
    // mandatory; without it that alternative would match a bare run of
    // digits.
    out.println(" < FLOATING_POINT_LITERAL: ");
    out.println(" (["+q+"0"+q+"-"+q+"9"+q+"])+ \".\" (["+q+"0"+q+"-"+q+"9"+q+"])+ (<EXPONENT>)? (["+q+"f"+q+","+q+"F"+q+","+q+"d"+q+","+q+"D"+q+"])? ");
    out.println(" | \".\" (["+q+"0"+q+"-"+q+"9"+q+"])+ (<EXPONENT>)? (["+q+"f"+q+","+q+"F"+q+","+q+"d"+q+","+q+"D"+q+"])? ");
    out.println(" | (["+q+"0"+q+"-"+q+"9"+q+"])+ <EXPONENT> (["+q+"f"+q+","+q+"F"+q+","+q+"d"+q+","+q+"D"+q+"])? ");
    out.println(" | (["+q+"0"+q+"-"+q+"9"+q+"])+ (<EXPONENT>)? ["+q+"f"+q+","+q+"F"+q+","+q+"d"+q+","+q+"D"+q+"] ");
    out.println(" > ");
    out.println("| ");
    out.println(" < #EXPONENT: ["+q+"e"+q+","+q+"E"+q+"] (["+q+"+"+q+","+q+"-"+q+"])? (["+q+"0"+q+"-"+q+"9"+q+"])+ > ");
    out.println("| ");
    out.println(" < CHARACTER_LITERAL: ");
    out.println(" \"'\" ");
    out.println(" ( (~["+q+"\\\'"+q+","+q+"\\\\"+q+","+q+"\\n"+q+","+q+"\\r"+q+"]) ");
    out.println(" | (\"\\\\\" ");
    out.println(" ( ["+q+"n"+q+","+q+"t"+q+","+q+"b"+q+","+q+"r"+q+","+q+"f"+q+","+q+"\\\\"+q+","+q+"\\\'"+q+","+q+"\\\""+q+"] ");
    out.println(" | ["+q+"0"+q+"-"+q+"7"+q+"] ( ["+q+"0"+q+"-"+q+"7"+q+"] )? ");
    out.println(" | ["+q+"0"+q+"-"+q+"3"+q+"] ["+q+"0"+q+"-"+q+"7"+q+"] ["+q+"0"+q+"-"+q+"7"+q+"] ");
    out.println(" ) ");
    out.println(" ) ");
    out.println(" ) ");
    out.println(" \"'\" ");
    out.println(" > ");
    out.println("| ");
    out.println(" < STRING_LITERAL: ");
    out.println(" \"\\\"\" ");
    out.println(" ( (~["+q+"\\\""+q+","+q+"\\\\"+q+","+q+"\\n"+q+","+q+"\\r"+q+"]) ");
    out.println(" | (\"\\\\\" ");
    out.println(" ( ["+q+"n"+q+","+q+"t"+q+","+q+"b"+q+","+q+"r"+q+","+q+"f"+q+","+q+"\\\\"+q+","+q+"\\\'"+q+","+q+"\\\""+q+"] ");
    out.println(" | ["+q+"0"+q+"-"+q+"7"+q+"] ( ["+q+"0"+q+"-"+q+"7"+q+"] )? ");
    out.println(" | ["+q+"0"+q+"-"+q+"3"+q+"] ["+q+"0"+q+"-"+q+"7"+q+"] ["+q+"0"+q+"-"+q+"7"+q+"] ");
    out.println(" ) ");
    out.println(" ) ");
    out.println(" )* ");
    out.println(" \"\\\"\" ");
    out.println(" > ");
    out.println("| ");
    // The two-character text delimiters are split across string literals
    // so that they never appear literally in this behavior file.
    out.println(" < TEXT_LITERAL: ");
    out.println(" ( \"(" + "@\" ");
    out.println(" (~["+q+"@"+q+"])* ");
    out.println(" ( \"@\" ~["+q+")"+q+"] ");
    out.println(" (~["+q+"@"+q+"])* ");
    out.println(" )* ");
    out.println(" \"@" + ")\" ) ");
    out.println(" | ( \"{" + "{\" ");
    out.println(" (~["+q+"}"+q+"])* ");
    out.println(" ( \"}\" ~["+q+"}"+q+"] ");
    out.println(" (~["+q+"}"+q+"])* ");
    out.println(" )* ");
    out.println(" \"}" + "}\" ) ");
    out.println(" > ");
    out.println("| ");
    out.println(" < TRUE: \"true\" >");
    out.println("| ");
    out.println(" < FALSE: \"false\" >");
    out.println("} ");
    out.println();
    out.println((grammar_type == JAVACC ? "TOKEN :" : "token") + " { /* IDENTIFIERS */ ");
    // An identifier is a LETTER followed by LETTERs and DIGITs, both of
    // which are the private tokens declared just below.
    out.println(" < IDENTIFIER: <LETTER> (<LETTER>|<DIGIT>)* >");
    out.println("|");
    out.println(" < #LETTER:");
    out.println(" [");
    out.println(" "+q+"\\u0024"+q+",");
    out.println(" "+q+"\\u0041"+q+"-"+q+"\\u005a"+q+",");
    out.println(" "+q+"\\u005f"+q+",");
    out.println(" "+q+"\\u0061"+q+"-"+q+"\\u007a"+q+",");
    out.println(" "+q+"\\u00c0"+q+"-"+q+"\\u00d6"+q+",");
    out.println(" "+q+"\\u00d8"+q+"-"+q+"\\u00f6"+q+",");
    out.println(" "+q+"\\u00f8"+q+"-"+q+"\\u00ff"+q+",");
    out.println(" "+q+"\\u0100"+q+"-"+q+"\\u1fff"+q+",");
    out.println(" "+q+"\\u3040"+q+"-"+q+"\\u318f"+q+",");
    out.println(" "+q+"\\u3300"+q+"-"+q+"\\u337f"+q+",");
    out.println(" "+q+"\\u3400"+q+"-"+q+"\\u3d2d"+q+",");
    out.println(" "+q+"\\u4e00"+q+"-"+q+"\\u9fff"+q+",");
    out.println(" "+q+"\\uf900"+q+"-"+q+"\\ufaff"+q+"");
    out.println(" ]");
    out.println(" >");
    out.println("|");
    out.println(" < #DIGIT:");
    out.println(" [");
    out.println(" "+q+"\\u0030"+q+"-"+q+"\\u0039"+q+",");
    out.println(" "+q+"\\u0660"+q+"-"+q+"\\u0669"+q+",");
    out.println(" "+q+"\\u06f0"+q+"-"+q+"\\u06f9"+q+",");
    out.println(" "+q+"\\u0966"+q+"-"+q+"\\u096f"+q+",");
    out.println(" "+q+"\\u09e6"+q+"-"+q+"\\u09ef"+q+",");
    out.println(" "+q+"\\u0a66"+q+"-"+q+"\\u0a6f"+q+",");
    out.println(" "+q+"\\u0ae6"+q+"-"+q+"\\u0aef"+q+",");
    out.println(" "+q+"\\u0b66"+q+"-"+q+"\\u0b6f"+q+",");
    out.println(" "+q+"\\u0be7"+q+"-"+q+"\\u0bef"+q+",");
    out.println(" "+q+"\\u0c66"+q+"-"+q+"\\u0c6f"+q+",");
    out.println(" "+q+"\\u0ce6"+q+"-"+q+"\\u0cef"+q+",");
    out.println(" "+q+"\\u0d66"+q+"-"+q+"\\u0d6f"+q+",");
    out.println(" "+q+"\\u0e50"+q+"-"+q+"\\u0e59"+q+",");
    out.println(" "+q+"\\u0ed0"+q+"-"+q+"\\u0ed9"+q+",");
    out.println(" "+q+"\\u1040"+q+"-"+q+"\\u1049"+q+"");
    out.println(" ]");
    out.println(" >");
    out.println("}");
    out.println();
    // NOTE(review): _Line and _Word switch the token manager to lexical
    // states 1 and 2, and LINE/WORD switch back to DEFAULT, so the three
    // declarations below presumably need a lexical-state prefix where the
    // leading blank is.  No state name is visible in this file, so the
    // emitted text is left unchanged -- confirm against the upstream source.
    out.println(" " +(grammar_type == JAVACC ? "TOKEN : {" : "token {"));
    out.println(" < LINE: (~["+q+"\\n"+q+","+q+"\\r"+q+"])* > : DEFAULT");
    out.println("}");
    out.println();
    out.println(" "+(grammar_type == JAVACC ? "SKIP : " : "skip ")+ " { \" \" | \"\\t\" | \"\\n\" | \"\\r\" }");
    out.println(" "+(grammar_type == JAVACC ? "TOKEN : {" : "token {"));
    out.println(" < WORD: (~["+q+" "+q+","+q+"\\t"+q+","+q+"\\n"+q+","+q+"\\r"+q+"])* > : DEFAULT");
    out.println("}");
    if (grammar_type == MPARSE) {
      // Closes the Parser class that generatePrecode left open.
      out.println("}");
    }
  }}
}

ClassName {
  /** Name of the grammar rule for this class: "_" followed by basename(). */
  String grammarRuleName() {{ return "_" + basename(); }}

  /** Name of the shared rule for this class: "common" + grammarRuleName(). */
  String commonRuleName() {{ return "common" + grammarRuleName(); }}
}

Program {
  /**
   * Print one grammar rule per parsed class definition.  An alternation
   * class gets two rules: the alternation itself, and a "common" rule
   * taking the object under construction as its argument.
   */
  void generateRules() = allClassDefs {
    before ClassDef {{
      if (!host.isNotParsed()) {
        String c = (Program.grammar_type == Program.JAVACC ? ":" : "{");
        String o = (Program.grammar_type == Program.JAVACC ? "{" : "");
        String p = (Program.grammar_type == Program.JAVACC ? "" : "production ");
        ClassName clname = host.get_classname();
        Program.out.println();
        Program.out.println(p+clname + " " + clname.grammarRuleName() + "() "+c+" {");
        Program.out.println(" " + clname + " it = null;");
        if (host.isAlternationClass()) {
          Program.out.println("} "+o);
          host.generateAlternationRule();
          Program.out.println("}");
          Program.out.println();
          Program.out.println(p+"void " + clname.commonRuleName() + "(" + clname + " it) "+c+" {");
        }
        host.generateRuleDecls();
        Program.out.println("} "+o);
        host.generateConstructionRule();
        Program.out.println("}");
      }
    }}
  }
}

ClassDef {
  /**
   * Print the body of an alternation rule: a parenthesized choice between
   * the rules of the subclasses, each assigned to "it", followed by an
   * action returning "it".
   */
  void generateAlternationRule() to Subclass {
    // True until the first subclass has been printed; later ones get a "|".
    {{ boolean first = true; }}
    before ClassDef {{ Program.out.print(" ( "); }}
    before Subclass {{
      if (first) first = false;
      else Program.out.print("| ");
      host.printLookahead();
      Program.out.print("it=" + host.get_classname().grammarRuleName() + "() ");
    }}
    after ClassDef {{
      Program.out.println(")");
      Program.out.println(" { return it; }");
    }}
  }
}

{ Subclass, OptionalPart } {
  /** Print a LOOKAHEAD(...) specification for each local lookahead found. */
  void printLookahead() to LocalLookahead {
    before LocalLookahead {{
      Program.out.print(" LOOKAHEAD (" + host.get_javacode().get_code() + ")");
    }}
  }
}

ClassDef {
  /**
   * Print a local variable declaration, named "_" + part name, for every
   * part that is not final, static or derived.
   */
  void generateRuleDecls() = allParts {
    before Part {{
      if (!host.isFinal() && !host.isStatic() && !host.isDerived()) {
        Program.out.println(" " + host.get_classname() + " _" + host.get_partname() + ";");
      }
    }}
  }
}

ClassDef {
  /**
   * Print the expansion of a rule: object creation for a construction
   * class, one rule call plus setter action per part, the plain syntax
   * in between, a call to the superclass's common rule when that class
   * is parsed, and the final action.
   */
  void generateConstructionRule() to { Part, PlainSyntax, Superclass, EOFtoken } {
    {{ ClassName classname; }}
    before ClassDef {{ classname = host.get_classname(); }}
    // Set once a construction class has been seen; selects the final action.
    {{ boolean constr; }}
    before ConstructionClass {{
      Program.out.println(" { it=new " + classname + "(); }");
      constr = true;
    }}
    // True while inside an optional part, so the part stays on the "[" line.
    {{ boolean opt; }}
    before OptionalPart {{
      Program.out.print(" [");
      opt = true;
      host.printLookahead();
    }}
    before Part {{
      if (!host.isFinal() && !host.isStatic() && !host.isDerived()) {
        PartName name = host.get_partname();
        Program.out.print(" _" + name + "="
                          + host.get_classname().grammarRuleName() + "()"
                          + " { it.set_" + name + "(_" + name + "); }");
        if (!opt) Program.out.println();
      }
    }}
    after OptionalPart {{
      Program.out.println(" ]");
      opt = false;
    }}
    before PlainSyntax {{ Program.out.print(" " + host); }}
    before Superclass {{
      ClassName parent = host.get_classname();
      ClassDef def = Program.prog.findClassDef(parent);
      if (def != null && !def.isNotParsed()) {
        Program.out.println(" " + parent.commonRuleName() + "(it)");
      }
    }}
    // An EOF marker in the class dictionary becomes the built-in
    // end-of-file token in the grammar.
    before EOFtoken {{ Program.out.println(" <EOF>"); }}
    after ClassDef {{
      if (constr) {
        Program.out.println(" { return it; }");
      } else {
        // JavaCC doesn't like empty rules...
        Program.out.println(" { }");
      }
    }}
  }
}

Program {
  /**
   * Generate empty rules for all unknown terminal classes.
   *
   * Collects the class of every part of every parsed class definition
   * that the program does not define, then prints, for each one that is
   * not a known terminal, a rule that just returns a new instance.
   */
  void generateTerminalRules() = allParts {
    // Do not descend into class definitions that are not parsed.
    around ClassDef {{ if (!host.isNotParsed()) subtraversal.apply(); }}
    // Used as a set: each class name is stored as both key and value.
    {{ Dictionary terminals = new Hashtable(); }}
    before Part {{
      ClassName clname = host.get_classname();
      if (!Program.prog.definesClass(clname)) {
        terminals.put(clname, clname);
      }
    }}
    after Program {{
      Program.out.println();
      Enumeration e = terminals.keys();
      while (e.hasMoreElements()) {
        ClassName term = (ClassName) e.nextElement();
        if (!term.isKnownTerminal()) {
          String c = (Program.grammar_type == Program.JAVACC ? ":" : "{");
          String o = (Program.grammar_type == Program.JAVACC ? "{" : "");
          String p = (Program.grammar_type == Program.JAVACC ? "" : "production ");
          Program.out.print(p+term + " " + term.grammarRuleName() + "() "+c+" { }");
          Program.out.println(" "+o+" { return new " + term + "(); } }");
        }
      }
    }}
  }
}

ClassDef {
  /**
   * Generate code to parse an object of the class.
   *
   * Prints three static parse methods (from a Reader, an InputStream and
   * a String) for a parsed construction class.  When is_aspect is true
   * the method bodies are delimited by Text.begin/Text.end instead of
   * braces.
   */
  private void generateParseCode(boolean is_aspect) {{
    String open = (is_aspect ? Text.begin : "{");
    String close = (is_aspect ? Text.end : "}");
    // We don't generate parse methods for alternation classes
    // because then the subclasses' parse methods would have
    // different return values from the alternation class's.  We
    // could get around this by putting the name of the class in the
    // method name, but ew.
    if (!isNotParsed() && isConstructionClass()) {
      ClassName c = get_classname();
      // Call the rule under the same name generateRules declares it with.
      String rule = c.grammarRuleName();
      String p = (Program.grammar_type == Program.MPARSE ? "com.metamata.parse." : "");
      Program.out.println(
         " public static " + c + " parse(java.io.Reader in)\n"
        +" throws " + p + "ParseException\n"
        +" " + open + " return new Parser(in)." + rule + "(); " + close + "\n"
        +" public static " + c + " parse(java.io.InputStream in)\n"
        +" throws " + p + "ParseException\n"
        +" " + open + " return new Parser(in)." + rule + "(); " + close + "\n"
        +" public static " + c + " parse(String s) " + open + "\n"
        +" try { return parse(new java.io.StringReader(s)); }\n"
        +" catch ("+p+"ParseException e) {\n"
        +" throw new RuntimeException(e.toString());\n"
        +" }\n"
        +" " + close);
    }
  }}
}