protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) { // Build NFAs from the grammar AST grammar.buildNFA(); // Create the DFA predictors for each decision grammar.createLookaheadDFAs(); }
/** * Create NFA, DFA and generate code for grammar. Create NFA for any delegates first. Once all NFA * are created, it's ok to create DFA, which must check for left-recursion. That check is done by * walking the full NFA, which therefore must be complete. After all NFA, comes DFA conversion for * root grammar then code gen for root grammar. DFA and code gen for delegates comes next. */ protected void generateRecognizer(Grammar grammar) { String language = (String) grammar.getOption("language"); if (language != null) { CodeGenerator generator = new CodeGenerator(this, grammar, language); grammar.setCodeGenerator(generator); generator.setDebug(isDebug()); generator.setProfile(isProfile()); generator.setTrace(isTrace()); // generate NFA early in case of crash later (for debugging) if (isGenerate_NFA_dot()) { generateNFAs(grammar); } // GENERATE CODE generator.genRecognizer(); if (isGenerate_DFA_dot()) { generateDFAs(grammar); } List<Grammar> delegates = grammar.getDirectDelegates(); for (int i = 0; delegates != null && i < delegates.size(); i++) { Grammar delegate = (Grammar) delegates.get(i); if (delegate != grammar) { // already processing this one generateRecognizer(delegate); } } } }
/** * This method is used by all code generators to create new output files. If the outputDir set by * -o is not present it will be created. The final filename is sensitive to the output directory * and the directory where the grammar file was found. If -o is /tmp and the original grammar file * was foo/t.g then output files go in /tmp/foo. * * <p>The output dir -o spec takes precedence if it's absolute. E.g., if the grammar file dir is * absolute the output dir is given precendence. "-o /tmp /usr/lib/t.g" results in "/tmp/T.java" * as output (assuming t.g holds T.java). * * <p>If no -o is specified, then just write to the directory where the grammar file was found. * * <p>If outputDirectory==null then write a String. */ public Writer getOutputFile(Grammar g, String fileName) throws IOException { if (getOutputDirectory() == null) { return new StringWriter(); } // output directory is a function of where the grammar file lives // for subdir/T.g, you get subdir here. Well, depends on -o etc... // But, if this is a .tokens file, then we force the output to // be the base output directory (or current directory if there is not a -o) // File outputDir; if (fileName.endsWith(CodeGenerator.VOCAB_FILE_EXTENSION)) { if (haveOutputDir) { outputDir = new File(getOutputDirectory()); } else { outputDir = new File("."); } } else { outputDir = getOutputDirectory(g.getFileName()); } File outputFile = new File(outputDir, fileName); if (!outputDir.exists()) { outputDir.mkdirs(); } FileWriter fw = new FileWriter(outputFile); return new BufferedWriter(fw); }
/** * Convert from an ANTLR char literal found in a grammar file to an equivalent char literal in the * target language. For most languages, this means leaving 'x' as 'x'. Actually, we need to escape * '\u000A' so that it doesn't get converted to \n by the compiler. Convert the literal to the * char value and then to an appropriate target char literal. * * <p>Expect single quotes around the incoming literal. */ public String getTargetCharLiteralFromANTLRCharLiteral(CodeGenerator generator, String literal) { StringBuffer buf = new StringBuffer(); buf.append('\''); int c = Grammar.getCharValueFromGrammarCharLiteral(literal); if (c < Label.MIN_CHAR_VALUE) { return "'\u0000'"; } if (c < targetCharValueEscape.length && targetCharValueEscape[c] != null) { buf.append(targetCharValueEscape[c]); } else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN && !Character.isISOControl((char) c)) { // normal char buf.append((char) c); } else { // must be something unprintable...use \\uXXXX // turn on the bit above max "\\uFFFF" value so that we pad with zeros // then only take last 4 digits String hex = Integer.toHexString(c | 0x10000).toUpperCase().substring(1, 5); buf.append("\\u"); buf.append(hex); } buf.append('\''); return buf.toString(); }
/**
 * Writes a DOT file for the NFA of every rule of {@code g}, covering both the imported rules and
 * the grammar's own rules. Rules for which the DOT generator returns {@code null} are skipped.
 * An {@link IOException} raised for one rule is reported through {@code ErrorManager} and does
 * not stop the remaining rules from being processed.
 *
 * @param g the grammar whose rule NFAs are dumped
 */
protected void generateNFAs(Grammar g) {
  final DOTGenerator dotGenerator = new DOTGenerator(g);

  // Imported rules first, then the grammar's own rules are added to the same collection.
  final Collection allRules = g.getAllImportedRules();
  allRules.addAll(g.getRules());

  for (Object element : allRules) {
    final Rule rule = (Rule) element;
    try {
      final String dot = dotGenerator.getDOT(rule.startState);
      if (dot == null) {
        continue;
      }
      writeDOTFile(g, rule, dot);
    } catch (IOException ioe) {
      ErrorManager.error(ErrorManager.MSG_CANNOT_WRITE_FILE, ioe);
    }
  }
}
public void generateDFAs(Grammar g) { for (int d = 1; d <= g.getNumberOfDecisions(); d++) { DFA dfa = g.getLookaheadDFA(d); if (dfa == null) { continue; // not there for some reason, ignore } DOTGenerator dotGenerator = new DOTGenerator(g); String dot = dotGenerator.getDOT(dfa.startState); String dotFileName = g.name + "." + "dec-" + d; if (g.implicitLexer) { dotFileName = g.name + Grammar.grammarTypeToFileNameSuffix[g.type] + "." + "dec-" + d; } try { writeDOTFile(g, dotFileName, dot); } catch (IOException ioe) { ErrorManager.error(ErrorManager.MSG_CANNOT_GEN_DOT_FILE, dotFileName, ioe); } } }
/** Get a grammar mentioned on the command-line and any delegates */ public Grammar getRootGrammar(String grammarFileName) throws IOException { // StringTemplate.setLintMode(true); // grammars mentioned on command line are either roots or single grammars. // create the necessary composite in case it's got delegates; even // single grammar needs it to get token types. CompositeGrammar composite = new CompositeGrammar(); Grammar grammar = new Grammar(this, grammarFileName, composite); composite.setDelegationRoot(grammar); FileReader fr = null; File f = null; if (haveInputDir) { f = new File(inputDirectory, grammarFileName); } else { f = new File(grammarFileName); } // Store the location of this grammar as if we import files, we can then // search for imports in the same location as the original grammar as well as in // the lib directory. // parentGrammarDirectory = f.getParent(); if (grammarFileName.lastIndexOf(File.separatorChar) == -1) { grammarOutputDirectory = "."; } else { grammarOutputDirectory = grammarFileName.substring(0, grammarFileName.lastIndexOf(File.separatorChar)); } fr = new FileReader(f); BufferedReader br = new BufferedReader(fr); grammar.parseAndBuildAST(br); composite.watchNFAConversion = internalOption_watchNFAConversion; br.close(); fr.close(); return grammar; }
@Override public String getTargetCharLiteralFromANTLRCharLiteral( final CodeGenerator generator, final String literal) { final StringBuffer buf = new StringBuffer(10); final int c = Grammar.getCharValueFromGrammarCharLiteral(literal); if (c < Label.MIN_CHAR_VALUE) { buf.append("\\x{0000}"); } else if (c < targetCharValueEscape.length && targetCharValueEscape[c] != null) { buf.append(targetCharValueEscape[c]); } else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN && !Character.isISOControl((char) c)) { // normal char buf.append((char) c); } else { // must be something unprintable...use \\uXXXX // turn on the bit above max "\\uFFFF" value so that we pad with zeros // then only take last 4 digits String hex = Integer.toHexString(c | 0x10000).toUpperCase().substring(1, 5); buf.append("\\x{"); buf.append(hex); buf.append("}"); } if (buf.indexOf("\\") == -1) { // no need for interpolation, use single quotes buf.insert(0, '\''); buf.append('\''); } else { // need string interpolation buf.insert(0, '\"'); buf.append('\"'); } return buf.toString(); }
public void process() { boolean exceptionWhenWritingLexerFile = false; String lexerGrammarFileName = null; // necessary at this scope to have access in the catch below // Have to be tricky here when Maven or build tools call in and must new Tool() // before setting options. The banner won't display that way! if (isVerbose() && showBanner) { ErrorManager.info("ANTLR Parser Generator Version " + VERSION); showBanner = false; } try { sortGrammarFiles(); // update grammarFileNames } catch (Exception e) { ErrorManager.error(ErrorManager.MSG_INTERNAL_ERROR, e); } catch (Error e) { ErrorManager.error(ErrorManager.MSG_INTERNAL_ERROR, e); } for (String grammarFileName : grammarFileNames) { // If we are in make mode (to support build tools like Maven) and the // file is already up to date, then we do not build it (and in verbose mode // we will say so). if (make) { try { if (!buildRequired(grammarFileName)) continue; } catch (Exception e) { ErrorManager.error(ErrorManager.MSG_INTERNAL_ERROR, e); } } if (isVerbose() && !isDepend()) { System.out.println(grammarFileName); } try { if (isDepend()) { BuildDependencyGenerator dep = new BuildDependencyGenerator(this, grammarFileName); /* List outputFiles = dep.getGeneratedFileList(); List dependents = dep.getDependenciesFileList(); System.out.println("output: "+outputFiles); System.out.println("dependents: "+dependents); */ System.out.println(dep.getDependencies()); continue; } Grammar grammar = getRootGrammar(grammarFileName); // we now have all grammars read in as ASTs // (i.e., root and all delegates) grammar.composite.assignTokenTypes(); grammar.composite.defineGrammarSymbols(); grammar.composite.createNFAs(); generateRecognizer(grammar); if (isPrintGrammar()) { grammar.printGrammar(System.out); } if (isReport()) { GrammarReport greport = new GrammarReport(grammar); System.out.println(greport.toString()); // print out a backtracking report too (that is not encoded into log) System.out.println(greport.getBacktrackingReport()); // 
same for aborted NFA->DFA conversions System.out.println(greport.getAnalysisTimeoutReport()); } if (isProfile()) { GrammarReport greport = new GrammarReport(grammar); Stats.writeReport(GrammarReport.GRAMMAR_STATS_FILENAME, greport.toNotifyString()); } // now handle the lexer if one was created for a merged spec String lexerGrammarStr = grammar.getLexerGrammar(); // System.out.println("lexer grammar:\n"+lexerGrammarStr); if (grammar.type == Grammar.COMBINED && lexerGrammarStr != null) { lexerGrammarFileName = grammar.getImplicitlyGeneratedLexerFileName(); try { Writer w = getOutputFile(grammar, lexerGrammarFileName); w.write(lexerGrammarStr); w.close(); } catch (IOException e) { // emit different error message when creating the implicit lexer fails // due to write permission error exceptionWhenWritingLexerFile = true; throw e; } try { StringReader sr = new StringReader(lexerGrammarStr); Grammar lexerGrammar = new Grammar(); lexerGrammar.composite.watchNFAConversion = internalOption_watchNFAConversion; lexerGrammar.implicitLexer = true; lexerGrammar.setTool(this); File lexerGrammarFullFile = new File(getFileDirectory(lexerGrammarFileName), lexerGrammarFileName); lexerGrammar.setFileName(lexerGrammarFullFile.toString()); lexerGrammar.importTokenVocabulary(grammar); lexerGrammar.parseAndBuildAST(sr); sr.close(); lexerGrammar.composite.assignTokenTypes(); lexerGrammar.composite.defineGrammarSymbols(); lexerGrammar.composite.createNFAs(); generateRecognizer(lexerGrammar); } finally { // make sure we clean up if (deleteTempLexer) { File outputDir = getOutputDirectory(lexerGrammarFileName); File outputFile = new File(outputDir, lexerGrammarFileName); outputFile.delete(); } } } } catch (IOException e) { if (exceptionWhenWritingLexerFile) { ErrorManager.error(ErrorManager.MSG_CANNOT_WRITE_FILE, lexerGrammarFileName, e); } else { ErrorManager.error(ErrorManager.MSG_CANNOT_OPEN_FILE, grammarFileName); } } catch (Exception e) { 
ErrorManager.error(ErrorManager.MSG_INTERNAL_ERROR, grammarFileName, e); } /* finally { System.out.println("creates="+ Interval.creates); System.out.println("hits="+ Interval.hits); System.out.println("misses="+ Interval.misses); System.out.println("outOfRange="+ Interval.outOfRange); } */ } }
/**
 * Converts an ANTLR char literal into the decimal text of its character value.
 *
 * @param generator the code generator requesting the conversion; not read by this method
 * @param literal the ANTLR char literal to convert
 * @return the character value of {@code literal} formatted as a decimal integer
 */
public String getTargetCharLiteralFromANTLRCharLiteral(CodeGenerator generator, String literal) {
  final int charValue = Grammar.getCharValueFromGrammarCharLiteral(literal);
  return Integer.toString(charValue);
}