/** * Converts a {@link Grammar} and {@link Lexicon} to BUBS sparse-matrix format, specifically * {@link LeftCscSparseMatrixGrammar}. Prunes rules below the minimum rule probability threshold * specified when the {@link DiscriminativeMergeObjectiveFunction} was initialized with {@link * #init(CompleteClosureModel, List, List, float)}. * * @param grammar * @param lexicon * @return {@link LeftCscSparseMatrixGrammar} */ protected LeftCscSparseMatrixGrammar convertGrammarToSparseMatrix( final Grammar grammar, final Lexicon lexicon) { try { final Writer w = new StringWriter(150 * 1024 * 1024); // Note We could use a PipedOutputStream / PipedInputStream combination (with 2 threads) to // write and read // at the same time, and avoid using enough memory to serialize the entire grammar. But memory // isn't a huge // constraint during training, and the threading would add complexity, so we'll skip that for // now. w.write(grammar.toString(lexicon.totalRules(minRuleProbability), minRuleProbability, 0, 0)); w.write("===== LEXICON =====\n"); w.write(lexicon.toString(minRuleProbability)); return new LeftCscSparseMatrixGrammar( new StringReader(w.toString()), new DecisionTreeTokenClassifier()); } catch (final IOException e) { // StringWriter and StringReader should never IOException throw new AssertionError(e); } }