/** * Evaluates speed and accuracy with the baseline (fully-split) grammar * * @param grammar * @param lexicon * @param cycle */ @Override public void initMergeCycle(final Grammar grammar, final Lexicon lexicon, final int cycle) { this.splitGrammar = grammar; this.splitLexicon = lexicon; final String[] split = GlobalConfigProperties.singleton() .getProperty(PROPERTY_BEAM_WIDTHS, DEFAULT_BEAM_WIDTHS) .split(","); this.beamWidth = Integer.parseInt(split[cycle - 1]); // Convert the grammar to BUBS sparse-matrix format and train a Boundary POS FOM BaseLogger.singleton() .info("Constrained parsing the training-set and training a prioritization model"); final LeftCscSparseMatrixGrammar sparseMatrixGrammar = convertGrammarToSparseMatrix(splitGrammar, splitLexicon); final BoundaryPosModel posFom = trainPosFom(sparseMatrixGrammar); // Record accuracy with the full split grammar (so we can later compare with MergeCandidates) BaseLogger.singleton().info("Parsing the dev-set with the fully-split grammar"); final float[] parseResult = parseDevSet(sparseMatrixGrammar, posFom, beamWidth); splitF1 = parseResult[0]; splitSpeed = parseResult[1]; BaseLogger.singleton() .info(String.format("F1 = %.3f Speed = %.3f w/s", splitF1 * 100, splitSpeed)); }
/**
 * Creates a parser that partitions the grammar's binary rules into column segments, one per
 * grammar-level thread.
 *
 * <p>The requested thread count is read from {@code ParserDriver.OPT_GRAMMAR_THREAD_COUNT}. The
 * populated columns are scanned in order and a new segment is started whenever the current one has
 * accumulated at least {@code rowIndices.length / requestedThreads + 1} entries, so the number of
 * segments actually produced (stored in {@code grammarThreads}) is determined by the scan rather
 * than taken directly from the request. The effective total thread count (cell threads multiplied
 * by grammar threads) is published under {@code ParserDriver.RUNTIME_CONFIGURED_THREAD_COUNT}.
 *
 * @param opts parser driver options, passed through to the superclass
 * @param grammar sparse-matrix grammar whose CSC binary-rule arrays are segmented
 */
public GrammarParallelCscSpmvParser(
    final ParserDriver opts, final LeftCscSparseMatrixGrammar grammar) {
  super(opts, grammar);

  final ConfigProperties props = GlobalConfigProperties.singleton();

  // Split the binary grammar rules into segments of roughly equal size
  final int requestedThreads = props.getIntProperty(ParserDriver.OPT_GRAMMAR_THREAD_COUNT);
  final int[] boundaries = new int[requestedThreads + 1];
  final int targetSegmentSize = grammar.cscBinaryRowIndices.length / requestedThreads + 1;
  final int[] columnOffsets = grammar.cscBinaryPopulatedColumnOffsets;

  // boundaries[k] holds the populated-column index at which segment k begins
  boundaries[0] = 0;
  int segmentCount = 1;

  // Examine each populated column; close the current segment once it is large enough
  final int columnLimit = grammar.cscBinaryPopulatedColumns.length - 1;
  for (int column = 1; column < columnLimit; column++) {
    final int entriesSinceBoundary =
        columnOffsets[column] - columnOffsets[boundaries[segmentCount - 1]];
    if (entriesSinceBoundary >= targetSegmentSize) {
      boundaries[segmentCount] = column;
      segmentCount++;
    }
  }
  // The final boundary is the last index of the offsets array
  boundaries[segmentCount] = columnOffsets.length - 1;

  this.grammarThreads = segmentCount;
  this.cpvSegments = grammarThreads * 2;

  // Publish the total thread count: cell-level threads (default 1) times grammar-level threads
  final int cellThreads = props.getIntProperty(ParserDriver.OPT_CELL_THREAD_COUNT, 1);
  GlobalConfigProperties.singleton()
      .setProperty(
          ParserDriver.RUNTIME_CONFIGURED_THREAD_COUNT,
          Integer.toString(cellThreads * grammarThreads));

  // Keep only the boundaries that were actually populated
  this.binaryRowSegments = new int[segmentCount + 1];
  System.arraycopy(boundaries, 0, binaryRowSegments, 0, binaryRowSegments.length);

  if (BaseLogger.singleton().isLoggable(Level.FINE)) {
    final StringBuilder lengths = new StringBuilder();
    for (int s = 1; s < binaryRowSegments.length; s++) {
      final int segmentEnd = columnOffsets[binaryRowSegments[s]];
      final int segmentStart = columnOffsets[binaryRowSegments[s - 1]];
      lengths.append(segmentEnd - segmentStart).append(" ");
    }
    BaseLogger.singleton().fine("INFO: CSC Binary Grammar segments of length: " + lengths);
  }

  // Temporary cell storage for each grammar-level thread
  this.threadLocalTemporaryCellArrays =
      new ThreadLocal<PackedArrayChart.TemporaryChartCell[]>() {
        @Override
        protected PackedArrayChart.TemporaryChartCell[] initialValue() {
          final PackedArrayChart.TemporaryChartCell[] cells =
              new PackedArrayChart.TemporaryChartCell[grammarThreads];
          for (int t = 0; t < cells.length; t++) {
            cells[t] = new PackedArrayChart.TemporaryChartCell(grammar, false);
          }
          return cells;
        }
      };
}