/**
   * Prepares a merge cycle by measuring the baseline (fully-split) grammar.
   *
   * <p>Stores the supplied grammar and lexicon, selects the beam width for this cycle from the
   * comma-separated {@code PROPERTY_BEAM_WIDTHS} configuration property (falling back to {@code
   * DEFAULT_BEAM_WIDTHS}), converts the grammar to sparse-matrix form, trains a boundary POS
   * figure-of-merit model, and parses the dev-set. The resulting F1 and speed are recorded in
   * {@code splitF1} and {@code splitSpeed} and logged.
   *
   * @param grammar the fully-split grammar; retained as {@code splitGrammar}
   * @param lexicon the lexicon paired with {@code grammar}; retained as {@code splitLexicon}
   * @param cycle 1-based merge-cycle number; entry {@code cycle - 1} of the configured beam-width
   *     list is used
   * @throws ArrayIndexOutOfBoundsException if {@code cycle} is less than 1 or exceeds the number
   *     of configured beam widths
   * @throws NumberFormatException if the selected beam-width entry is not a valid integer
   */
  @Override
  public void initMergeCycle(final Grammar grammar, final Lexicon lexicon, final int cycle) {

    this.splitGrammar = grammar;
    this.splitLexicon = lexicon;

    final String[] beamWidths =
        GlobalConfigProperties.singleton()
            .getProperty(PROPERTY_BEAM_WIDTHS, DEFAULT_BEAM_WIDTHS)
            .split(",");
    // Trim before parsing so that lists written with spaces after the commas ("10, 20, 30")
    // are accepted; Integer.parseInt rejects surrounding whitespace.
    this.beamWidth = Integer.parseInt(beamWidths[cycle - 1].trim());

    // Convert the grammar to BUBS sparse-matrix format and train a Boundary POS FOM
    BaseLogger.singleton()
        .info("Constrained parsing the training-set and training a prioritization model");
    final LeftCscSparseMatrixGrammar sparseMatrixGrammar =
        convertGrammarToSparseMatrix(splitGrammar, splitLexicon);
    final BoundaryPosModel posFom = trainPosFom(sparseMatrixGrammar);

    // Record accuracy with the full split grammar (so we can later compare with MergeCandidates)
    BaseLogger.singleton().info("Parsing the dev-set with the fully-split grammar");
    final float[] parseResult = parseDevSet(sparseMatrixGrammar, posFom, beamWidth);
    splitF1 = parseResult[0]; // element 0: F1 (logged below as a percentage)
    splitSpeed = parseResult[1]; // element 1: speed (logged below as w/s)
    BaseLogger.singleton()
        .info(String.format("F1 = %.3f  Speed = %.3f w/s", splitF1 * 100, splitSpeed));
  }
コード例 #2
0
  /**
   * Creates a parser that divides the grammar's binary rules into contiguous ranges of populated
   * columns, one range per grammar-level thread.
   *
   * <p>The requested thread count is read from {@code ParserDriver.OPT_GRAMMAR_THREAD_COUNT}. A
   * new range is started whenever the current one has accumulated at least {@code
   * cscBinaryRowIndices.length / requestedThreads + 1} entries, so the number of ranges actually
   * produced (stored in {@code grammarThreads}) may be smaller than requested. The effective total
   * thread count (cell threads times grammar threads) is published under {@code
   * ParserDriver.RUNTIME_CONFIGURED_THREAD_COUNT}.
   *
   * @param opts parser options, passed through to the superclass
   * @param grammar grammar whose CSC binary-rule arrays are partitioned
   */
  public GrammarParallelCscSpmvParser(
      final ParserDriver opts, final LeftCscSparseMatrixGrammar grammar) {
    super(opts, grammar);

    final ConfigProperties props = GlobalConfigProperties.singleton();

    // Partition the binary rules into ranges of roughly equal size. boundaries[k] is the index
    // (into the populated-column arrays) at which range k begins; boundaries[0] stays 0.
    final int requestedThreads = props.getIntProperty(ParserDriver.OPT_GRAMMAR_THREAD_COUNT);
    final int[] boundaries = new int[requestedThreads + 1];
    final int targetSize = grammar.cscBinaryRowIndices.length / requestedThreads + 1;

    int boundaryCount = 1;
    int column = 1;
    while (column < grammar.cscBinaryPopulatedColumns.length - 1) {
      final int rangeStart = boundaries[boundaryCount - 1];
      final boolean rangeIsFull =
          grammar.cscBinaryPopulatedColumnOffsets[column]
                  - grammar.cscBinaryPopulatedColumnOffsets[rangeStart]
              >= targetSize;
      if (rangeIsFull) {
        boundaries[boundaryCount] = column;
        boundaryCount++;
      }
      column++;
    }
    // Close the final range at the last offset entry
    boundaries[boundaryCount] = grammar.cscBinaryPopulatedColumnOffsets.length - 1;

    this.grammarThreads = boundaryCount;
    this.cpvSegments = grammarThreads * 2;

    final int cellThreads = props.getIntProperty(ParserDriver.OPT_CELL_THREAD_COUNT, 1);
    GlobalConfigProperties.singleton()
        .setProperty(
            ParserDriver.RUNTIME_CONFIGURED_THREAD_COUNT,
            String.valueOf(cellThreads * grammarThreads));

    // Keep only the boundaries that were actually filled in
    this.binaryRowSegments = new int[boundaryCount + 1];
    System.arraycopy(boundaries, 0, binaryRowSegments, 0, binaryRowSegments.length);

    if (BaseLogger.singleton().isLoggable(Level.FINE)) {
      final StringBuilder lengths = new StringBuilder();
      for (int k = 0; k + 1 < binaryRowSegments.length; k++) {
        lengths
            .append(
                grammar.cscBinaryPopulatedColumnOffsets[binaryRowSegments[k + 1]]
                    - grammar.cscBinaryPopulatedColumnOffsets[binaryRowSegments[k]])
            .append(' ');
      }
      BaseLogger.singleton().fine("INFO: CSC Binary Grammar segments of length: " + lengths);
    }

    // Each thread lazily gets its own array of temporary cells, one per grammar-level thread
    this.threadLocalTemporaryCellArrays =
        new ThreadLocal<PackedArrayChart.TemporaryChartCell[]>() {

          @Override
          protected PackedArrayChart.TemporaryChartCell[] initialValue() {
            final PackedArrayChart.TemporaryChartCell[] cells =
                new PackedArrayChart.TemporaryChartCell[grammarThreads];
            int slot = 0;
            while (slot < grammarThreads) {
              cells[slot] = new PackedArrayChart.TemporaryChartCell(grammar, false);
              slot++;
            }
            return cells;
          }
        };
  }