Exemple #1
0
  /**
   * Runs {@code options.numIters} training passes over the training data and then averages the
   * parameters.
   *
   * <p>Each pass delegates to {@code trainingIter} with a 1-based iteration number and prints the
   * wall-clock time it took, in milliseconds, to standard output.
   *
   * @param instanceLengths one entry per training instance; only its length is used here, the
   *     array itself is forwarded to {@code trainingIter}
   * @param trainfile training file, forwarded to {@code trainingIter}
   * @param train_forest training forest file, forwarded to {@code trainingIter}
   * @throws IOException if a training pass fails with an I/O error
   */
  public void train(int[] instanceLengths, String trainfile, File train_forest) throws IOException {
    int iteration = 0;
    while (iteration < options.numIters) {
      System.out.print(" Iteration " + iteration);
      System.out.print("[");

      final long startedAt = System.currentTimeMillis();
      trainingIter(instanceLengths, trainfile, train_forest, iteration + 1);
      final long elapsedMillis = System.currentTimeMillis() - startedAt;

      System.out.println("|Time:" + elapsedMillis + "]");
      iteration++;
    }

    // iteration now equals the number of passes that were run.
    params.averageParams(iteration * instanceLengths.length);
  }
  /**
   * Runs {@code options.numIters} training passes, averages the parameters and, when
   * {@code options.separateLab} is set, trains the separate label classifier.
   *
   * <p>Each pass delegates to {@code trainingIter} with a 1-based iteration number and prints the
   * wall-clock time it took, in milliseconds, to standard output. The averaging denominator is the
   * number of passes times {@code countActualInstances(ignore)}.
   *
   * @param instanceLengths per-instance data forwarded to {@code trainingIter} and to the label
   *     classifier
   * @param ignore per-instance flags forwarded to {@code trainingIter},
   *     {@code countActualInstances} and the label classifier
   * @param trainfile training file
   * @param train_forest training forest file
   * @throws IOException if a training pass fails with an I/O error
   */
  public void train(int[] instanceLengths, int[] ignore, String trainfile, File train_forest)
      throws IOException {

    int pass = 0;
    while (pass < options.numIters) {
      System.out.print(" Iteration " + pass);
      System.out.print("[");

      final long began = System.currentTimeMillis();
      trainingIter(instanceLengths, ignore, trainfile, train_forest, pass + 1);
      final long tookMillis = System.currentTimeMillis() - began;

      System.out.println("|Time:" + tookMillis + "]");
      pass++;
    }
    params.averageParams(pass * countActualInstances(ignore));

    // afm 06-04-08: the label classifier is only trained when labeling is done separately.
    if (!options.separateLab) {
      return;
    }

    LabelClassifier labeler =
        new LabelClassifier(options, instanceLengths, ignore, trainfile, train_forest, this, pipe);
    try {
      classifier = labeler.trainClassifier(100);
    } catch (Exception e) {
      // A failed classifier training is reported but does not abort the caller.
      e.printStackTrace();
    }
  }
  /////////////////////////////////////////////////////
  // RUNNING THE PARSER
  ////////////////////////////////////////////////////
  /**
   * Command-line entry point: depending on the parsed options it trains a model, parses the test
   * file with a saved model, and/or evaluates the output against the gold file.
   *
   * <p>The three phases are independent and run in the order train, test, eval.
   *
   * @param args command-line arguments handed to {@code ParserOptions}
   */
  public static void main(String[] args) throws FileNotFoundException, Exception {
    System.setProperty("java.io.tmpdir", "./tmp/");
    ParserOptions options = new ParserOptions(args);
    System.out.println("Default temp directory:" + System.getProperty("java.io.tmpdir"));

    System.out.println("Separate labeling: " + options.separateLab);

    if (options.train) {
      DependencyPipe trainPipe;
      if (options.secondOrder) {
        trainPipe = new DependencyPipe2O(options);
      } else {
        trainPipe = new DependencyPipe(options);
      }

      int[] sentenceLengths = trainPipe.createInstances(options.trainfile, options.trainforest);
      trainPipe.closeAlphabets();
      DependencyParser trainer = new DependencyParser(trainPipe, options);

      int featureCount = trainPipe.dataAlphabet.size();
      int labelCount = trainPipe.typeAlphabet.size();
      System.out.println("Num Feats: " + featureCount + ".\tNum Edge Labels: " + labelCount);

      // afm 03-03-08: for stacked learning, augment the training data with output predictions
      // before training the base classifier.
      if (options.stackedLevel0) {
        System.out.println("Augmenting training data with output predictions...");
        options.testfile = options.trainfile;
        trainer.augment(
            sentenceLengths, options.trainfile, options.trainforest, options.augmentNumParts);
        // The base classifier is then trained on the whole corpus, nothing being ignored.
        System.out.println("Training the base classifier in the whole corpus...");
      }

      // afm 03-06-08: allows some instances to be ignored. A fresh int[] is all zeros, so no
      // instance is ignored here.
      int[] nothingIgnored = new int[sentenceLengths.length];

      trainer.params = new Parameters(trainPipe.dataAlphabet.size());
      trainer.train(sentenceLengths, nothingIgnored, options.trainfile, options.trainforest);

      System.out.print("Saving model...");
      trainer.saveModel(options.modelName);
      System.out.print("done.");
    }

    if (options.test) {
      DependencyPipe testPipe;
      if (options.secondOrder) {
        testPipe = new DependencyPipe2O(options);
      } else {
        testPipe = new DependencyPipe(options);
      }
      DependencyParser parser = new DependencyParser(testPipe, options);

      System.out.print("\tLoading model...");
      parser.loadModel(options.modelName);
      System.out.println("done.");

      testPipe.printModelStats(parser.params);
      testPipe.closeAlphabets();
      parser.outputParses(null);
    }

    System.out.println();

    if (options.eval) {
      System.out.println("\nEVALUATION PERFORMANCE:");
      DependencyEvaluator.evaluate(options.goldfile, options.outfile, options.format);
    }
  }
 /**
  * Fills the {@code Abig}/{@code Bbig} system and prints how long each solver takes on it.
  *
  * <p>The matrix is built in three passes: the diagonal is set to 0.1 (and {@code Bbig[k]} to
  * {@code k}), then {@code k * l / 1000.0} and {@code (k % 3) * (l % 3) / 10.0} are added to every
  * cell. Minres, SymmLQ, ConjugateGradient and JSciSolver are then run in that order, each timed
  * with wall-clock milliseconds and reported in seconds.
  *
  * @throws NoConvergenceException declared for the solver calls
  */
 public void testBig() throws NoConvergenceException {
   final int size = Abig.length;

   // Pass 1: diagonal seed and right-hand side.
   for (int row = 0; row < size; row++) {
     Abig[row][row] = 0.1;
     Bbig[row] = row;
   }
   // Pass 2: add the product of the indices, scaled down.
   for (int row = 0; row < size; row++) {
     for (int col = 0; col < size; col++) {
       Abig[row][col] += row * col / 1000.0;
     }
   }
   // Pass 3: add the product of the indices modulo 3, scaled down.
   for (int row = 0; row < size; row++) {
     for (int col = 0; col < size; col++) {
       Abig[row][col] += (row % 3) * (col % 3) / 10.0;
     }
   }

   long tic = System.currentTimeMillis();
   Minres.solve(Abig, Bbig);
   long elapsedMillis = System.currentTimeMillis() - tic;
   System.out.println("Minres took " + elapsedMillis / 1000.0 + " seconds");

   tic = System.currentTimeMillis();
   SymmLQ.solve(Abig, Bbig);
   elapsedMillis = System.currentTimeMillis() - tic;
   System.out.println("Symmlq took " + elapsedMillis / 1000.0 + " seconds");

   tic = System.currentTimeMillis();
   ConjugateGradient.solve(Abig, Bbig);
   elapsedMillis = System.currentTimeMillis() - tic;
   System.out.println("ConjugateGradient took " + elapsedMillis / 1000.0 + " seconds");

   tic = System.currentTimeMillis();
   JSciSolver.solve(Abig, Bbig);
   elapsedMillis = System.currentTimeMillis() - tic;
   System.out.println("LU Decomposition took " + elapsedMillis / 1000.0 + " seconds");
 }
  /**
   * Resolves one batch of files through {@code processFilesConcurrently} and records the outcome.
   *
   * <p>Must not be called on the dispatch thread (asserted). Whether or not processing succeeds,
   * the {@code finally} block re-queues the files still left in the batch, stores the forward ids
   * collected so far, logs a summary, and notifies listeners when {@code filesToResolve} is empty.
   *
   * @param indicator progress indicator updated with fraction and text; it is also cast to
   *     {@code Disposable}, so it has to implement that interface
   * @param files the batch to resolve; files are removed from this set as they are processed,
   *     because the working set is a synchronized view over it
   */
  private void processBatch(
      @NotNull final ProgressIndicator indicator, @NotNull Set<VirtualFile> files) {
    assert !myApplication.isDispatchThread();
    // Snapshot the field: it is overwritten in the finally block below.
    final int resolvedInPreviousBatch = this.resolvedInPreviousBatch;
    // Progress is reported against this batch plus what the previous batch already resolved.
    final int totalSize = files.size() + resolvedInPreviousBatch;
    final ConcurrentIntObjectMap<int[]> fileToForwardIds =
        ContainerUtil.createConcurrentIntObjectMap();
    // Synchronized view backed by `files`; whatever remains in it afterwards was not processed.
    final Set<VirtualFile> toProcess = Collections.synchronizedSet(files);
    indicator.setIndeterminate(false);
    ProgressIndicatorUtils.forceWriteActionPriority(indicator, (Disposable) indicator);
    long start = System.currentTimeMillis();
    Processor<VirtualFile> processor =
        file -> {
          // Fraction done = share of totalSize that is no longer waiting in toProcess.
          double fraction = 1 - toProcess.size() * 1.0 / totalSize;
          indicator.setFraction(fraction);
          try {
            if (!file.isDirectory() && toResolve(file, myProject)) {
              int fileId = getAbsId(file);
              int i = totalSize - toProcess.size();
              indicator.setText(i + "/" + totalSize + ": Resolving " + file.getPresentableUrl());
              int[] forwardIds = processFile(file, fileId, indicator);
              if (forwardIds == null) {
                // No result for this file: return false and leave it in toProcess so that it is
                // re-queued in the finally block.
                // queueUpdate(file);
                return false;
              }
              fileToForwardIds.put(fileId, forwardIds);
            }
            // Reached for directories, for files toResolve() rejects, and for resolved files.
            toProcess.remove(file);
            return true;
          } catch (RuntimeException e) {
            // checkCanceled() may throw; if it does not, the exception is dropped and the file
            // stays in toProcess while processing continues.
            indicator.checkCanceled();
          }
          return true;
        };
    boolean success = true;
    try {
      success = processFilesConcurrently(files, indicator, processor);
    } finally {
      // Carry the resolved count over to the next batch only if this one left files behind.
      this.resolvedInPreviousBatch = toProcess.isEmpty() ? 0 : totalSize - toProcess.size();
      queue(toProcess, "re-added after fail. success=" + success);
      storeIds(fileToForwardIds);

      long end = System.currentTimeMillis();
      log(
          "Resolved batch of "
              + (totalSize - toProcess.size())
              + " from "
              + totalSize
              + " files in "
              + ((end - start) / 1000)
              + "sec. (Gap: "
              + storage.gap
              + ")");
      // The up-to-date flag and the listener notification are done while holding the
      // filesToResolve monitor.
      synchronized (filesToResolve) {
        upToDate = filesToResolve.isEmpty();
        log("upToDate = " + upToDate);
        if (upToDate) {
          for (Listener listener : myListeners) {
            listener.allFilesResolved();
          }
        }
      }
    }
  }
Exemple #6
0
  //////////////////////////////////////////////////////
  // Get Best Parses ///////////////////////////////////
  //////////////////////////////////////////////////////
  /**
   * Parses every instance read from {@code options.testfile} and writes the best parse of each to
   * {@code options.outfile}.
   *
   * <p>For each instance the feature vectors and scores are filled in by the pipe, the decoder
   * selected by {@code options.decodeType} ({@code "proj"} or {@code "non-proj"}) is run with
   * {@code options.testK}, and the first returned parse is converted to heads and labels with the
   * root token (index 0) stripped. Progress and total time in milliseconds go to standard output.
   *
   * <p>The pipe is closed even when parsing fails part-way. If {@code pipe.close()} itself throws
   * in that situation, its exception replaces the original one.
   *
   * @throws IOException if reading the input or writing the output fails
   * @throws IllegalStateException if no parse was produced for an instance, e.g. because
   *     {@code options.decodeType} is neither {@code "proj"} nor {@code "non-proj"}
   */
  public void outputParses() throws IOException {

    String tFile = options.testfile;
    String file = options.outfile;

    long start = System.currentTimeMillis();

    pipe.initInputFile(tFile);
    pipe.initOutputFile(file);

    try {
      System.out.print("Processing Sentence: ");
      DependencyInstance instance = pipe.nextInstance();
      int cnt = 0;
      while (instance != null) {
        cnt++;
        System.out.print(cnt + " ");
        String[] forms = instance.forms;

        int length = forms.length;

        FeatureVector[][][] fvs = new FeatureVector[length][length][2];
        double[][][] probs = new double[length][length][2];
        FeatureVector[][][][] nt_fvs = new FeatureVector[length][pipe.types.length][2][2];
        double[][][][] nt_probs = new double[length][pipe.types.length][2][2];

        // The second-order tables are cubic in the sentence length and are only ever passed to
        // the second-order pipe/decoder, so they are not allocated for first-order parsing.
        FeatureVector[][][] fvs_trips = null;
        double[][][] probs_trips = null;
        FeatureVector[][][] fvs_sibs = null;
        double[][][] probs_sibs = null;

        if (options.secondOrder) {
          fvs_trips = new FeatureVector[length][length][length];
          probs_trips = new double[length][length][length];
          fvs_sibs = new FeatureVector[length][length][2];
          probs_sibs = new double[length][length][2];
          ((DependencyPipe2O) pipe)
              .fillFeatureVectors(
                  instance,
                  fvs,
                  probs,
                  fvs_trips,
                  probs_trips,
                  fvs_sibs,
                  probs_sibs,
                  nt_fvs,
                  nt_probs,
                  params);
        } else {
          pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params);
        }

        int K = options.testK;
        Object[][] d = null;
        if (options.decodeType.equals("proj")) {
          if (options.secondOrder) {
            d =
                ((DependencyDecoder2O) decoder)
                    .decodeProjective(
                        instance,
                        fvs,
                        probs,
                        fvs_trips,
                        probs_trips,
                        fvs_sibs,
                        probs_sibs,
                        nt_fvs,
                        nt_probs,
                        K);
          } else {
            d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K);
          }
        } else if (options.decodeType.equals("non-proj")) {
          if (options.secondOrder) {
            d =
                ((DependencyDecoder2O) decoder)
                    .decodeNonProjective(
                        instance,
                        fvs,
                        probs,
                        fvs_trips,
                        probs_trips,
                        fvs_sibs,
                        probs_sibs,
                        nt_fvs,
                        nt_probs,
                        K);
          } else {
            d = decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K);
          }
        }

        // Fail with a descriptive message instead of a bare NullPointerException below.
        if (d == null) {
          throw new IllegalStateException(
              "No parse produced for sentence "
                  + cnt
                  + " (decodeType="
                  + options.decodeType
                  + ")");
        }

        // d[0][1] is the best parse as a space-separated string, one token per non-root word.
        String[] res = ((String) d[0][1]).split(" ");

        String[] pos = instance.cpostags;

        String[] formsNoRoot = new String[forms.length - 1];
        String[] posNoRoot = new String[formsNoRoot.length];
        String[] labels = new String[formsNoRoot.length];
        int[] heads = new int[formsNoRoot.length];

        for (int j = 0; j < formsNoRoot.length; j++) {
          formsNoRoot[j] = forms[j + 1];
          posNoRoot[j] = pos[j + 1];

          // Each token is split on '|' and ':'; field 0 is the head index and field 2 the index
          // of the edge label in pipe.types.
          String[] trip = res[j].split("[\\|:]");
          labels[j] = pipe.types[Integer.parseInt(trip[2])];
          heads[j] = Integer.parseInt(trip[0]);
        }

        pipe.outputInstance(new DependencyInstance(formsNoRoot, posNoRoot, labels, heads));

        instance = pipe.nextInstance();
      }
    } finally {
      pipe.close();
    }

    long end = System.currentTimeMillis();
    System.out.println("Took: " + (end - start));
  }
  //////////////////////////////////////////////////////
  // Get Best Parses ///////////////////////////////////
  //////////////////////////////////////////////////////
  /**
   * Parses the instances read from {@code options.testfile}, optionally skipping some of them, and
   * writes one output instance per parsed sentence.
   *
   * <p>For each instance that is not skipped, the feature vectors and scores are filled in by the
   * pipe, the decoder selected by {@code options.decodeType} ({@code "proj"} or
   * {@code "non-proj"}) is run with {@code options.testK}, and the first returned parse is
   * converted to heads and labels with the root token (index 0) stripped. When
   * {@code options.separateLab} is set the labels are replaced by the label classifier's output.
   * When {@code options.stackedLevel0} is set, the instance's own heads and labels are written and
   * the predictions are attached as {@code heads_pred}/{@code deprels_pred}; otherwise the
   * predictions themselves are written.
   *
   * <p>Unless both {@code options.train} and {@code options.stackedLevel0} are set, this method
   * opens the output file and closes the pipe itself, and the close also happens when parsing
   * fails part-way. In the train + stackedLevel0 case the method is called once per partition
   * (afm 03-07-2008), so the caller must have initialized the output file beforehand and must
   * close the pipe afterwards.
   *
   * @param ignore per-instance flags, indexed by instance position in the input; an instance
   *     whose entry is non-zero is skipped. {@code null} means nothing is skipped.
   * @throws IOException if reading the input or writing the output fails
   * @throws IllegalStateException if no parse was produced for an instance, e.g. because
   *     {@code options.decodeType} is neither {@code "proj"} nor {@code "non-proj"}
   */
  public void outputParses(int[] ignore) throws IOException {

    String tFile = options.testfile;
    String file = options.outfile;

    long start = System.currentTimeMillis();

    // Decided once so that opening and closing the output always agree.
    final boolean ownsOutputFile = !options.train || !options.stackedLevel0;

    pipe.initInputFile(tFile);
    if (ownsOutputFile) {
      pipe.initOutputFile(file);
    }

    try {
      System.out.print("Processing Sentence: ");
      DependencyInstance instance = pipe.nextInstance();
      int cnt = 0;
      int i = 0;
      LabelClassifier oc = new LabelClassifier(options);
      while (instance != null) {
        cnt++;
        System.out.print(cnt + " ");
        String[] forms = instance.forms;

        int length = forms.length;

        // afm 03-07-08 --- If this instance is to be ignored, just go for the next one
        if (ignore != null && ignore[i] != 0) {
          instance = pipe.nextInstance();
          i++;
          continue;
        }

        FeatureVector[][][] fvs = new FeatureVector[length][length][2];
        double[][][] probs = new double[length][length][2];
        FeatureVector[][][][] nt_fvs = new FeatureVector[length][pipe.types.length][2][2];
        double[][][][] nt_probs = new double[length][pipe.types.length][2][2];

        // The second-order tables are cubic in the sentence length and are only ever passed to
        // the second-order pipe/decoder, so they are not allocated for first-order parsing.
        FeatureVector[][][] fvs_trips = null;
        double[][][] probs_trips = null;
        FeatureVector[][][] fvs_sibs = null;
        double[][][] probs_sibs = null;

        if (options.secondOrder) {
          fvs_trips = new FeatureVector[length][length][length];
          probs_trips = new double[length][length][length];
          fvs_sibs = new FeatureVector[length][length][2];
          probs_sibs = new double[length][length][2];
          ((DependencyPipe2O) pipe)
              .fillFeatureVectors(
                  instance,
                  fvs,
                  probs,
                  fvs_trips,
                  probs_trips,
                  fvs_sibs,
                  probs_sibs,
                  nt_fvs,
                  nt_probs,
                  params);
        } else {
          pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params);
        }

        int K = options.testK;
        Object[][] d = null;

        if (options.decodeType.equals("proj")) {
          if (options.secondOrder) {
            d =
                ((DependencyDecoder2O) decoder)
                    .decodeProjective(
                        instance,
                        fvs,
                        probs,
                        fvs_trips,
                        probs_trips,
                        fvs_sibs,
                        probs_sibs,
                        nt_fvs,
                        nt_probs,
                        K);
          } else {
            d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K);
          }
        } else if (options.decodeType.equals("non-proj")) {
          if (options.secondOrder) {
            d =
                ((DependencyDecoder2O) decoder)
                    .decodeNonProjective(
                        instance,
                        fvs,
                        probs,
                        fvs_trips,
                        probs_trips,
                        fvs_sibs,
                        probs_sibs,
                        nt_fvs,
                        nt_probs,
                        K);
          } else {
            d = decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K);
          }
        }

        // Fail with a descriptive message instead of a bare NullPointerException below.
        if (d == null) {
          throw new IllegalStateException(
              "No parse produced for sentence "
                  + cnt
                  + " (decodeType="
                  + options.decodeType
                  + ")");
        }

        // d[0][1] is the best parse as a space-separated string, one token per non-root word.
        String[] res = ((String) d[0][1]).split(" ");
        String[] pos = instance.cpostags;

        String[] formsNoRoot = new String[forms.length - 1];
        String[] posNoRoot = new String[formsNoRoot.length];
        String[] labels = new String[formsNoRoot.length];
        int[] heads = new int[formsNoRoot.length];

        for (int j = 0; j < formsNoRoot.length; j++) {
          formsNoRoot[j] = forms[j + 1];
          posNoRoot[j] = pos[j + 1];

          // Each token is split on '|' and ':'; field 0 is the head index and field 2 the index
          // of the edge label in pipe.types.
          String[] trip = res[j].split("[\\|:]");
          labels[j] = pipe.types[Integer.parseInt(trip[2])];
          heads[j] = Integer.parseInt(trip[0]);
        }

        // afm 06-04-08
        if (options.separateLab) {
          // Note: forms and pos include the root; labels and heads do not.
          // Level-0 predictions stored on the instance are only handed to the classifier when
          // running as a level-1 stacked parser.
          if (options.stackedLevel1) {
            labels =
                oc.outputLabels(
                    classifier,
                    instance.forms,
                    instance.postags,
                    labels,
                    heads,
                    instance.deprels_pred,
                    instance.heads_pred,
                    instance);
          } else {
            labels =
                oc.outputLabels(
                    classifier,
                    instance.forms,
                    instance.postags,
                    labels,
                    heads,
                    null,
                    null,
                    instance);
          }
        }

        // afm 03-07-08
        if (!options.stackedLevel0) {
          pipe.outputInstance(new DependencyInstance(formsNoRoot, posNoRoot, labels, heads));
        } else {
          // Write the instance's own heads/labels (root stripped) and attach the predictions.
          int[] headsNoRoot = new int[instance.heads.length - 1];
          String[] labelsNoRoot = new String[instance.heads.length - 1];
          for (int j = 0; j < headsNoRoot.length; j++) {
            headsNoRoot[j] = instance.heads[j + 1];
            labelsNoRoot[j] = instance.deprels[j + 1];
          }
          DependencyInstance out_inst =
              new DependencyInstance(formsNoRoot, posNoRoot, labelsNoRoot, headsNoRoot);
          out_inst.stacked = true;
          out_inst.heads_pred = heads;
          out_inst.deprels_pred = labels;
          pipe.outputInstance(out_inst);
        }

        instance = pipe.nextInstance();
        i++;
      }
    } finally {
      // afm 03-07-2008 --- If this is called for each partition, the pipe must be closed by the
      // caller, outside its loop.
      if (ownsOutputFile) {
        pipe.close();
      }
    }

    long end = System.currentTimeMillis();
    System.out.println("Took: " + (end - start));
  }