public void train(int[] instanceLengths, String trainfile, File train_forest) throws IOException { // System.out.print("About to train. "); // System.out.print("Num Feats: " + pipe.dataAlphabet.size()); int i = 0; for (i = 0; i < options.numIters; i++) { System.out.print(" Iteration " + i); // System.out.println("========================"); // System.out.println("Iteration: " + i); // System.out.println("========================"); System.out.print("["); long start = System.currentTimeMillis(); trainingIter(instanceLengths, trainfile, train_forest, i + 1); long end = System.currentTimeMillis(); // System.out.println("Training iter took: " + (end-start)); System.out.println("|Time:" + (end - start) + "]"); } params.averageParams(i * instanceLengths.length); }
public void train(int[] instanceLengths, int[] ignore, String trainfile, File train_forest) throws IOException { int i = 0; for (i = 0; i < options.numIters; i++) { System.out.print(" Iteration " + i); System.out.print("["); long start = System.currentTimeMillis(); trainingIter(instanceLengths, ignore, trainfile, train_forest, i + 1); long end = System.currentTimeMillis(); // System.out.println("Training iter took: " + (end-start)); System.out.println("|Time:" + (end - start) + "]"); } params.averageParams(i * countActualInstances(ignore)); // afm 06-04-08 if (options.separateLab) { LabelClassifier oc = new LabelClassifier( options, instanceLengths, ignore, trainfile, train_forest, this, pipe); try { classifier = oc.trainClassifier(100); } catch (Exception e) { e.printStackTrace(); } } }
///////////////////////////////////////////////////// // RUNNING THE PARSER //////////////////////////////////////////////////// public static void main(String[] args) throws FileNotFoundException, Exception { System.setProperty("java.io.tmpdir", "./tmp/"); ParserOptions options = new ParserOptions(args); System.out.println("Default temp directory:" + System.getProperty("java.io.tmpdir")); System.out.println("Separate labeling: " + options.separateLab); if (options.train) { DependencyPipe pipe = options.secondOrder ? new DependencyPipe2O(options) : new DependencyPipe(options); int[] instanceLengths = pipe.createInstances(options.trainfile, options.trainforest); pipe.closeAlphabets(); DependencyParser dp = new DependencyParser(pipe, options); // pipe.printModelStats(null); int numFeats = pipe.dataAlphabet.size(); int numTypes = pipe.typeAlphabet.size(); System.out.print("Num Feats: " + numFeats); System.out.println(".\tNum Edge Labels: " + numTypes); if (options .stackedLevel0) // Augment training data with output predictions, for stacked learning // (afm 03-03-08) { // Output data augmented with output predictions System.out.println("Augmenting training data with output predictions..."); options.testfile = options.trainfile; dp.augment( instanceLengths, options.trainfile, options.trainforest, options.augmentNumParts); // Now train the base classifier in the whole corpus, nothing being ignored System.out.println("Training the base classifier in the whole corpus..."); } // afm 03-06-08 --- To allow some instances to be ignored int ignore[] = new int[instanceLengths.length]; for (int i = 0; i < instanceLengths.length; i++) ignore[i] = 0; dp.params = new Parameters(pipe.dataAlphabet.size()); dp.train(instanceLengths, ignore, options.trainfile, options.trainforest); System.out.print("Saving model..."); dp.saveModel(options.modelName); System.out.print("done."); } if (options.test) { DependencyPipe pipe = options.secondOrder ? 
new DependencyPipe2O(options) : new DependencyPipe(options); DependencyParser dp = new DependencyParser(pipe, options); System.out.print("\tLoading model..."); dp.loadModel(options.modelName); System.out.println("done."); pipe.printModelStats(dp.params); pipe.closeAlphabets(); dp.outputParses(null); } System.out.println(); if (options.eval) { System.out.println("\nEVALUATION PERFORMANCE:"); DependencyEvaluator.evaluate(options.goldfile, options.outfile, options.format); } }
/**
 * Fills {@code Abig}/{@code Bbig} with a fixed pattern and prints how long each solver takes on
 * that system, in seconds.
 *
 * <p>The solvers are run in the order Minres, SymmLQ, ConjugateGradient, JSciSolver. No result is
 * checked; only the elapsed time is printed.
 *
 * @throws NoConvergenceException propagated from the solvers
 */
public void testBig() throws NoConvergenceException {
  final int size = Abig.length;

  // Diagonal entries and right-hand side.
  for (int d = 0; d < size; ++d) {
    Abig[d][d] = 0.1;
    Bbig[d] = d;
  }

  // First additive term: row * col / 1000.
  for (int row = 0; row < size; ++row) {
    for (int col = 0; col < size; ++col) {
      Abig[row][col] += row * col / 1000.0;
    }
  }

  // Second additive term: (row mod 3) * (col mod 3) / 10.
  for (int row = 0; row < size; ++row) {
    for (int col = 0; col < size; ++col) {
      Abig[row][col] += (row % 3) * (col % 3) / 10.0;
    }
  }

  long tic;
  double seconds;

  tic = System.currentTimeMillis();
  Minres.solve(Abig, Bbig);
  seconds = (System.currentTimeMillis() - tic) / 1000.0;
  System.out.println("Minres took " + seconds + " seconds");

  tic = System.currentTimeMillis();
  SymmLQ.solve(Abig, Bbig);
  seconds = (System.currentTimeMillis() - tic) / 1000.0;
  System.out.println("Symmlq took " + seconds + " seconds");

  tic = System.currentTimeMillis();
  ConjugateGradient.solve(Abig, Bbig);
  seconds = (System.currentTimeMillis() - tic) / 1000.0;
  System.out.println("ConjugateGradient took " + seconds + " seconds");

  tic = System.currentTimeMillis();
  JSciSolver.solve(Abig, Bbig);
  seconds = (System.currentTimeMillis() - tic) / 1000.0;
  System.out.println("LU Decomposition took " + seconds + " seconds");
}
/**
 * Processes one batch of files and records the results; must not run on the dispatch thread.
 *
 * <p>Every file is handed to a processor that reports progress on {@code indicator}, calls {@code
 * processFile} for non-directory files accepted by {@code toResolve}, and stores the returned
 * forward ids keyed by the file's id. Files that were handled are removed from the pending set.
 * Whatever is still pending when processing ends (normally or exceptionally) is re-queued in the
 * {@code finally} block, which also stores the collected ids, logs the batch statistics and
 * notifies the listeners when {@code filesToResolve} is empty.
 *
 * @param indicator progress indicator updated while processing; it is also cast to {@code
 *     Disposable}
 * @param files files to process; the pending set is a synchronized view over this same set, so
 *     handled files are removed from it
 */
private void processBatch(
    @NotNull final ProgressIndicator indicator, @NotNull Set<VirtualFile> files) {
  assert !myApplication.isDispatchThread();
  // Snapshot the field: it is overwritten in the finally block below.
  final int resolvedInPreviousBatch = this.resolvedInPreviousBatch;
  final int totalSize = files.size() + resolvedInPreviousBatch;
  final ConcurrentIntObjectMap<int[]> fileToForwardIds =
      ContainerUtil.createConcurrentIntObjectMap();
  // Synchronized view backed by `files`; removals here also shrink `files`.
  final Set<VirtualFile> toProcess = Collections.synchronizedSet(files);
  indicator.setIndeterminate(false);
  ProgressIndicatorUtils.forceWriteActionPriority(indicator, (Disposable) indicator);
  long start = System.currentTimeMillis();
  Processor<VirtualFile> processor =
      file -> {
        // Progress is derived from how many files are still pending.
        double fraction = 1 - toProcess.size() * 1.0 / totalSize;
        indicator.setFraction(fraction);
        try {
          if (!file.isDirectory() && toResolve(file, myProject)) {
            int fileId = getAbsId(file);
            int i = totalSize - toProcess.size();
            indicator.setText(i + "/" + totalSize + ": Resolving " + file.getPresentableUrl());
            int[] forwardIds = processFile(file, fileId, indicator);
            if (forwardIds == null) {
              // queueUpdate(file);
              // The file stays in toProcess and is re-queued in the finally block.
              // NOTE(review): returning false presumably stops processFilesConcurrently — confirm.
              return false;
            }
            fileToForwardIds.put(fileId, forwardIds);
          }
          // Directories and files rejected by toResolve are also dropped from the pending set.
          toProcess.remove(file);
          return true;
        } catch (RuntimeException e) {
          // Only cancellation is acted upon here; if checkCanceled() does not throw, the
          // exception is dropped and the file remains in toProcess.
          indicator.checkCanceled();
        }
        return true;
      };
  boolean success = true;
  try {
    success = processFilesConcurrently(files, indicator, processor);
  } finally {
    // Remember how many were resolved so the next batch can continue the progress count.
    this.resolvedInPreviousBatch = toProcess.isEmpty() ? 0 : totalSize - toProcess.size();
    queue(toProcess, "re-added after fail. success=" + success);
    storeIds(fileToForwardIds);
    long end = System.currentTimeMillis();
    // Elapsed time is logged in whole seconds (integer division).
    log(
        "Resolved batch of "
            + (totalSize - toProcess.size())
            + " from "
            + totalSize
            + " files in "
            + ((end - start) / 1000)
            + "sec. (Gap: "
            + storage.gap
            + ")");
    synchronized (filesToResolve) {
      upToDate = filesToResolve.isEmpty();
      log("upToDate = " + upToDate);
      if (upToDate) {
        // Listeners are invoked while holding the filesToResolve lock.
        for (Listener listener : myListeners) {
          listener.allFilesResolved();
        }
      }
    }
  }
}
////////////////////////////////////////////////////// // Get Best Parses /////////////////////////////////// ////////////////////////////////////////////////////// public void outputParses() throws IOException { String tFile = options.testfile; String file = options.outfile; long start = System.currentTimeMillis(); pipe.initInputFile(tFile); pipe.initOutputFile(file); System.out.print("Processing Sentence: "); DependencyInstance instance = pipe.nextInstance(); int cnt = 0; while (instance != null) { cnt++; System.out.print(cnt + " "); String[] forms = instance.forms; int length = forms.length; FeatureVector[][][] fvs = new FeatureVector[forms.length][forms.length][2]; double[][][] probs = new double[forms.length][forms.length][2]; FeatureVector[][][][] nt_fvs = new FeatureVector[forms.length][pipe.types.length][2][2]; double[][][][] nt_probs = new double[forms.length][pipe.types.length][2][2]; FeatureVector[][][] fvs_trips = new FeatureVector[length][length][length]; double[][][] probs_trips = new double[length][length][length]; FeatureVector[][][] fvs_sibs = new FeatureVector[length][length][2]; double[][][] probs_sibs = new double[length][length][2]; if (options.secondOrder) ((DependencyPipe2O) pipe) .fillFeatureVectors( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, params); else pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params); int K = options.testK; Object[][] d = null; if (options.decodeType.equals("proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } if (options.decodeType.equals("non-proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeNonProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = 
decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } String[] res = ((String) d[0][1]).split(" "); String[] pos = instance.cpostags; String[] formsNoRoot = new String[forms.length - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] labels = new String[formsNoRoot.length]; int[] heads = new int[formsNoRoot.length]; Arrays.toString(forms); Arrays.toString(res); for (int j = 0; j < formsNoRoot.length; j++) { formsNoRoot[j] = forms[j + 1]; posNoRoot[j] = pos[j + 1]; String[] trip = res[j].split("[\\|:]"); labels[j] = pipe.types[Integer.parseInt(trip[2])]; heads[j] = Integer.parseInt(trip[0]); } pipe.outputInstance(new DependencyInstance(formsNoRoot, posNoRoot, labels, heads)); // String line1 = ""; String line2 = ""; String line3 = ""; String line4 = ""; // for(int j = 1; j < pos.length; j++) { // String[] trip = res[j-1].split("[\\|:]"); // line1+= sent[j] + "\t"; line2 += pos[j] + "\t"; // line4 += trip[0] + "\t"; line3 += pipe.types[Integer.parseInt(trip[2])] + "\t"; // } // pred.write(line1.trim() + "\n" + line2.trim() + "\n" // + (pipe.labeled ? line3.trim() + "\n" : "") // + line4.trim() + "\n\n"); instance = pipe.nextInstance(); } pipe.close(); long end = System.currentTimeMillis(); System.out.println("Took: " + (end - start)); }
////////////////////////////////////////////////////// // Get Best Parses /////////////////////////////////// ////////////////////////////////////////////////////// public void outputParses(int[] ignore) throws IOException { String tFile = options.testfile; String file = options.outfile; long start = System.currentTimeMillis(); pipe.initInputFile(tFile); // if (ignore == null) // afm 03-07-2008 --- If this is called for each partition, must have // initialized output file before if (!options.train || !options .stackedLevel0) // afm 03-07-2008 --- If this is called for each partition, must have // initialized output file before pipe.initOutputFile(file); System.out.print("Processing Sentence: "); DependencyInstance instance = pipe.nextInstance(); int cnt = 0; int i = 0; LabelClassifier oc = new LabelClassifier(options); while (instance != null) { cnt++; System.out.print(cnt + " "); String[] forms = instance.forms; int length = forms.length; // afm 03-07-08 --- If this instance is to be ignored, just go for the next one if (ignore != null && ignore[i] != 0) { instance = pipe.nextInstance(); i++; continue; } FeatureVector[][][] fvs = new FeatureVector[forms.length][forms.length][2]; double[][][] probs = new double[forms.length][forms.length][2]; FeatureVector[][][][] nt_fvs = new FeatureVector[forms.length][pipe.types.length][2][2]; double[][][][] nt_probs = new double[forms.length][pipe.types.length][2][2]; FeatureVector[][][] fvs_trips = new FeatureVector[length][length][length]; double[][][] probs_trips = new double[length][length][length]; FeatureVector[][][] fvs_sibs = new FeatureVector[length][length][2]; double[][][] probs_sibs = new double[length][length][2]; if (options.secondOrder) ((DependencyPipe2O) pipe) .fillFeatureVectors( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, params); else pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params); int K = options.testK; Object[][] d = null; if 
(options.decodeType.equals("proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } if (options.decodeType.equals("non-proj")) { if (options.secondOrder) { d = ((DependencyDecoder2O) decoder) .decodeNonProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); } else d = decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } String[] res = ((String) d[0][1]).split(" "); String[] pos = instance.cpostags; String[] formsNoRoot = new String[forms.length - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] labels = new String[formsNoRoot.length]; int[] heads = new int[formsNoRoot.length]; Arrays.toString(forms); Arrays.toString(res); for (int j = 0; j < formsNoRoot.length; j++) { formsNoRoot[j] = forms[j + 1]; posNoRoot[j] = pos[j + 1]; String[] trip = res[j].split("[\\|:]"); labels[j] = pipe.types[Integer.parseInt(trip[2])]; heads[j] = Integer.parseInt(trip[0]); } // afm 06-04-08 if (options.separateLab) { /* * ask whether instance contains level0 information */ /* * Note, forms and pos have the root. 
labels and heads do not */ if (options.stackedLevel1) labels = oc.outputLabels( classifier, instance.forms, instance.postags, labels, heads, instance.deprels_pred, instance.heads_pred, instance); else labels = oc.outputLabels( classifier, instance.forms, instance.postags, labels, heads, null, null, instance); } // afm 03-07-08 // if (ignore == null) if (options.stackedLevel0 == false) pipe.outputInstance(new DependencyInstance(formsNoRoot, posNoRoot, labels, heads)); else { int[] headsNoRoot = new int[instance.heads.length - 1]; String[] labelsNoRoot = new String[instance.heads.length - 1]; for (int j = 0; j < headsNoRoot.length; j++) { headsNoRoot[j] = instance.heads[j + 1]; labelsNoRoot[j] = instance.deprels[j + 1]; } DependencyInstance out_inst = new DependencyInstance(formsNoRoot, posNoRoot, labelsNoRoot, headsNoRoot); out_inst.stacked = true; out_inst.heads_pred = heads; out_inst.deprels_pred = labels; pipe.outputInstance(out_inst); } // String line1 = ""; String line2 = ""; String line3 = ""; String line4 = ""; // for(int j = 1; j < pos.length; j++) { // String[] trip = res[j-1].split("[\\|:]"); // line1+= sent[j] + "\t"; line2 += pos[j] + "\t"; // line4 += trip[0] + "\t"; line3 += pipe.types[Integer.parseInt(trip[2])] + "\t"; // } // pred.write(line1.trim() + "\n" + line2.trim() + "\n" // + (pipe.labeled ? line3.trim() + "\n" : "") // + line4.trim() + "\n\n"); instance = pipe.nextInstance(); i++; } // if (ignore == null) // afm 03-07-2008 --- If this is called for each partition (ignore != // null), must close pipe outside the loop if (!options.train || !options .stackedLevel0) // afm 03-07-2008 --- If this is called for each partition (ignore != // null), must close pipe outside the loop pipe.close(); long end = System.currentTimeMillis(); System.out.println("Took: " + (end - start)); }