public void augment(int[] instanceLengths, String trainfile, File train_forest, int numParts) throws IOException { // System.out.print("About to train. "); // System.out.print("Num Feats: " + pipe.dataAlphabet.size()); int i, j; int[] ignore = new int[instanceLengths.length]; // String trainpartfile; // createPartitions(instanceLengths, trainfile, numParts); // for(i = 0; i < numParts; i++) // { // trainpartfile = trainfile + "." + i; // } int numInstances = instanceLengths.length; int numInstancesPerPart = numInstances / numParts; // The last partition becomes bigger pipe.initOutputFile(options.outfile); // Initialize the output file once for (j = 0; j < numParts; j++) { System.out.println("Training classifier for partition " + j); // Make partition for (i = 0; i < numInstances; i++) { if (i >= j * numInstancesPerPart && i < (j + 1) * numInstancesPerPart) ignore[i] = 1; // Mark to ignore this instance in training else ignore[i] = 0; } // Train on one split params = new Parameters(pipe.dataAlphabet.size()); train(instanceLengths, ignore, trainfile, train_forest); // Test on the other split System.out.println("Making predictions for partition " + j); for (i = 0; i < numInstances; i++) ignore[i] = 1 - ignore[i]; // Toggle ignore outputParses(ignore); } pipe.close(); // Close the output file once }
////////////////////////////////////////////////////// // Get Best Parses /////////////////////////////////// ////////////////////////////////////////////////////// public void outputParses() throws IOException { String tFile = options.testfile; String file = options.outfile; long start = System.currentTimeMillis(); pipe.initInputFile(tFile); pipe.initOutputFile(file); System.out.print("Processing Sentence: "); DependencyInstance instance = pipe.nextInstance(); int cnt = 0; while (instance != null) { cnt++; System.out.print(cnt + " "); String[] forms = instance.forms; int length = forms.length; FeatureVector[][][] fvs = new FeatureVector[forms.length][forms.length][2]; double[][][] probs = new double[forms.length][forms.length][2]; FeatureVector[][][][] nt_fvs = new FeatureVector[forms.length][pipe.types.length][2][2]; double[][][][] nt_probs = new double[forms.length][pipe.types.length][2][2]; FeatureVector[][][] fvs_trips = new FeatureVector[length][length][length]; double[][][] probs_trips = new double[length][length][length]; FeatureVector[][][] fvs_sibs = new FeatureVector[length][length][2]; double[][][] probs_sibs = new double[length][length][2]; if (options.secondOrder) ((DependencyPipe2O) pipe) .fillFeatureVectors( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, params); else pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params); int K = options.testK; Object[][] d = null; if (options.decodeType.equals("proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } if (options.decodeType.equals("non-proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeNonProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } String[] res = ((String) d[0][1]).split(" "); String[] pos = instance.cpostags; String[] formsNoRoot = new String[forms.length - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] labels = new String[formsNoRoot.length]; int[] heads = new int[formsNoRoot.length]; Arrays.toString(forms); Arrays.toString(res); for (int j = 0; j < formsNoRoot.length; j++) { formsNoRoot[j] = forms[j + 1]; posNoRoot[j] = pos[j + 1]; String[] trip = res[j].split("[\\|:]"); labels[j] = pipe.types[Integer.parseInt(trip[2])]; heads[j] = Integer.parseInt(trip[0]); } pipe.outputInstance(new DependencyInstance(formsNoRoot, posNoRoot, labels, heads)); // String line1 = ""; String line2 = ""; String line3 = ""; String line4 = ""; // for(int j = 1; j < pos.length; j++) { // String[] trip = res[j-1].split("[\\|:]"); // line1+= sent[j] + "\t"; line2 += pos[j] + "\t"; // line4 += trip[0] + "\t"; line3 += pipe.types[Integer.parseInt(trip[2])] + "\t"; // } // pred.write(line1.trim() + "\n" + line2.trim() + "\n" // + (pipe.labeled ? line3.trim() + "\n" : "") // + line4.trim() + "\n\n"); instance = pipe.nextInstance(); } pipe.close(); long end = System.currentTimeMillis(); System.out.println("Took: " + (end - start)); }
////////////////////////////////////////////////////// // Get Best Parses /////////////////////////////////// ////////////////////////////////////////////////////// public void outputParses(int[] ignore) throws IOException { String tFile = options.testfile; String file = options.outfile; long start = System.currentTimeMillis(); pipe.initInputFile(tFile); // if (ignore == null) // afm 03-07-2008 --- If this is called for each partition, must have // initialized output file before if (!options.train || !options .stackedLevel0) // afm 03-07-2008 --- If this is called for each partition, must have // initialized output file before pipe.initOutputFile(file); System.out.print("Processing Sentence: "); DependencyInstance instance = pipe.nextInstance(); int cnt = 0; int i = 0; LabelClassifier oc = new LabelClassifier(options); while (instance != null) { cnt++; System.out.print(cnt + " "); String[] forms = instance.forms; int length = forms.length; // afm 03-07-08 --- If this instance is to be ignored, just go for the next one if (ignore != null && ignore[i] != 0) { instance = pipe.nextInstance(); i++; continue; } FeatureVector[][][] fvs = new FeatureVector[forms.length][forms.length][2]; double[][][] probs = new double[forms.length][forms.length][2]; FeatureVector[][][][] nt_fvs = new FeatureVector[forms.length][pipe.types.length][2][2]; double[][][][] nt_probs = new double[forms.length][pipe.types.length][2][2]; FeatureVector[][][] fvs_trips = new FeatureVector[length][length][length]; double[][][] probs_trips = new double[length][length][length]; FeatureVector[][][] fvs_sibs = new FeatureVector[length][length][2]; double[][][] probs_sibs = new double[length][length][2]; if (options.secondOrder) ((DependencyPipe2O) pipe) .fillFeatureVectors( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, params); else pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params); int K = options.testK; Object[][] d = null; if (options.decodeType.equals("proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } if (options.decodeType.equals("non-proj")) { if (options.secondOrder) { d = ((DependencyDecoder2O) decoder) .decodeNonProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); } else d = decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } String[] res = ((String) d[0][1]).split(" "); String[] pos = instance.cpostags; String[] formsNoRoot = new String[forms.length - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] labels = new String[formsNoRoot.length]; int[] heads = new int[formsNoRoot.length]; Arrays.toString(forms); Arrays.toString(res); for (int j = 0; j < formsNoRoot.length; j++) { formsNoRoot[j] = forms[j + 1]; posNoRoot[j] = pos[j + 1]; String[] trip = res[j].split("[\\|:]"); labels[j] = pipe.types[Integer.parseInt(trip[2])]; heads[j] = Integer.parseInt(trip[0]); } // afm 06-04-08 if (options.separateLab) { /* * ask whether instance contains level0 information */ /* * Note, forms and pos have the root. labels and heads do not */ if (options.stackedLevel1) labels = oc.outputLabels( classifier, instance.forms, instance.postags, labels, heads, instance.deprels_pred, instance.heads_pred, instance); else labels = oc.outputLabels( classifier, instance.forms, instance.postags, labels, heads, null, null, instance); } // afm 03-07-08 // if (ignore == null) if (options.stackedLevel0 == false) pipe.outputInstance(new DependencyInstance(formsNoRoot, posNoRoot, labels, heads)); else { int[] headsNoRoot = new int[instance.heads.length - 1]; String[] labelsNoRoot = new String[instance.heads.length - 1]; for (int j = 0; j < headsNoRoot.length; j++) { headsNoRoot[j] = instance.heads[j + 1]; labelsNoRoot[j] = instance.deprels[j + 1]; } DependencyInstance out_inst = new DependencyInstance(formsNoRoot, posNoRoot, labelsNoRoot, headsNoRoot); out_inst.stacked = true; out_inst.heads_pred = heads; out_inst.deprels_pred = labels; pipe.outputInstance(out_inst); } // String line1 = ""; String line2 = ""; String line3 = ""; String line4 = ""; // for(int j = 1; j < pos.length; j++) { // String[] trip = res[j-1].split("[\\|:]"); // line1+= sent[j] + "\t"; line2 += pos[j] + "\t"; // line4 += trip[0] + "\t"; line3 += pipe.types[Integer.parseInt(trip[2])] + "\t"; // } // pred.write(line1.trim() + "\n" + line2.trim() + "\n" // + (pipe.labeled ? line3.trim() + "\n" : "") // + line4.trim() + "\n\n"); instance = pipe.nextInstance(); i++; } // if (ignore == null) // afm 03-07-2008 --- If this is called for each partition (ignore != // null), must close pipe outside the loop if (!options.train || !options .stackedLevel0) // afm 03-07-2008 --- If this is called for each partition (ignore != // null), must close pipe outside the loop pipe.close(); long end = System.currentTimeMillis(); System.out.println("Took: " + (end - start)); }
/** * Get the parses. * * @param allInstances a list to which all parse results are written. Can be {@code null}. * @param writeOutput write output to file and log some messages to screen. */ protected void outputParses(List<DependencyInstance> allInstances, boolean writeOutput) throws IOException { String tFile = options.testfile; String file = null; if (writeOutput) { file = options.outfile; } ConfidenceEstimator confEstimator = null; if (options.confidenceEstimator != null) { confEstimator = ConfidenceEstimator.resolveByName(options.confidenceEstimator, this); System.out.println("Applying confidence estimation: " + options.confidenceEstimator); } long start = System.currentTimeMillis(); pipe.initInputFile(tFile); if (writeOutput) { pipe.initOutputFile(file); } if (writeOutput) { System.out.print("Processing Sentence: "); } DependencyInstance instance = pipe.nextInstance(); int cnt = 0; while (instance != null) { cnt++; if (writeOutput) { System.out.print(cnt + " "); } String[] forms = instance.forms; String[] formsNoRoot = new String[forms.length - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] cposNoRoot = new String[formsNoRoot.length]; String[] labels = new String[formsNoRoot.length]; int[] heads = new int[formsNoRoot.length]; decode( instance, options.testK, params, formsNoRoot, cposNoRoot, posNoRoot, labels, heads, confEstimator, writeOutput); /* DependencyInstance parsedInstance; if (confEstimator != null) { double[] confidenceScores = confEstimator.estimateConfidence(instance); parsedInstance = new DependencyInstance(formsNoRoot, posNoRoot, labels, heads, confidenceScores); } else { parsedInstance = new DependencyInstance(formsNoRoot, posNoRoot, labels, heads); } if (writeOutput) { pipe.outputInstance(parsedInstance); } if (allInstances != null) { allInstances.add(parsedInstance); } */ // String line1 = ""; String line2 = ""; String line3 = ""; String line4 = ""; // for(int j = 1; j < pos.length; j++) { // String[] trip = res[j-1].split("[\\|:]"); // line1+= sent[j] + "\t"; line2 += pos[j] + "\t"; // line4 += trip[0] + "\t"; line3 += pipe.types[Integer.parseInt(trip[2])] + "\t"; // } // pred.write(line1.trim() + "\n" + line2.trim() + "\n" // + (pipe.labeled ? line3.trim() + "\n" : "") // + line4.trim() + "\n\n"); instance = pipe.nextInstance(); } pipe.close(); if (writeOutput) { long end = System.currentTimeMillis(); System.out.println("Took: " + (end - start)); } }