/**
 * Loads a previously serialized model from {@code file}.
 *
 * <p>Reads three objects from the file, in this order: the parameter vector (stored into {@code
 * params.parameters}), the data alphabet and the type alphabet (stored into the pipe). Afterwards
 * {@code pipe.closeAlphabets()} is called.
 *
 * <p>NOTE(review): this uses native Java deserialization; only load model files from a trusted
 * source.
 *
 * @param file path of the serialized model file
 * @throws Exception if the file cannot be opened or read, or if a serialized class cannot be
 *     resolved
 */
public void loadModel(String file) throws Exception {
  FileInputStream modelFile = new FileInputStream(file);
  try {
    ObjectInputStream in = new ObjectInputStream(modelFile);
    params.parameters = (double[]) in.readObject();
    pipe.dataAlphabet = (Alphabet) in.readObject();
    pipe.typeAlphabet = (Alphabet) in.readObject();
  } finally {
    // Release the file handle even when deserialization fails part-way through.
    modelFile.close();
  }
  pipe.closeAlphabets();
}
// /////////////////////////////////////////////////// // RUNNING THE PARSER // ////////////////////////////////////////////////// public static void main(String[] args) throws FileNotFoundException, Exception { ParserOptions options = new ParserOptions(args); if (options.train) { DependencyPipe pipe = options.secondOrder ? new DependencyPipe2O(options) : new DependencyPipe(options); int[] instanceLengths = pipe.createInstances(options.trainfile, options.trainforest); pipe.closeAlphabets(); DependencyParser dp = new DependencyParser(pipe, options); int numFeats = pipe.dataAlphabet.size(); int numTypes = pipe.typeAlphabet.size(); System.out.print("Num Feats: " + numFeats); System.out.println(".\tNum Edge Labels: " + numTypes); dp.train(instanceLengths, options.trainfile, options.trainforest); System.out.print("Saving model..."); dp.saveModel(options.modelName); System.out.print("done."); } if (options.test) { DependencyPipe pipe = options.secondOrder ? new DependencyPipe2O(options) : new DependencyPipe(options); scoreWriter = new BufferedWriter(new FileWriter(options.outfile + ".mstscores")); DependencyParser dp = new DependencyParser(pipe, options); System.out.print("\tLoading model..."); dp.loadModel(options.modelName); System.out.println("done."); pipe.closeAlphabets(); dp.outputParses(); scoreWriter.close(); } System.out.println(); if (options.eval) { System.out.println("\nEVALUATION PERFORMANCE:"); DependencyEvaluator.evaluate( options.goldfile, options.outfile, options.format, (options.confidenceEstimator != null)); } if (options.rankEdgesByConfidence) { System.out.println("\nRank edges by confidence:"); EdgeRankerByConfidence edgeRanker = new EdgeRankerByConfidence(); edgeRanker.rankEdgesByConfidence(options.goldfile, options.outfile, options.format); } }
/**
 * Loads a serialized model from an already opened stream.
 *
 * <p>The stream must yield, in this order, the parameter vector, the data alphabet and the type
 * alphabet. After they are read, {@code pipe.closeAlphabets()} is called. This method does not
 * close {@code inputStream}.
 *
 * @param inputStream stream positioned at the start of a serialized model
 * @throws IOException if reading fails, or (wrapping the {@link ClassNotFoundException}) if a
 *     serialized class cannot be resolved
 */
public void loadModel(InputStream inputStream) throws IOException {
  ObjectInputStream modelIn = new ObjectInputStream(inputStream);
  try {
    double[] weights = (double[]) modelIn.readObject();
    params.parameters = weights;

    Alphabet features = (Alphabet) modelIn.readObject();
    pipe.dataAlphabet = features;

    Alphabet edgeTypes = (Alphabet) modelIn.readObject();
    pipe.typeAlphabet = edgeTypes;

    pipe.closeAlphabets();
  } catch (ClassNotFoundException missingClass) {
    // Surface the problem as an IOException while keeping the original cause attached.
    throw new IOException("Unable to load model: " + missingClass.getMessage(), missingClass);
  }
}
public void loadModel(String file) throws Exception { ObjectInputStream in = new ObjectInputStream(new FileInputStream(file)); params.parameters = (double[]) in.readObject(); pipe.dataAlphabet = (Alphabet) in.readObject(); pipe.typeAlphabet = (Alphabet) in.readObject(); // afm 06-04-08 if (options.separateLab) { classifier = (Classifier) in.readObject(); } in.close(); pipe.closeAlphabets(); }
public void augment(int[] instanceLengths, String trainfile, File train_forest, int numParts) throws IOException { // System.out.print("About to train. "); // System.out.print("Num Feats: " + pipe.dataAlphabet.size()); int i, j; int[] ignore = new int[instanceLengths.length]; // String trainpartfile; // createPartitions(instanceLengths, trainfile, numParts); // for(i = 0; i < numParts; i++) // { // trainpartfile = trainfile + "." + i; // } int numInstances = instanceLengths.length; int numInstancesPerPart = numInstances / numParts; // The last partition becomes bigger pipe.initOutputFile(options.outfile); // Initialize the output file once for (j = 0; j < numParts; j++) { System.out.println("Training classifier for partition " + j); // Make partition for (i = 0; i < numInstances; i++) { if (i >= j * numInstancesPerPart && i < (j + 1) * numInstancesPerPart) ignore[i] = 1; // Mark to ignore this instance in training else ignore[i] = 0; } // Train on one split params = new Parameters(pipe.dataAlphabet.size()); train(instanceLengths, ignore, trainfile, train_forest); // Test on the other split System.out.println("Making predictions for partition " + j); for (i = 0; i < numInstances; i++) ignore[i] = 1 - ignore[i]; // Toggle ignore outputParses(ignore); } pipe.close(); // Close the output file once }
public void decode( DependencyInstance instance, int K, Parameters params, String[] formsNoRoot, String[] cposNoRoot, String[] posNoRoot, String[] labels, int[] heads, ConfidenceEstimator confEstimator, boolean writeOutput) throws IOException { String[] results = decode(instance, K, params); int i = 0; while (i < results.length && !results[i].equals("null")) { // write scores scoreWriter.write(scores[i] + " "); // System.out.println(results[i]); String[] res = results[i].split(" "); String[] forms = instance.forms; String[] cpos = instance.cpostags; String[] pos = instance.postags; for (int j = 0; j < forms.length - 1; j++) { formsNoRoot[j] = forms[j + 1]; cposNoRoot[j] = cpos[j + 1]; posNoRoot[j] = pos[j + 1]; String[] trip = res[j].split("[\\|:]"); // System.out.println(res[j]); labels[j] = pipe.types[Integer.parseInt(trip[2])]; heads[j] = Integer.parseInt(trip[0]); } DependencyInstance parsedInstance; if (confEstimator != null) { double[] confidenceScores = confEstimator.estimateConfidence(instance); parsedInstance = new DependencyInstance( formsNoRoot, cposNoRoot, posNoRoot, labels, heads, confidenceScores); } else { parsedInstance = new DependencyInstance(formsNoRoot, cposNoRoot, posNoRoot, labels, heads); } if (writeOutput) { pipe.outputInstance(parsedInstance); } i++; } scoreWriter.write("\n"); }
///////////////////////////////////////////////////// // RUNNING THE PARSER //////////////////////////////////////////////////// public static void main(String[] args) throws FileNotFoundException, Exception { System.setProperty("java.io.tmpdir", "./tmp/"); ParserOptions options = new ParserOptions(args); System.out.println("Default temp directory:" + System.getProperty("java.io.tmpdir")); System.out.println("Separate labeling: " + options.separateLab); if (options.train) { DependencyPipe pipe = options.secondOrder ? new DependencyPipe2O(options) : new DependencyPipe(options); int[] instanceLengths = pipe.createInstances(options.trainfile, options.trainforest); pipe.closeAlphabets(); DependencyParser dp = new DependencyParser(pipe, options); // pipe.printModelStats(null); int numFeats = pipe.dataAlphabet.size(); int numTypes = pipe.typeAlphabet.size(); System.out.print("Num Feats: " + numFeats); System.out.println(".\tNum Edge Labels: " + numTypes); if (options .stackedLevel0) // Augment training data with output predictions, for stacked learning // (afm 03-03-08) { // Output data augmented with output predictions System.out.println("Augmenting training data with output predictions..."); options.testfile = options.trainfile; dp.augment( instanceLengths, options.trainfile, options.trainforest, options.augmentNumParts); // Now train the base classifier in the whole corpus, nothing being ignored System.out.println("Training the base classifier in the whole corpus..."); } // afm 03-06-08 --- To allow some instances to be ignored int ignore[] = new int[instanceLengths.length]; for (int i = 0; i < instanceLengths.length; i++) ignore[i] = 0; dp.params = new Parameters(pipe.dataAlphabet.size()); dp.train(instanceLengths, ignore, options.trainfile, options.trainforest); System.out.print("Saving model..."); dp.saveModel(options.modelName); System.out.print("done."); } if (options.test) { DependencyPipe pipe = options.secondOrder ? 
new DependencyPipe2O(options) : new DependencyPipe(options); DependencyParser dp = new DependencyParser(pipe, options); System.out.print("\tLoading model..."); dp.loadModel(options.modelName); System.out.println("done."); pipe.printModelStats(dp.params); pipe.closeAlphabets(); dp.outputParses(null); } System.out.println(); if (options.eval) { System.out.println("\nEVALUATION PERFORMANCE:"); DependencyEvaluator.evaluate(options.goldfile, options.outfile, options.format); } }
private void trainingIter(int[] instanceLengths, String trainfile, File train_forest, int iter) throws IOException { int numUpd = 0; ObjectInputStream in = new ObjectInputStream(new FileInputStream(train_forest)); boolean evaluateI = true; int numInstances = instanceLengths.length; for (int i = 0; i < numInstances; i++) { if ((i + 1) % 500 == 0) { System.out.print((i + 1) + ","); // System.out.println(" "+(i+1)+" instances"); } int length = instanceLengths[i]; // Get production crap. FeatureVector[][][] fvs = new FeatureVector[length][length][2]; double[][][] probs = new double[length][length][2]; FeatureVector[][][][] nt_fvs = new FeatureVector[length][pipe.types.length][2][2]; double[][][][] nt_probs = new double[length][pipe.types.length][2][2]; FeatureVector[][][] fvs_trips = new FeatureVector[length][length][length]; double[][][] probs_trips = new double[length][length][length]; FeatureVector[][][] fvs_sibs = new FeatureVector[length][length][2]; double[][][] probs_sibs = new double[length][length][2]; DependencyInstance inst; if (options.secondOrder) { inst = ((DependencyPipe2O) pipe) .readInstance( in, length, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, params); } else inst = pipe.readInstance(in, length, fvs, probs, nt_fvs, nt_probs, params); double upd = (double) (options.numIters * numInstances - (numInstances * (iter - 1) + (i + 1)) + 1); int K = options.trainK; Object[][] d = null; if (options.decodeType.equals("proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeProjective( inst, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeProjective(inst, fvs, probs, nt_fvs, nt_probs, K); } if (options.decodeType.equals("non-proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeNonProjective( inst, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeNonProjective(inst, fvs, 
probs, nt_fvs, nt_probs, K); } params.updateParamsMIRA(inst, d, upd); } // System.out.println(""); // System.out.println(" "+numInstances+" instances"); System.out.print(numInstances); in.close(); }
////////////////////////////////////////////////////// // Get Best Parses /////////////////////////////////// ////////////////////////////////////////////////////// public void outputParses() throws IOException { String tFile = options.testfile; String file = options.outfile; long start = System.currentTimeMillis(); pipe.initInputFile(tFile); pipe.initOutputFile(file); System.out.print("Processing Sentence: "); DependencyInstance instance = pipe.nextInstance(); int cnt = 0; while (instance != null) { cnt++; System.out.print(cnt + " "); String[] forms = instance.forms; int length = forms.length; FeatureVector[][][] fvs = new FeatureVector[forms.length][forms.length][2]; double[][][] probs = new double[forms.length][forms.length][2]; FeatureVector[][][][] nt_fvs = new FeatureVector[forms.length][pipe.types.length][2][2]; double[][][][] nt_probs = new double[forms.length][pipe.types.length][2][2]; FeatureVector[][][] fvs_trips = new FeatureVector[length][length][length]; double[][][] probs_trips = new double[length][length][length]; FeatureVector[][][] fvs_sibs = new FeatureVector[length][length][2]; double[][][] probs_sibs = new double[length][length][2]; if (options.secondOrder) ((DependencyPipe2O) pipe) .fillFeatureVectors( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, params); else pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params); int K = options.testK; Object[][] d = null; if (options.decodeType.equals("proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } if (options.decodeType.equals("non-proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeNonProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = 
decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } String[] res = ((String) d[0][1]).split(" "); String[] pos = instance.cpostags; String[] formsNoRoot = new String[forms.length - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] labels = new String[formsNoRoot.length]; int[] heads = new int[formsNoRoot.length]; Arrays.toString(forms); Arrays.toString(res); for (int j = 0; j < formsNoRoot.length; j++) { formsNoRoot[j] = forms[j + 1]; posNoRoot[j] = pos[j + 1]; String[] trip = res[j].split("[\\|:]"); labels[j] = pipe.types[Integer.parseInt(trip[2])]; heads[j] = Integer.parseInt(trip[0]); } pipe.outputInstance(new DependencyInstance(formsNoRoot, posNoRoot, labels, heads)); // String line1 = ""; String line2 = ""; String line3 = ""; String line4 = ""; // for(int j = 1; j < pos.length; j++) { // String[] trip = res[j-1].split("[\\|:]"); // line1+= sent[j] + "\t"; line2 += pos[j] + "\t"; // line4 += trip[0] + "\t"; line3 += pipe.types[Integer.parseInt(trip[2])] + "\t"; // } // pred.write(line1.trim() + "\n" + line2.trim() + "\n" // + (pipe.labeled ? line3.trim() + "\n" : "") // + line4.trim() + "\n\n"); instance = pipe.nextInstance(); } pipe.close(); long end = System.currentTimeMillis(); System.out.println("Took: " + (end - start)); }
////////////////////////////////////////////////////// // Get Best Parses /////////////////////////////////// ////////////////////////////////////////////////////// public void outputParses(int[] ignore) throws IOException { String tFile = options.testfile; String file = options.outfile; long start = System.currentTimeMillis(); pipe.initInputFile(tFile); // if (ignore == null) // afm 03-07-2008 --- If this is called for each partition, must have // initialized output file before if (!options.train || !options .stackedLevel0) // afm 03-07-2008 --- If this is called for each partition, must have // initialized output file before pipe.initOutputFile(file); System.out.print("Processing Sentence: "); DependencyInstance instance = pipe.nextInstance(); int cnt = 0; int i = 0; LabelClassifier oc = new LabelClassifier(options); while (instance != null) { cnt++; System.out.print(cnt + " "); String[] forms = instance.forms; int length = forms.length; // afm 03-07-08 --- If this instance is to be ignored, just go for the next one if (ignore != null && ignore[i] != 0) { instance = pipe.nextInstance(); i++; continue; } FeatureVector[][][] fvs = new FeatureVector[forms.length][forms.length][2]; double[][][] probs = new double[forms.length][forms.length][2]; FeatureVector[][][][] nt_fvs = new FeatureVector[forms.length][pipe.types.length][2][2]; double[][][][] nt_probs = new double[forms.length][pipe.types.length][2][2]; FeatureVector[][][] fvs_trips = new FeatureVector[length][length][length]; double[][][] probs_trips = new double[length][length][length]; FeatureVector[][][] fvs_sibs = new FeatureVector[length][length][2]; double[][][] probs_sibs = new double[length][length][2]; if (options.secondOrder) ((DependencyPipe2O) pipe) .fillFeatureVectors( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, params); else pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params); int K = options.testK; Object[][] d = null; if 
(options.decodeType.equals("proj")) { if (options.secondOrder) d = ((DependencyDecoder2O) decoder) .decodeProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); else d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } if (options.decodeType.equals("non-proj")) { if (options.secondOrder) { d = ((DependencyDecoder2O) decoder) .decodeNonProjective( instance, fvs, probs, fvs_trips, probs_trips, fvs_sibs, probs_sibs, nt_fvs, nt_probs, K); } else d = decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K); } String[] res = ((String) d[0][1]).split(" "); String[] pos = instance.cpostags; String[] formsNoRoot = new String[forms.length - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] labels = new String[formsNoRoot.length]; int[] heads = new int[formsNoRoot.length]; Arrays.toString(forms); Arrays.toString(res); for (int j = 0; j < formsNoRoot.length; j++) { formsNoRoot[j] = forms[j + 1]; posNoRoot[j] = pos[j + 1]; String[] trip = res[j].split("[\\|:]"); labels[j] = pipe.types[Integer.parseInt(trip[2])]; heads[j] = Integer.parseInt(trip[0]); } // afm 06-04-08 if (options.separateLab) { /* * ask whether instance contains level0 information */ /* * Note, forms and pos have the root. 
labels and heads do not */ if (options.stackedLevel1) labels = oc.outputLabels( classifier, instance.forms, instance.postags, labels, heads, instance.deprels_pred, instance.heads_pred, instance); else labels = oc.outputLabels( classifier, instance.forms, instance.postags, labels, heads, null, null, instance); } // afm 03-07-08 // if (ignore == null) if (options.stackedLevel0 == false) pipe.outputInstance(new DependencyInstance(formsNoRoot, posNoRoot, labels, heads)); else { int[] headsNoRoot = new int[instance.heads.length - 1]; String[] labelsNoRoot = new String[instance.heads.length - 1]; for (int j = 0; j < headsNoRoot.length; j++) { headsNoRoot[j] = instance.heads[j + 1]; labelsNoRoot[j] = instance.deprels[j + 1]; } DependencyInstance out_inst = new DependencyInstance(formsNoRoot, posNoRoot, labelsNoRoot, headsNoRoot); out_inst.stacked = true; out_inst.heads_pred = heads; out_inst.deprels_pred = labels; pipe.outputInstance(out_inst); } // String line1 = ""; String line2 = ""; String line3 = ""; String line4 = ""; // for(int j = 1; j < pos.length; j++) { // String[] trip = res[j-1].split("[\\|:]"); // line1+= sent[j] + "\t"; line2 += pos[j] + "\t"; // line4 += trip[0] + "\t"; line3 += pipe.types[Integer.parseInt(trip[2])] + "\t"; // } // pred.write(line1.trim() + "\n" + line2.trim() + "\n" // + (pipe.labeled ? line3.trim() + "\n" : "") // + line4.trim() + "\n\n"); instance = pipe.nextInstance(); i++; } // if (ignore == null) // afm 03-07-2008 --- If this is called for each partition (ignore != // null), must close pipe outside the loop if (!options.train || !options .stackedLevel0) // afm 03-07-2008 --- If this is called for each partition (ignore != // null), must close pipe outside the loop pipe.close(); long end = System.currentTimeMillis(); System.out.println("Took: " + (end - start)); }
// ////////////////////////////////////////////////////
// Decode single instance
// ////////////////////////////////////////////////////

/**
 * Decodes a single instance and returns the parses produced by the decoder as strings.
 *
 * <p>The decoder is chosen by {@code options.decodeType} ({@code "proj"} or {@code "non-proj"})
 * and {@code options.secondOrder}. As a side effect the {@code scores} field is replaced by a new
 * array holding the score of every decoder result.
 *
 * <p>The parse strings are joined with newlines and split again, so a {@code null} parse string
 * in the decoder result shows up as the literal string {@code "null"}.
 *
 * @param instance instance to decode
 * @param K number of parses requested from the decoder
 * @param params model parameters used to fill the feature tables
 * @return the parse strings in decoder order
 */
String[] decode(DependencyInstance instance, int K, Parameters params) {
  String[] forms = instance.forms;
  int length = forms.length;

  // First-order tables, always needed.
  FeatureVector[][][] fvs = new FeatureVector[length][length][2];
  double[][][] probs = new double[length][length][2];
  FeatureVector[][][][] nt_fvs = new FeatureVector[length][pipe.types.length][2][2];
  double[][][][] nt_probs = new double[length][pipe.types.length][2][2];

  // Second-order tables are cubic in the sentence length, so they are only allocated when the
  // second-order model is in use; the first-order branches never touch them.
  FeatureVector[][][] fvs_trips = null;
  double[][][] probs_trips = null;
  FeatureVector[][][] fvs_sibs = null;
  double[][][] probs_sibs = null;

  if (options.secondOrder) {
    fvs_trips = new FeatureVector[length][length][length];
    probs_trips = new double[length][length][length];
    fvs_sibs = new FeatureVector[length][length][2];
    probs_sibs = new double[length][length][2];
    ((DependencyPipe2O) pipe)
        .fillFeatureVectors(
            instance,
            fvs,
            probs,
            fvs_trips,
            probs_trips,
            fvs_sibs,
            probs_sibs,
            nt_fvs,
            nt_probs,
            params);
  } else {
    pipe.fillFeatureVectors(instance, fvs, probs, nt_fvs, nt_probs, params);
  }

  Object[][] d = null;
  if (options.decodeType.equals("proj")) {
    if (options.secondOrder) {
      d =
          ((DependencyDecoder2O) decoder)
              .decodeProjective(
                  instance,
                  fvs,
                  probs,
                  fvs_trips,
                  probs_trips,
                  fvs_sibs,
                  probs_sibs,
                  nt_fvs,
                  nt_probs,
                  K);
    } else {
      d = decoder.decodeProjective(instance, fvs, probs, nt_fvs, nt_probs, K);
    }
  }
  if (options.decodeType.equals("non-proj")) {
    if (options.secondOrder) {
      d =
          ((DependencyDecoder2O) decoder)
              .decodeNonProjective(
                  instance,
                  fvs,
                  probs,
                  fvs_trips,
                  probs_trips,
                  fvs_sibs,
                  probs_sibs,
                  nt_fvs,
                  nt_probs,
                  K);
    } else {
      d = decoder.decodeNonProjective(instance, fvs, probs, nt_fvs, nt_probs, K);
    }
  }

  // Collect all resulting parses (column 1) and their scores (column 2). The buffer is purely
  // local, so the unsynchronized StringBuilder is sufficient.
  StringBuilder buff = new StringBuilder();
  scores = new double[d.length];
  for (int i = 0; i < d.length; i++) {
    buff.append((String) d[i][1]).append("\n");
    scores[i] = (Double) d[i][2];
  }

  return buff.toString().split("\n");
}
/** * Get the parses. * * @param allInstances a list to which all parse results are written. Can be {@code null}. * @param writeOutput write output to file and log some messages to screen. */ protected void outputParses(List<DependencyInstance> allInstances, boolean writeOutput) throws IOException { String tFile = options.testfile; String file = null; if (writeOutput) { file = options.outfile; } ConfidenceEstimator confEstimator = null; if (options.confidenceEstimator != null) { confEstimator = ConfidenceEstimator.resolveByName(options.confidenceEstimator, this); System.out.println("Applying confidence estimation: " + options.confidenceEstimator); } long start = System.currentTimeMillis(); pipe.initInputFile(tFile); if (writeOutput) { pipe.initOutputFile(file); } if (writeOutput) { System.out.print("Processing Sentence: "); } DependencyInstance instance = pipe.nextInstance(); int cnt = 0; while (instance != null) { cnt++; if (writeOutput) { System.out.print(cnt + " "); } String[] forms = instance.forms; String[] formsNoRoot = new String[forms.length - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] cposNoRoot = new String[formsNoRoot.length]; String[] labels = new String[formsNoRoot.length]; int[] heads = new int[formsNoRoot.length]; decode( instance, options.testK, params, formsNoRoot, cposNoRoot, posNoRoot, labels, heads, confEstimator, writeOutput); /* DependencyInstance parsedInstance; if (confEstimator != null) { double[] confidenceScores = confEstimator.estimateConfidence(instance); parsedInstance = new DependencyInstance(formsNoRoot, posNoRoot, labels, heads, confidenceScores); } else { parsedInstance = new DependencyInstance(formsNoRoot, posNoRoot, labels, heads); } if (writeOutput) { pipe.outputInstance(parsedInstance); } if (allInstances != null) { allInstances.add(parsedInstance); } */ // String line1 = ""; String line2 = ""; String line3 = ""; String line4 = ""; // for(int j = 1; j < pos.length; j++) { // String[] trip = 
res[j-1].split("[\\|:]"); // line1+= sent[j] + "\t"; line2 += pos[j] + "\t"; // line4 += trip[0] + "\t"; line3 += pipe.types[Integer.parseInt(trip[2])] + "\t"; // } // pred.write(line1.trim() + "\n" + line2.trim() + "\n" // + (pipe.labeled ? line3.trim() + "\n" : "") // + line4.trim() + "\n\n"); instance = pipe.nextInstance(); } pipe.close(); if (writeOutput) { long end = System.currentTimeMillis(); System.out.println("Took: " + (end - start)); } }