/**
 * Trains the currently selected ranking algorithm on a split of one sample file and, when test
 * data is supplied, reports the test score.
 *
 * <p>{@code percentTrain}% of the samples in {@code trainFile} are used for training and the rest
 * as validation data; test data is specified separately. When the {@code normalize} flag is set,
 * the test data is normalized as well so that it is scored on the same feature scale as the data
 * the ranker was trained on. If {@code modelFile} is non-empty, the learned ranker is saved to it.
 *
 * @param trainFile file with the samples to split into training and validation data
 * @param percentTrain share of the samples used for training; the remainder is validation data
 * @param testFile file with the test samples; empty string for "no test data"
 * @param featureDefFile feature definition file, passed on to {@code prepareSplit}
 */
public void evaluate(
    String trainFile, double percentTrain, String testFile, String featureDefFile) {
  List<RankList> train = new ArrayList<RankList>();
  List<RankList> validation = new ArrayList<RankList>();
  // Both lists start empty and are handed to prepareSplit, which also returns the features.
  int[] features =
      prepareSplit(trainFile, featureDefFile, percentTrain, normalize, train, validation);

  List<RankList> test = null;
  if (!testFile.isEmpty()) {
    test = readInput(testFile);
    // The normalize flag is given to prepareSplit for the train/validation data; the test data
    // is read separately, so it has to be normalized here (the overload taking an explicit
    // validation file does the same for its test set).
    if (normalize) {
      normalize(test, features);
    }
  }

  Ranker ranker = rFact.createRanker(type, train, features);
  ranker.set(trainScorer);
  ranker.setValidationSet(validation);
  ranker.init();
  ranker.learn();

  if (test != null) {
    double rankScore = evaluate(ranker, test);
    System.out.println(testScorer.name() + " on test data: " + SimpleMath.round(rankScore, 4));
  }
  if (!modelFile.isEmpty()) {
    System.out.println("");
    ranker.save(modelFile);
    System.out.println("Model saved to: " + modelFile);
  }
}
/** * Evaluate the currently selected ranking algorithm using <training data, validation data, * testing data and the defined features>. * * @param trainFile * @param validationFile * @param testFile * @param featureDefFile */ public void evaluate( String trainFile, String validationFile, String testFile, String featureDefFile) { List<RankList> train = readInput(trainFile); // read input List<RankList> validation = null; if (validationFile.compareTo("") != 0) validation = readInput(validationFile); List<RankList> test = null; if (testFile.compareTo("") != 0) test = readInput(testFile); int[] features = readFeature(featureDefFile); // read features if (features == null) // no features specified ==> use all features in the training file features = getFeatureFromSampleVector(train); if (normalize) { normalize(train, features); if (validation != null) normalize(validation, features); if (test != null) normalize(test, features); } /*if(newFeatureFile.compareTo("")!=0) { System.out.print("Loading new feature description file... "); List<String> descriptions = FileUtils.readLine(newFeatureFile, "ASCII"); int taken = 0; for(int i=0;i<descriptions.size();i++) { if(descriptions.get(i).indexOf("##")==0) continue; LinearComputer lc = new LinearComputer("", descriptions.get(i)); //if we keep the orig. 
features ==> discard size-1 linear computer if(!keepOrigFeatures || lc.size()>1) { lcList.add(lc); taken++; if(taken == topNew) break; } //System.out.println(lc.toString()); } applyNewFeatures(train, features); applyNewFeatures(validation, features); features = applyNewFeatures(test, features); System.out.println("[Done]");//0.1195 //0.071 }*/ Ranker ranker = rFact.createRanker(type, train, features); ranker.set(trainScorer); ranker.setValidationSet(validation); ranker.init(); ranker.learn(); if (test != null) { double rankScore = evaluate(ranker, test); System.out.println(testScorer.name() + " on test data: " + SimpleMath.round(rankScore, 4)); } if (modelFile.compareTo("") != 0) { System.out.println(""); ranker.save(modelFile); System.out.println("Model saved to: " + modelFile); } }
/**
 * Runs k-fold cross validation of the currently selected ranking algorithm over one sample file.
 *
 * <p>For every fold a fresh ranker is trained on that fold's training part and three scores are
 * accumulated on its test part: the score obtained with no ranker, the score of the trained
 * ranker, and the score with no ranker on the lists returned by {@code createOracles}. The three
 * averages over all folds are printed on one tab-separated line prefixed with {@code "Total: "}.
 *
 * @param sampleFile file with the samples to be divided into folds
 * @param featureDefFile feature definition file, passed on to {@code prepareCV}
 * @param nFold number of folds
 */
public void evaluate(String sampleFile, String featureDefFile, int nFold) {
  List<List<RankList>> trainFolds = new ArrayList<List<RankList>>();
  List<List<RankList>> testFolds = new ArrayList<List<RankList>>();
  int[] features =
      prepareCV(sampleFile, featureDefFile, nFold, normalize, trainFolds, testFolds);

  double baselineTotal = 0.0;
  double modelTotal = 0.0;
  double oracleTotal = 0.0;

  for (int fold = 0; fold < nFold; fold++) {
    List<RankList> foldTrain = trainFolds.get(fold);
    List<RankList> foldTest = testFolds.get(fold);

    Ranker model = rFact.createRanker(type, foldTrain, features);
    model.set(trainScorer);
    model.init();
    model.learn();

    // A null ranker scores the lists without applying a learned model.
    baselineTotal += evaluate(null, foldTest);
    modelTotal += evaluate(model, foldTest);
    oracleTotal += evaluate(null, createOracles(foldTest));
  }

  String summary =
      "Total: "
          + SimpleMath.round(baselineTotal / nFold, 4)
          + "\t"
          + SimpleMath.round(modelTotal / nFold, 4)
          + "\t"
          + SimpleMath.round(oracleTotal / nFold, 4)
          + "\t";
  System.out.println(summary);
}