예제 #1
0
  /**
   * Trains the currently selected ranking algorithm on one part of the samples in {@code
   * trainFile} and uses the remaining samples as validation data. Test data is specified
   * separately.
   *
   * <p>After training, the score on the test data is printed (when test data was given) and the
   * model is saved when the {@code modelFile} field is not empty.
   *
   * <p>When the {@code normalize} flag is set, the test data is normalized over the same features
   * that are used for training, so the learned model is scored on inputs prepared the same way as
   * the data it was trained on.
   *
   * @param trainFile file holding the samples that are split into training and validation data
   * @param percentTrain portion of the samples used for training; handed unchanged to {@code
   *     prepareSplit}
   * @param testFile file holding the test samples; empty string for "no test data"
   * @param featureDefFile feature definition file; handed unchanged to {@code prepareSplit}
   */
  public void evaluate(
      String trainFile, double percentTrain, String testFile, String featureDefFile) {
    List<RankList> train = new ArrayList<RankList>();
    List<RankList> validation = new ArrayList<RankList>();
    // prepareSplit fills both lists and returns the features to train on.
    int[] features =
        prepareSplit(trainFile, featureDefFile, percentTrain, normalize, train, validation);

    List<RankList> test = null;
    if (!testFile.isEmpty()) {
      test = readInput(testFile);
      // The split above is prepared with the normalize flag; apply the same treatment to the
      // test data, otherwise the model is evaluated on differently scaled feature values.
      if (normalize) {
        normalize(test, features);
      }
    }

    Ranker ranker = rFact.createRanker(type, train, features);
    ranker.set(trainScorer);
    ranker.setValidationSet(validation);
    ranker.init();
    ranker.learn();

    if (test != null) {
      double rankScore = evaluate(ranker, test);
      System.out.println(testScorer.name() + " on test data: " + SimpleMath.round(rankScore, 4));
    }
    if (!modelFile.isEmpty()) {
      System.out.println("");
      ranker.save(modelFile);
      System.out.println("Model saved to: " + modelFile);
    }
  }
예제 #2
0
  /**
   * Trains the currently selected ranking algorithm on the given training data, with optional
   * validation and test data, restricted to the defined features.
   *
   * <p>When the feature definition file yields no features, the features found in the training
   * samples are used instead. When the {@code normalize} flag is set, every data set that was
   * loaded is normalized over those features before training. After training, the score on the
   * test data is printed (when test data was given) and the model is saved when the {@code
   * modelFile} field is not empty.
   *
   * @param trainFile file holding the training samples
   * @param validationFile file holding the validation samples; empty string for "no validation
   *     data"
   * @param testFile file holding the test samples; empty string for "no test data"
   * @param featureDefFile feature definition file handed to {@code readFeature}
   */
  public void evaluate(
      String trainFile, String validationFile, String testFile, String featureDefFile) {
    // Load the data sets; the optional ones stay null when their file name is empty.
    List<RankList> trainingSamples = readInput(trainFile);
    List<RankList> validationSamples = validationFile.isEmpty() ? null : readInput(validationFile);
    List<RankList> testSamples = testFile.isEmpty() ? null : readInput(testFile);

    // No features specified ==> fall back to all features in the training samples.
    int[] selectedFeatures = readFeature(featureDefFile);
    if (selectedFeatures == null) {
      selectedFeatures = getFeatureFromSampleVector(trainingSamples);
    }

    if (normalize) {
      normalize(trainingSamples, selectedFeatures);
      if (validationSamples != null) {
        normalize(validationSamples, selectedFeatures);
      }
      if (testSamples != null) {
        normalize(testSamples, selectedFeatures);
      }
    }

    Ranker model = rFact.createRanker(type, trainingSamples, selectedFeatures);
    model.set(trainScorer);
    model.setValidationSet(validationSamples);
    model.init();
    model.learn();

    if (testSamples != null) {
      double testScore = evaluate(model, testSamples);
      System.out.println(testScorer.name() + " on test data: " + SimpleMath.round(testScore, 4));
    }

    // Nothing left to do unless a model file was requested.
    if (modelFile.isEmpty()) {
      return;
    }
    System.out.println("");
    model.save(modelFile);
    System.out.println("Model saved to: " + modelFile);
  }
예제 #3
0
  /**
   * Evaluates the currently selected ranking algorithm with k-fold cross validation over the
   * samples in {@code sampleFile}, using the defined features.
   *
   * <p>For every fold a new ranker is trained on that fold's training data and three scores are
   * computed on its test data: one without a ranker, one with the trained ranker, and one without
   * a ranker on the lists returned by {@code createOracles}. The three scores, each averaged over
   * {@code nFold}, are printed on a single tab-separated line.
   *
   * @param sampleFile file holding the samples to split into folds
   * @param featureDefFile feature definition file; handed unchanged to {@code prepareCV}
   * @param nFold number of folds
   */
  public void evaluate(String sampleFile, String featureDefFile, int nFold) {
    List<List<RankList>> trainFolds = new ArrayList<List<RankList>>();
    List<List<RankList>> testFolds = new ArrayList<List<RankList>>();
    // prepareCV fills one training list and one test list per fold.
    int[] features = prepareCV(sampleFile, featureDefFile, nFold, normalize, trainFolds, testFolds);

    double baselineTotal = 0.0;
    double learnedTotal = 0.0;
    double oracleTotal = 0.0;

    for (int fold = 0; fold < nFold; fold++) {
      List<RankList> foldTrain = trainFolds.get(fold);
      List<RankList> foldTest = testFolds.get(fold);

      Ranker foldRanker = rFact.createRanker(type, foldTrain, features);
      foldRanker.set(trainScorer);
      foldRanker.init();
      foldRanker.learn();

      // Score with no ranker (presumably the lists in their given order -- confirm against
      // evaluate(Ranker, List)), then with the trained ranker, then on the oracle lists.
      baselineTotal += evaluate(null, foldTest);
      learnedTotal += evaluate(foldRanker, foldTest);
      oracleTotal += evaluate(null, createOracles(foldTest));
    }

    System.out.println(
        "Total: "
            + SimpleMath.round(baselineTotal / nFold, 4)
            + "\t"
            + SimpleMath.round(learnedTotal / nFold, 4)
            + "\t"
            + SimpleMath.round(oracleTotal / nFold, 4)
            + "\t");
  }