Beispiel #1
0
 /**
  * Scores every document of every list in the test file with a saved model and writes the
  * resulting ranking to a file, one line per document, in the form
  * {@code <listID> Q0 <description> <rank> <score> indri}.
  *
  * <p>Within each list the documents are written in the order returned by
  * {@code MergeSorter.sort(scores, false)}; the rank column is the 1-based position in that
  * order. '#' characters are stripped from the document description and the score is rounded to
  * 5 decimal places. Any failure is reported on standard output and not rethrown.
  *
  * @param modelFile path of the saved model, loaded through {@code rFact.loadRanker}
  * @param testFile path of the data to rank
  * @param indriRanking path of the output file (written with the ASCII charset)
  */
 public void rank(String modelFile, String testFile, String indriRanking) {
   Ranker ranker = rFact.loadRanker(modelFile);
   int[] features = ranker.getFeatures();
   List<RankList> test = readInput(testFile);
   if (normalize) normalize(test, features);
   BufferedWriter out = null;
   try {
     out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(indriRanking), "ASCII"));
     for (int i = 0; i < test.size(); i++) {
       RankList l = test.get(i);
       double[] scores = new double[l.size()];
       for (int j = 0; j < l.size(); j++) scores[j] = ranker.eval(l.get(j));
       int[] idx = MergeSorter.sort(scores, false);
       for (int j = 0; j < idx.length; j++) {
         int k = idx[j];
         String str =
             l.getID()
                 + " Q0 "
                 + l.get(k).getDescription().replace("#", "").trim()
                 + " "
                 + (j + 1)
                 + " "
                 + SimpleMath.round(scores[k], 5)
                 + " indri";
         out.write(str);
         out.newLine();
       }
     }
   } catch (Exception ex) {
     System.out.println("Error in Evaluator::rank(): " + ex.toString());
   } finally {
     // Close in finally so the file handle is released (and buffered lines are flushed) even
     // when scoring or writing fails part-way through.
     if (out != null) {
       try {
         out.close();
       } catch (Exception ex) {
         System.out.println("Error in Evaluator::rank(): " + ex.toString());
       }
     }
   }
 }
Beispiel #2
0
  /**
   * Loads a saved model, evaluates it on the given test file and prints the aggregate score
   * (rounded to 4 decimal places) to standard output.
   *
   * @param modelFile path of the saved model, loaded through {@code rFact.loadRanker}
   * @param testFile path of the test data
   */
  public void test(String modelFile, String testFile) {
    Ranker model = rFact.loadRanker(modelFile);
    int[] modelFeatures = model.getFeatures();
    List<RankList> samples = readInput(testFile);
    if (normalize) {
      normalize(samples, modelFeatures);
    }

    double testScore = evaluate(model, samples);
    String report = testScorer.name() + " on test data: " + SimpleMath.round(testScore, 4);
    System.out.println(report);
  }
Beispiel #3
0
  /**
   * Scores every list in the test file with a saved model and prints, for each list, one line on
   * standard output holding the 1-based rank of each document, in the documents' original order
   * and separated by single spaces.
   *
   * @param modelFile path of the saved model, loaded through {@code rFact.loadRanker}
   * @param testFile path of the data to rank
   */
  public void rank(String modelFile, String testFile) {
    Ranker ranker = rFact.loadRanker(modelFile);
    int[] features = ranker.getFeatures();
    List<RankList> test = readInput(testFile);
    if (normalize) normalize(test, features);

    for (int i = 0; i < test.size(); i++) {
      RankList l = test.get(i);
      int n = l.size();
      double[] scores = new double[n];
      for (int j = 0; j < n; j++) scores[j] = ranker.eval(l.get(j));
      int[] idx = Sorter.sort(scores, false);

      // Invert the sort order once (document index -> 1-based rank) instead of searching a
      // boxed list for every document, which was quadratic in the list size.
      // NOTE(review): assumes Sorter.sort returns a permutation of 0..n-1 — confirm.
      int[] rankOf = new int[n];
      for (int j = 0; j < idx.length; j++) rankOf[idx[j]] = j + 1;

      StringBuilder line = new StringBuilder();
      for (int j = 0; j < n; j++) {
        if (j > 0) line.append(' ');
        line.append(rankOf[j]);
      }
      System.out.println(line.toString());
    }
  }
Beispiel #4
0
 /**
  * Scores every document of every list in the test file with a saved model and writes the raw
  * scores to a file, one score per line, in input order.
  *
  * <p>Any failure is reported on standard output and not rethrown.
  *
  * @param modelFile path of the saved model, loaded through {@code rFact.loadRanker}
  * @param testFile path of the data to score
  * @param outputFile path of the output file (written with the ASCII charset)
  */
 public void score(String modelFile, String testFile, String outputFile) {
   Ranker ranker = rFact.loadRanker(modelFile);
   int[] features = ranker.getFeatures();
   List<RankList> test = readInput(testFile);
   if (normalize) normalize(test, features);
   BufferedWriter out = null;
   try {
     out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "ASCII"));
     for (int i = 0; i < test.size(); i++) {
       RankList l = test.get(i);
       for (int j = 0; j < l.size(); j++) {
         out.write(ranker.eval(l.get(j)) + "");
         out.newLine();
       }
     }
   } catch (Exception ex) {
     // The message previously named rank(); report the method that actually failed.
     System.out.println("Error in Evaluator::score(): " + ex.toString());
   } finally {
     // Close in finally so the file handle is released (and buffered lines are flushed) even
     // when scoring or writing fails part-way through.
     if (out != null) {
       try {
         out.close();
       } catch (Exception ex) {
         System.out.println("Error in Evaluator::score(): " + ex.toString());
       }
     }
   }
 }
Beispiel #5
0
  /**
   * Loads a saved model, ranks each list of the test file with it and prints the mean score over
   * all lists (rounded to 4 decimal places) to standard output.
   *
   * @param modelFile path of the saved model, loaded through {@code rFact.loadRanker}
   * @param testFile path of the test data
   * @param printIndividual when {@code true}, additionally prints one line per list with its ID
   *     and score, and labels the mean line "all"
   */
  public void test(String modelFile, String testFile, boolean printIndividual) {
    Ranker model = rFact.loadRanker(modelFile);
    int[] modelFeatures = model.getFeatures();
    List<RankList> samples = readInput(testFile);
    if (normalize) {
      normalize(samples, modelFeatures);
    }

    double total = 0.0;
    for (RankList sample : samples) {
      RankList ranked = model.rank(sample);
      double listScore = testScorer.score(ranked);
      if (printIndividual) {
        System.out.println(
            testScorer.name() + "   " + ranked.getID() + "   " + SimpleMath.round(listScore, 4));
      }
      total += listScore;
    }

    double mean = total / samples.size();
    if (printIndividual) {
      System.out.println(testScorer.name() + "   all   " + SimpleMath.round(mean, 4));
    } else {
      System.out.println(testScorer.name() + " on test data: " + SimpleMath.round(mean, 4));
    }
  }
Beispiel #6
0
  /**
   * Trains the currently selected ranking algorithm on a split of the training file and,
   * optionally, evaluates it on separate test data.
   *
   * <p>The split into training and validation lists is delegated to {@code prepareSplit}, which
   * is given {@code percentTrain} and the {@code normalize} flag. After training, the test score
   * (if test data was given) is printed to standard output, and the model is saved when the
   * {@code modelFile} field is non-empty.
   *
   * @param trainFile path of the training samples
   * @param percentTrain share of the training samples used for training; the rest is used as
   *     validation data
   * @param testFile path of the test data; empty string for "no test data"
   * @param featureDefFile path of the feature definition file, passed to {@code prepareSplit}
   */
  public void evaluate(
      String trainFile, double percentTrain, String testFile, String featureDefFile) {
    List<RankList> train = new ArrayList<RankList>();
    List<RankList> validation = new ArrayList<RankList>();
    int[] features =
        prepareSplit(trainFile, featureDefFile, percentTrain, normalize, train, validation);
    List<RankList> test = null;
    if (testFile.compareTo("") != 0) {
      test = readInput(testFile);
      // Test data must go through the same normalization as the data the model is trained on,
      // as the other evaluate/test entry points already do; previously it was scored raw.
      if (normalize) normalize(test, features);
    }

    Ranker ranker = rFact.createRanker(type, train, features);
    ranker.set(trainScorer);
    ranker.setValidationSet(validation);
    ranker.init();
    ranker.learn();

    if (test != null) {
      double rankScore = evaluate(ranker, test);
      System.out.println(testScorer.name() + " on test data: " + SimpleMath.round(rankScore, 4));
    }
    if (modelFile.compareTo("") != 0) {
      System.out.println("");
      ranker.save(modelFile);
      System.out.println("Model saved to: " + modelFile);
    }
  }
Beispiel #7
0
  /**
   * Runs k-fold cross validation of the currently selected ranking algorithm on the sample file
   * with the defined features.
   *
   * <p>For every fold a fresh ranker is trained and three scores are taken on the fold's test
   * lists: the lists as they are, the lists as ranked by the trained model, and the lists
   * produced by {@code createOracles}. The three averages over all folds are printed on one
   * tab-separated "Total:" line, each rounded to 4 decimal places.
   *
   * @param sampleFile path of the sample data, split into folds by {@code prepareCV}
   * @param featureDefFile path of the feature definition file, passed to {@code prepareCV}
   * @param nFold number of folds
   */
  public void evaluate(String sampleFile, String featureDefFile, int nFold) {
    List<List<RankList>> trainFolds = new ArrayList<List<RankList>>();
    List<List<RankList>> testFolds = new ArrayList<List<RankList>>();
    int[] features = prepareCV(sampleFile, featureDefFile, nFold, normalize, trainFolds, testFolds);

    double baselineSum = 0.0;
    double modelSum = 0.0;
    double oracleSum = 0.0;

    for (int fold = 0; fold < nFold; fold++) {
      List<RankList> foldTrain = trainFolds.get(fold);
      List<RankList> foldTest = testFolds.get(fold);

      Ranker foldRanker = rFact.createRanker(type, foldTrain, features);
      foldRanker.set(trainScorer);
      foldRanker.init();
      foldRanker.learn();

      // A null ranker means "score the lists without re-ranking them".
      baselineSum += evaluate(null, foldTest);
      modelSum += evaluate(foldRanker, foldTest);
      oracleSum += evaluate(null, createOracles(foldTest));
    }

    double baselineMean = baselineSum / nFold;
    double modelMean = modelSum / nFold;
    double oracleMean = oracleSum / nFold;
    String summary =
        "Total: "
            + SimpleMath.round(baselineMean, 4)
            + "\t"
            + SimpleMath.round(modelMean, 4)
            + "\t"
            + SimpleMath.round(oracleMean, 4)
            + "\t";
    System.out.println(summary);
  }
Beispiel #8
0
  /**
   * Trains the currently selected ranking algorithm on explicit training data, with optional
   * validation and test data, using the defined features.
   *
   * <p>When {@code normalize} is set, every data set that was loaded is normalized. After
   * training, the test score (if test data was given) is printed to standard output, and the
   * model is saved when the {@code modelFile} field is non-empty.
   *
   * @param trainFile path of the training data
   * @param validationFile path of the validation data; empty string for none
   * @param testFile path of the test data; empty string for none
   * @param featureDefFile path of the feature definition file; when {@code readFeature} returns
   *     {@code null} for it, the features are taken from the training data instead
   */
  public void evaluate(
      String trainFile, String validationFile, String testFile, String featureDefFile) {
    List<RankList> train = readInput(trainFile);
    List<RankList> validation = (validationFile.length() == 0) ? null : readInput(validationFile);
    List<RankList> test = (testFile.length() == 0) ? null : readInput(testFile);

    int[] features = readFeature(featureDefFile);
    if (features == null) {
      // No features specified: fall back to the features found in the training data.
      features = getFeatureFromSampleVector(train);
    }

    if (normalize) {
      normalize(train, features);
      if (validation != null) {
        normalize(validation, features);
      }
      if (test != null) {
        normalize(test, features);
      }
    }

    Ranker ranker = rFact.createRanker(type, train, features);
    ranker.set(trainScorer);
    ranker.setValidationSet(validation);
    ranker.init();
    ranker.learn();

    if (test != null) {
      double testScore = evaluate(ranker, test);
      System.out.println(testScorer.name() + " on test data: " + SimpleMath.round(testScore, 4));
    }
    if (modelFile.length() != 0) {
      System.out.println("");
      ranker.save(modelFile);
      System.out.println("Model saved to: " + modelFile);
    }
  }
Beispiel #9
0
 /**
  * Scores a collection of rank lists with the test scorer.
  *
  * @param ranker model used to re-rank the lists before scoring; when {@code null} the lists
  *     are scored as given
  * @param rl the rank lists to score
  * @return the value returned by {@code testScorer.score} for the (possibly re-ranked) lists
  */
 public double evaluate(Ranker ranker, List<RankList> rl) {
   List<RankList> toScore = (ranker == null) ? rl : ranker.rank(rl);
   return testScorer.score(toScore);
 }