Esempio n. 1
0
  /**
   * Evaluates every algorithm in {@code algs} on random train/test splits of the dataset and
   * prints one tab-separated result line per algorithm: name, mean hit rate, mean training time
   * in seconds, mean prediction time in seconds (all averaged over the number of runs).
   *
   * <p>Configuration is read from system properties (default in parentheses):
   *
   * <ul>
   *   <li>{@code dataset} ({@code movielens-pos.json}) - file passed to the dataset loader
   *   <li>{@code trainPercent} ({@code 0.8}) - probability that a document goes to the train set
   *   <li>{@code testPercent} ({@code 0.5}) - probability that a word of a test document is
   *       held out instead of being given to the algorithm
   *   <li>{@code runs} ({@code 1}) - number of independent random splits per algorithm
   *   <li>{@code predictions} ({@code 1}) - number of top-scored words checked per test document
   * </ul>
   *
   * @param args unused
   * @throws IllegalArgumentException if {@code predictions} is smaller than 1
   * @throws Throwable anything raised while loading the dataset, training or predicting
   */
  public static void main(String[] args) throws Throwable {
    DATASET = System.getProperty("dataset", "movielens-pos.json");
    TRAINPERCENT = Double.parseDouble(System.getProperty("trainPercent", "0.8"));
    TESTPERCENT = Double.parseDouble(System.getProperty("testPercent", "0.5"));
    RUNS = Integer.parseInt(System.getProperty("runs", "1"));
    PREDICTIONS = Integer.parseInt(System.getProperty("predictions", "1"));
    if (PREDICTIONS < 1) {
      // With no slots the candidate queue stays empty and peek() below would return null.
      throw new IllegalArgumentException("predictions must be at least 1, got " + PREDICTIONS);
    }
    // Unseeded on purpose: splits differ between invocations. For reproducible runs use
    // new Random(Integer.parseInt(System.getProperty("seed", "1"))) instead.
    rand = new Random();

    System.out.println("File: " + DATASET);
    System.out.println("Train Percent: " + TRAINPERCENT);
    System.out.println("Test Percent: " + TESTPERCENT);
    System.out.println("Runs: " + RUNS);
    System.out.println("Predictions: " + PREDICTIONS);
    docs = DatasetOld.loadDataset(DATASET);

    for (Algorithm alg : algs) {
      System.out.print(alg.name + "\t");
      double total = 0.0;

      // Accumulated wall-clock nanoseconds across all runs of this algorithm.
      long trainTime = 0;
      long predictTime = 0;

      for (int run = 0; run < RUNS; run++) {
        ArrayList<HashMap<Integer, Double>> traindocs = new ArrayList<HashMap<Integer, Double>>();
        ArrayList<HashMap<Integer, Double>> testdocs = new ArrayList<HashMap<Integer, Double>>();

        // Random document-level split; each document lands in exactly one of the two lists.
        for (HashMap<Integer, Double> doc : docs) {
          if (rand.nextDouble() < TRAINPERCENT) {
            traindocs.add(doc);
          } else {
            testdocs.add(doc);
          }
        }

        long startTime = System.nanoTime();
        alg.train(traindocs);
        trainTime += System.nanoTime() - startTime;

        int successes = 0;

        for (HashMap<Integer, Double> testdoc : testdocs) {
          // Words shown to the algorithm vs. words it is expected to recover.
          HashMap<Integer, Double> givenwords = new HashMap<Integer, Double>();
          HashSet<Integer> testwords = new HashSet<Integer>();

          for (int word : testdoc.keySet()) {
            if (rand.nextDouble() < TESTPERCENT) {
              testwords.add(word);
            } else {
              givenwords.put(word, testdoc.get(word));
            }
          }

          startTime = System.nanoTime();
          double[] scores = alg.predict(givenwords);
          predictTime += System.nanoTime() - startTime;

          // Bounded queue holding the best PREDICTIONS candidates seen so far.
          // NOTE(review): assumes Pair orders by ascending score so that peek() is the weakest
          // retained candidate - confirm against Pair's compareTo.
          PriorityQueue<Pair> pq = new PriorityQueue<Pair>();

          for (int i = 0; i < scores.length; i++) {
            // Words the algorithm was given are not valid predictions.
            if (givenwords.containsKey(i)) {
              continue;
            }

            if (pq.size() < PREDICTIONS) {
              pq.add(new Pair(i, scores[i]));
            } else if (scores[i] > pq.peek().score) {
              // Must be an else-branch: re-testing a candidate that was just added could evict
              // the current head and insert the same index a second time.
              pq.poll();
              pq.add(new Pair(i, scores[i]));
            }
          }

          // Every retained prediction that is one of the held-out words counts as a hit.
          while (!pq.isEmpty()) {
            Pair pair = pq.poll();
            if (testwords.contains(pair.word)) {
              successes++;
            }
          }
        }

        // Hits per prediction slot per test document. A run with no test documents
        // evaluates to NaN here (0.0 / 0) and carries through to the printed average.
        total += (double) successes / PREDICTIONS / testdocs.size();
      }

      System.out.println(
          total / RUNS
              + "\t"
              + (trainTime / 1000000000.0 / RUNS)
              + "\t"
              + (predictTime / 1000000000.0 / RUNS));
    }
  }