Ejemplo n.º 1
0
  /*
   * Tests to make sure the caching system works.
   * For every generated problem set the same cross-validation experiment is run twice, back to
   * back, after the cache directory has been cleared; the statistics strings of both runs must be
   * identical. The first run starts from an empty cache, the second run is the one expected to
   * benefit from whatever the first run cached.
   *
   * Requires the "drexel_1" corpus under JSANConstants.JSAN_CORPORA_PREFIX.
   *
   * Takes about 1 minute, depending on the authors selected and how many documents they have.
   *
   * Throws Exception on any failure while creating problem sets, loading the classifier or
   * running the experiments, and InterruptedException if the thread is interrupted while pausing
   * after the cache has been cleared.
   */
  @Test
  public void scalingCached() throws Exception {
    // TODO: Tests generally should have no randomness. We should perform these tests on a set of
    // static problem sets that the test uses every time.

    int numAuthors = 2;
    System.out.println(numAuthors);
    File source = Paths.get(JSANConstants.JSAN_CORPORA_PREFIX, "drexel_1").toFile();
    Assert.assertTrue(
        "You don't have the specified corpora available.", source.exists() && source.isDirectory());

    // This can be anything. Problem set location
    File dest = Paths.get(JSANConstants.JUNIT_RESOURCE_PACKAGE, "temp", "cache_test_.xml").toFile();

    GenericProblemSetCreator.createProblemSetsWithSize(source, dest, numAuthors, 10);
    // NOTE(review): every non-hidden file in dest's directory is treated as a problem set, so
    // files left behind by earlier runs would presumably be picked up as well - confirm.
    File[] problemSets = BekahUtil.listNotHiddenFiles(dest.getParentFile());

    // The feature set is the same for every problem set, so resolve it once.
    Path featureSetPath =
        Paths.get(JSANConstants.JSAN_FEATURESETS_PREFIX, "writeprints_feature_set_limited.xml");

    for (File problemSet : problemSets) {
      // Delete the cache in the beginning so the first run cannot reuse earlier results.
      deleteRecursive(new File(JSANConstants.JSAN_CACHE), false);

      // Short pause after clearing the cache (presumably to let the deletion settle - confirm).
      // An interrupt is deliberately propagated: the method declares "throws Exception", and an
      // interrupted test should stop rather than print a stack trace and keep running.
      Thread.sleep(20);

      System.out.println(problemSet);
      ProblemSet p = new ProblemSet(problemSet.getAbsolutePath());
      Set<String> authors = p.getAuthors();
      for (String a : authors) {
        System.out.print(a + ",");
      }
      System.out.println("");

      // First run: starts with an empty cache.
      FullAPI firstApi = buildCrossValidationApi(featureSetPath, problemSet);

      long firstStart = System.currentTimeMillis();
      firstApi.prepareInstances();
      firstApi.run();
      ExperimentResults firstResults = firstApi.getResults();
      long firstEnd = System.currentTimeMillis();

      String firstStatistics = firstResults.getStatisticsString();
      System.out.println(firstStatistics);

      // Second run: identical configuration, executed after the first run has completed.
      FullAPI secondApi = buildCrossValidationApi(featureSetPath, problemSet);

      long secondStart = System.currentTimeMillis();
      secondApi.prepareInstances();
      secondApi.run();
      ExperimentResults secondResults = secondApi.getResults();
      long secondEnd = System.currentTimeMillis();

      String secondStatistics = secondResults.getStatisticsString();
      System.out.println(secondStatistics);

      Assert.assertEquals(
          "Cached results different from non-cached results", firstStatistics, secondStatistics);

      // Wall-clock duration of each run, truncated to whole seconds.
      System.out.print((firstEnd - firstStart) / 1000 + " s\t");
      System.out.println((secondEnd - secondStart) / 1000 + " s");
    }
  }

  /*
   * Builds the experiment used by scalingCached(): 4 threads, cross-validation, and a
   * WekaAnalyzer wrapping a reflectively created weka.classifiers.functions.SMO instance.
   * Both runs of the test go through this method so their configuration cannot drift apart.
   *
   * featureSetPath - location of the feature set XML, passed to the builder as cfdPath
   * problemSet     - problem set file, passed to the builder by absolute path as psPath
   *
   * Throws Exception if the classifier class cannot be loaded or instantiated, or if building
   * the API fails.
   */
  private FullAPI buildCrossValidationApi(Path featureSetPath, File problemSet) throws Exception {
    // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(), which
    // rethrows checked constructor exceptions without declaring them.
    Object classifier =
        Class.forName("weka.classifiers.functions.SMO").getDeclaredConstructor().newInstance();
    return new FullAPI.Builder()
        .cfdPath(featureSetPath.toString())
        .psPath(problemSet.getAbsolutePath())
        .setAnalyzer(new WekaAnalyzer(classifier))
        .numThreads(4)
        .analysisType(FullAPI.analysisType.CROSS_VALIDATION)
        .build();
  }