/**
 * Runs an n-fold cross validation of the given approach over the queries loaded from
 * {@code queryFile} and returns the mean of the per-fold RMSD values.
 *
 * <p>The queries are cut into {@code n} contiguous partitions of {@code queries.size() / n}
 * elements each; the last partition additionally receives whatever is left over after the
 * integer division. For every fold a training set is generated from the partitions, the
 * approach produces a resource ranking from it, and that ranking is compared against the
 * expected result of the fold.
 *
 * @param n number of folds; must be positive
 * @param approach approach whose resource ranking is evaluated on every fold
 * @return the sum of all fold errors divided by {@code n}
 * @throws IOException declared by the signature; presumably raised while loading the queries
 *     or the model (confirm against the helpers)
 * @throws IllegalArgumentException if {@code n} is not positive
 */
@SuppressWarnings("unchecked")
  public double crossValidationError(int n, Approach approach) throws IOException {
    if (n <= 0) {
      // Fail fast with a clear message instead of an ArithmeticException from the division
      // below (n == 0) or an unrelated "Illegal Capacity" error from ArrayList (n < 0).
      throw new IllegalArgumentException("Number of folds must be positive, but was " + n);
    }

    List<String> queries = QueryLoader.loadQueries(queryFile);
    int partSize = queries.size() / n;
    List<List<String>> partitions = new ArrayList<>(n);
    for (int i = 0; i < (n - 1); i++) {
      partitions.add(queries.subList(i * partSize, (i + 1) * partSize));
    }
    // The last partition also takes the remainder of the integer division.
    partitions.add(queries.subList((n - 1) * partSize, queries.size()));

    Model model = readModel(MODEL_FILE);
    LOGGER.info("Generating expected counts...");
    ObjectIntOpenHashMap<String>[] gsResults = countResources(partitions, model);

    double rootMeanSquareSum = 0;
    double[] foldErrors = new double[n];
    for (int i = 0; i < n; i++) {
      LOGGER.info("Starting fold " + i + "...");
      List<String> training = generateTrainingSet(i, partitions);
      List<String> predicted = approach.generateResourceRanking(training, model);
      foldErrors[i] = RMSD.getRMSD(predicted, generateExpectedResult(i, gsResults));
      LOGGER.info("Error of fold " + i + " = " + foldErrors[i]);
      rootMeanSquareSum += foldErrors[i];
    }
    LOGGER.info("Error of folds " + Arrays.toString(foldErrors));
    return rootMeanSquareSum / n;
  }
Example #2
0
  /**
   * Builds the query engine from the given arguments and runs the command line queries.
   *
   * <p>The arguments are processed, the {@code QueryIndex} and {@code QueryLoader} are created,
   * build statistics and the summary returned by {@code dataSources.fetchSummary()} are written
   * to {@code System.err}, and then {@code queryFilesFromCmdLine()} is executed. Once the loader
   * exists, its tabix readers are closed no matter whether the following steps succeed.
   *
   * <p>Any exception is caught here: the stack trace and a short message are printed and the
   * constructor returns normally.
   *
   * @param args arguments handed to {@code processArgs}
   */
  public TQuery(String[] args) {
    try {
      long startTime = System.currentTimeMillis();
      processArgs(args);

      queryIndex = new QueryIndex(this);
      queryLoader = new QueryLoader(this);

      try {
        // print some stats on building the engine
        String diffTime =
            Num.formatNumberOneFraction(((double) (System.currentTimeMillis() - startTime)) / 1000);
        int numFiles = vcfDataFiles.length + bedDataFiles.length + mafDataFiles.length;
        System.err.println("\n" + diffTime + " Sec to build using " + IO.memory() + " of RAM");
        System.err.println("\t" + numFiles + "\tData sources loaded");
        System.err.println("\t" + dataSources.getRecordsLoaded() + "\tRecords indexed");
        System.err.println("\t" + dataSources.getRecordsSkipped() + "\tRecords skipped\n");

        // print summary of available filters
        System.err.println(dataSources.fetchSummary());

        queryFilesFromCmdLine();
      } finally {
        // release file handles, also when reporting or querying fails part-way
        queryLoader.closeTabixReaders();
      }

    } catch (Exception e) {
      e.printStackTrace();
      System.err.println("\nProblem with executing the TQuery!");
    }
  }
 /**
  * Computes the RMSD between the resource ranking generated by the given approach and the
  * mapping derived from the summed-up results of a {@code Baseline} built on the executor.
  *
  * <p>Both sides work on the queries loaded from {@code queryFile}.
  *
  * @param approach approach that generates the resource ranking for the loaded queries
  * @param model model passed to both the approach and the baseline
  * @param executor executor the {@code Baseline} is constructed with
  * @return the value returned by {@code RMSD.getRMSD} for the two results
  * @throws IOException declared by the signature; presumably raised while loading the queries
  */
 public double validate(Approach approach, Model model, QueryExecutor executor)
     throws IOException {
   List<String> loadedQueries = QueryLoader.loadQueries(queryFile);
   // The baseline is created before the approach runs, then queried afterwards.
   Baseline reference = new Baseline(executor);
   List<String> predictedRanking = approach.generateResourceRanking(loadedQueries, model);
   return RMSD.getRMSD(
       predictedRanking,
       generateUriRankRangeMapping(reference.sumUpResults(model, loadedQueries)));
 }