/**
 * Runs an n-fold cross validation of the given approach over the queries loaded from
 * {@code queryFile} and returns the arithmetic mean of the per-fold RMSD values.
 *
 * <p>The loaded queries are cut into {@code n} consecutive partitions. The first
 * {@code n - 1} partitions hold {@code queries.size() / n} elements each (integer division);
 * the last partition takes everything that is left, so it also absorbs the remainder. If
 * there are fewer queries than folds, all partitions except the last one are empty.
 *
 * <p>For every fold {@code i} the training set is built by
 * {@code generateTrainingSet(i, partitions)}, the approach produces a ranking from it, and
 * that ranking is compared via {@code RMSD.getRMSD} with
 * {@code generateExpectedResult(i, gsResults)}.
 *
 * @param n        number of folds; must be positive
 * @param approach approach whose generated resource ranking is evaluated
 * @return the sum of all fold errors divided by {@code n}
 * @throws IOException              declared by this method; presumably raised while loading
 *                                  the queries or the model (the helpers are not visible here)
 * @throws IllegalArgumentException if {@code n} is zero or negative
 */
@SuppressWarnings("unchecked")
public double crossValidationError(int n, Approach approach) throws IOException {
    // Fail fast with a clear message instead of an ArithmeticException from the division
    // below (n == 0) or an "Illegal Capacity" error from the ArrayList constructor (n < 0).
    if (n <= 0) {
        throw new IllegalArgumentException("The number of folds has to be positive but was " + n + ".");
    }

    List<String> queries = QueryLoader.loadQueries(queryFile);
    int partSize = queries.size() / n;

    // subList returns views on the loaded list, so no query is copied here.
    List<List<String>> partitions = new ArrayList<>(n);
    for (int i = 0; i < (n - 1); i++) {
        partitions.add(queries.subList(i * partSize, (i + 1) * partSize));
    }
    // The last partition runs to the end of the list and therefore absorbs the remainder.
    partitions.add(queries.subList((n - 1) * partSize, queries.size()));

    Model model = readModel(MODEL_FILE);
    LOGGER.info("Generating expected counts...");
    ObjectIntOpenHashMap<String> gsResults[] = countResources(partitions, model);

    double rootMeanSquareSum = 0;
    double foldErrors[] = new double[n];
    for (int i = 0; i < n; i++) {
        LOGGER.info("Starting fold " + i + "...");
        List<String> training = generateTrainingSet(i, partitions);
        List<String> predicted = approach.generateResourceRanking(training, model);
        foldErrors[i] = RMSD.getRMSD(predicted, generateExpectedResult(i, gsResults));
        LOGGER.info("Error of fold " + i + " = " + foldErrors[i]);
        rootMeanSquareSum += foldErrors[i];
    }
    LOGGER.info("Error of folds " + Arrays.toString(foldErrors));
    return rootMeanSquareSum / n;
}
// constructor public TQuery(String[] args) { try { long startTime = System.currentTimeMillis(); processArgs(args); queryIndex = new QueryIndex(this); queryLoader = new QueryLoader(this); // print some stats on building the engine String diffTime = Num.formatNumberOneFraction(((double) (System.currentTimeMillis() - startTime)) / 1000); int numFiles = vcfDataFiles.length + bedDataFiles.length + mafDataFiles.length; System.err.println("\n" + diffTime + " Sec to build using " + IO.memory() + " of RAM"); System.err.println("\t" + numFiles + "\tData sources loaded"); System.err.println("\t" + dataSources.getRecordsLoaded() + "\tRecords indexed"); System.err.println("\t" + dataSources.getRecordsSkipped() + "\tRecords skipped\n"); // print summary of available filters System.err.println(dataSources.fetchSummary()); queryFilesFromCmdLine(); // release file handles queryLoader.closeTabixReaders(); } catch (Exception e) { e.printStackTrace(); System.err.println("\nProblem with executing the TQuery!"); } }
/**
 * Evaluates the given approach on all queries loaded from {@code queryFile}.
 *
 * <p>The approach generates a resource ranking for the loaded queries. That ranking is
 * compared via {@code RMSD.getRMSD} with the result of {@code generateUriRankRangeMapping}
 * applied to what a {@code Baseline}, built on the given executor, sums up for the same
 * model and queries.
 *
 * @param approach approach whose generated resource ranking is evaluated
 * @param model    model handed to both the approach and the baseline
 * @param executor query executor the baseline is constructed with
 * @return the value returned by {@code RMSD.getRMSD} for the two results
 * @throws IOException declared by this method; presumably raised while loading the queries
 *                     (the loader is not visible here)
 */
public double validate(Approach approach, Model model, QueryExecutor executor) throws IOException {
    final List<String> loadedQueries = QueryLoader.loadQueries(queryFile);
    final Baseline referenceBaseline = new Baseline(executor);

    // The approach is run before the baseline is asked for its results.
    final List<String> ranking = approach.generateResourceRanking(loadedQueries, model);

    return RMSD.getRMSD(
            ranking,
            generateUriRankRangeMapping(referenceBaseline.sumUpResults(model, loadedQueries)));
}