/** * Sorts the file. * * @param inFile N-Triples file to sort * @param outFile File to store results in * @throws IOException * @throws RDFHandlerException * @throws InterruptedException */ public static void sort(File inFile, File outFile) throws IOException, RDFHandlerException, InterruptedException { // Get length of file System.out.println("Counting statements"); long fLength = LineCounter.countLines(inFile.getAbsolutePath()); System.out.println(fLength); if (fLength < MAX_PERMIT_FILE_SIZE) { System.out.println("Use in-memory sorting approach."); // quick in-memory sort quickInMemorySort(inFile, outFile, new StatementsComparatorSPO()); } else { System.out.println("Use extensive sorting approach."); // extensive sort // create a workspace folder in user.home String workspaceName = String.valueOf(System.currentTimeMillis()); File workspace = new File(".", workspaceName); workspace.mkdir(); System.out.println("Using " + workspace.getAbsolutePath() + " as workspace."); // calculate number of files needed int fileCount = 1; while (fLength / fileCount > MAX_PERMIT_FILE_SIZE) { fileCount *= 2; } System.out.println("Starting with " + fileCount + " level 0 files."); MultithreadMerger merger = new MultithreadMerger(fileCount, outFile, workspace); readSplitSort(inFile, fLength, workspace, merger); System.out.println("All level 0 files habe been processed."); merger.waitForIt(); deleteFlatDir(workspace); } System.out.println("Complete"); }
/** * Reads all statments from the readers and writes them sorted into .nt-Files. * * @param pReaderArr Array of readers * @param nrOfStmts Number of statments in inFile * @param workspace Directory where to store intermediate results. * @throws RDFHandlerException * @throws IOException */ private static void readSplitSort( File inFile, long nrOfStmts, File workspace, MultithreadMerger merger) throws RDFHandlerException, IOException { // calculate nr of files needed to be under permissable file size; nr needs to be power of two int fileCount = 1; while (nrOfStmts / fileCount > MAX_PERMIT_FILE_SIZE) { fileCount *= 2; } // calculate number of statement in first fileCount-1 files int effectiveStmtsPerFile = (int) (nrOfStmts / fileCount); final Statement[] buffer = new Statement[effectiveStmtsPerFile + fileCount]; // Stores read statements // help vars int fileNr = 0; // read for first n files PullReader pReader = new PullReader(inFile); pReader.load(); for (int i = 0; i < fileCount - 1; i++) { for (int j = 0; j < effectiveStmtsPerFile; j++) { Statement stmt = pReader.peek(); pReader.removeHead(); buffer[j] = stmt; } Arrays.sort(buffer, 0, effectiveStmtsPerFile, new StatementsComparatorSPO()); File file = new File(workspace, "lv0_" + ++fileNr); try { writeBuffer(buffer, 0, effectiveStmtsPerFile, file); merger.registerFile(file, 0); } catch (RDFHandlerException ex) { throw new RDFHandlerException("When writing file " + file.getName(), ex); } catch (IOException ex) { throw new IOException("When writing file " + file.getName(), ex); } } // read for last file int idx = 0; while (!pReader.isEmpty()) { Statement stmt = pReader.peek(); pReader.removeHead(); buffer[idx] = stmt; idx++; } Arrays.sort(buffer, 0, idx, new StatementsComparatorSPO()); File file = new File(workspace, "lv0_" + ++fileNr); try { writeBuffer(buffer, 0, idx - 1, file); merger.registerFile(file, 0); } catch (RDFHandlerException ex) { throw new RDFHandlerException("When writing file " + file.getName(), ex); } catch (IOException ex) { throw new IOException("When writing file " + file.getName(), ex); } pReader.close(); }