Example #1
0
  /**
   * Sorts an N-Triples file and writes the result to {@code outFile}.
   *
   * <p>The lines of {@code inFile} are counted first. If the count is below
   * {@code MAX_PERMIT_FILE_SIZE}, the file is sorted with {@code quickInMemorySort} using a
   * {@link StatementsComparatorSPO}. Otherwise a workspace directory named after the current
   * time in milliseconds is created in the current working directory, the input is split into
   * sorted "level 0" files by {@code readSplitSort}, and a {@link MultithreadMerger} is handed
   * those files together with {@code outFile}. The workspace is removed with
   * {@code deleteFlatDir} once {@code merger.waitForIt()} has returned; if an exception is
   * thrown before that point the workspace is left on disk.
   *
   * <p>Progress messages are printed to {@code System.out}.
   *
   * @param inFile N-Triples file to sort
   * @param outFile File to store results in
   * @throws IOException if the workspace directory cannot be created, or if thrown by one of
   *     the reading/writing helpers
   * @throws RDFHandlerException if thrown by one of the sorting helpers
   * @throws InterruptedException if thrown while waiting for the merger
   */
  public static void sort(File inFile, File outFile)
      throws IOException, RDFHandlerException, InterruptedException {

    // The line count decides which sorting strategy is used.
    System.out.println("Counting statements");
    long fLength = LineCounter.countLines(inFile.getAbsolutePath());
    System.out.println(fLength);

    if (fLength < MAX_PERMIT_FILE_SIZE) {
      System.out.println("Use in-memory sorting approach.");
      // quick in-memory sort
      quickInMemorySort(inFile, outFile, new StatementsComparatorSPO());
    } else {
      System.out.println("Use extensive sorting approach.");
      // extensive sort
      // create a workspace folder in the current working directory
      String workspaceName = String.valueOf(System.currentTimeMillis());
      File workspace = new File(".", workspaceName);
      // mkdir() reports failure only through its return value; fail early instead of letting
      // the first intermediate file write fail with a less helpful message.
      if (!workspace.mkdir() && !workspace.isDirectory()) {
        throw new IOException(
            "Could not create workspace directory " + workspace.getAbsolutePath());
      }
      System.out.println("Using " + workspace.getAbsolutePath() + " as workspace.");

      // calculate number of files needed: smallest power of two that keeps the per-file
      // line count at or below MAX_PERMIT_FILE_SIZE (readSplitSort derives the same value)
      int fileCount = 1;
      while (fLength / fileCount > MAX_PERMIT_FILE_SIZE) {
        fileCount *= 2;
      }
      System.out.println("Starting with " + fileCount + " level 0 files.");

      MultithreadMerger merger = new MultithreadMerger(fileCount, outFile, workspace);
      readSplitSort(inFile, fLength, workspace, merger);
      System.out.println("All level 0 files habe been processed.");
      merger.waitForIt();

      // Only reached on success; the merger has finished, so the workspace is no longer needed.
      deleteFlatDir(workspace);
    }
    System.out.println("Complete");
  }
Example #2
0
  /**
   * Reads all statements from {@code inFile}, splits them into chunks, sorts each chunk and
   * writes it to its own level 0 file ({@code lv0_1}, {@code lv0_2}, ...) in {@code workspace}.
   * Every file is registered with {@code merger} at level 0 right after it has been written.
   *
   * <p>The number of chunks is the smallest power of two for which
   * {@code nrOfStmts / fileCount} does not exceed {@code MAX_PERMIT_FILE_SIZE}. All chunks but
   * the last hold exactly {@code nrOfStmts / fileCount} statements; the last chunk takes
   * whatever the reader still delivers.
   *
   * <p>NOTE(review): the buffer is sized for {@code nrOfStmts / fileCount + fileCount}
   * statements, which assumes the reader yields no more than {@code nrOfStmts} statements in
   * total - confirm against the caller's line count.
   *
   * @param inFile File to read the statements from
   * @param nrOfStmts Number of statements in inFile
   * @param workspace Directory where to store intermediate results.
   * @param merger Merger that every written level 0 file is registered with
   * @throws RDFHandlerException if writing or registering a file fails
   * @throws IOException if writing or registering a file fails
   */
  private static void readSplitSort(
      File inFile, long nrOfStmts, File workspace, MultithreadMerger merger)
      throws RDFHandlerException, IOException {

    // calculate nr of files needed to be under permissible file size; nr needs to be power of two
    int fileCount = 1;
    while (nrOfStmts / fileCount > MAX_PERMIT_FILE_SIZE) {
      fileCount *= 2;
    }

    // number of statements in each of the first fileCount-1 files
    int effectiveStmtsPerFile = (int) (nrOfStmts / fileCount);

    // Reused for every chunk; the extra fileCount slots leave room for the division remainder
    // that ends up in the last chunk.
    final Statement[] buffer = new Statement[effectiveStmtsPerFile + fileCount];
    final StatementsComparatorSPO comparator = new StatementsComparatorSPO();

    int fileNr = 0;

    PullReader pReader = new PullReader(inFile);
    pReader.load();
    try {
      // first fileCount-1 files: fixed number of statements each
      for (int i = 0; i < fileCount - 1; i++) {
        for (int j = 0; j < effectiveStmtsPerFile; j++) {
          Statement stmt = pReader.peek();
          pReader.removeHead();
          buffer[j] = stmt;
        }
        File file = new File(workspace, "lv0_" + ++fileNr);
        sortWriteRegister(buffer, effectiveStmtsPerFile, file, comparator, merger);
      }

      // last file: everything that is left in the reader
      int idx = 0;
      while (!pReader.isEmpty()) {
        Statement stmt = pReader.peek();
        pReader.removeHead();
        buffer[idx] = stmt;
        idx++;
      }
      File file = new File(workspace, "lv0_" + ++fileNr);
      // Pass idx (not idx - 1) so the range matches the one that was sorted and the way the
      // other chunks are written; otherwise the last statement would be dropped.
      sortWriteRegister(buffer, idx, file, comparator, merger);
    } finally {
      // Release the reader even when writing a chunk fails.
      pReader.close();
    }
  }

  /**
   * Sorts the first {@code count} entries of {@code buffer}, writes them to {@code file} and
   * registers the file with {@code merger} at level 0. Failures are rethrown with the file name
   * added to the message.
   */
  private static void sortWriteRegister(
      Statement[] buffer,
      int count,
      File file,
      StatementsComparatorSPO comparator,
      MultithreadMerger merger)
      throws RDFHandlerException, IOException {
    Arrays.sort(buffer, 0, count, comparator);
    try {
      writeBuffer(buffer, 0, count, file);
      merger.registerFile(file, 0);
    } catch (RDFHandlerException ex) {
      throw new RDFHandlerException("When writing file " + file.getName(), ex);
    } catch (IOException ex) {
      throw new IOException("When writing file " + file.getName(), ex);
    }
  }