コード例 #1
0
ファイル: TableSort.java プロジェクト: simba2013/tableDiff
  /**
   * Sorts a delimited text table using an external merge sort.
   *
   * <p>Step one reads {@code inputFile} in runs of at most {@code numberLinePerFile} rows, sorts
   * each run in memory and saves it to a temp file created in {@code tmpdirectory}. Step two hands
   * the temp files to {@link #mergeSortedFiles} which writes {@code outputFile}.
   *
   * @param inputFile path of the table to sort
   * @param outputFile path of the sorted table to write
   * @param tmpdirectory directory in which the intermediate run files are created
   * @param numberLinePerFile maximum number of rows held in memory / written per run file
   * @param maxNumberOfFile maximum number of run files that may be created; must not be larger
   *     than {@code MAX_FILE_NUMBER}
   * @param saveKey forwarded to {@link #mergeSortedFiles}
   * @param keepTmp when {@code false} the run files are registered with {@code deleteOnExit()}
   * @param delimiter field delimiter passed to the table reader and to the merge step
   * @param escape escape character passed to the table reader and to the merge step
   * @param keyGenerator key generator passed to the table reader
   * @param filter row filter passed to the table reader
   * @return the value returned by {@link #mergeSortedFiles} (the number of rows it wrote)
   * @throws DiffException if {@code maxNumberOfFile} exceeds {@code MAX_FILE_NUMBER}, if the input
   *     needs more than {@code maxNumberOfFile} run files, or if an I/O error occurs
   */
  public long sort(
      String inputFile,
      String outputFile,
      String tmpdirectory,
      long numberLinePerFile,
      int maxNumberOfFile,
      boolean saveKey,
      boolean keepTmp,
      String delimiter,
      char escape,
      KeyGenerator keyGenerator,
      Filter filter)
      throws DiffException {
    if (maxNumberOfFile > MAX_FILE_NUMBER) {
      throw new DiffException("Maxium number of files can't exceed " + MAX_FILE_NUMBER);
    }

    try {
      List<File> tmpFileList = new ArrayList<File>();
      String tableTile;

      // first step: read input table, split into sorted temp tables

      TxtTable table = new TxtTable(inputFile, keyGenerator, filter, false, delimiter, escape);

      try {
        long curSplitRowNo = 0;
        int fileCount = 0;

        table.setPrimaryFields(config.getLeftPrimaryFields());

        tableTile = table.getTitleRow().getContent();
        TxtTable.Row row = table.getNextRow();

        MemTable tmpTable = new MemTable();

        while (row != null && fileCount < maxNumberOfFile) {
          tmpTable.addRow(row);
          curSplitRowNo++;

          // Read ahead so we know whether the row just added was the last one.
          row = table.getNextRow();

          // Flush the current run when it is full or when the input is exhausted.
          if (curSplitRowNo >= numberLinePerFile || row == null) {
            File newtmpfile = File.createTempFile("sort_tmp", "flatfile", new File(tmpdirectory));

            if (!keepTmp) newtmpfile.deleteOnExit();

            tmpFileList.add(newtmpfile);

            tmpTable.sortAndSave(newtmpfile);
            fileCount++;

            if (row != null) {
              tmpTable = new MemTable();
              curSplitRowNo = 0;
            }
          }
        }

        // The loop stops either because the input is exhausted (row == null) or because the file
        // budget is used up. Only the latter, with rows still unread, is an error; an input that
        // fits exactly into maxNumberOfFile runs is valid.
        if (row != null) {
          throw new DiffException("File count exceed maximum allowed count.");
        }
      } finally {
        // Release the input file on every path, including the failure paths above.
        table.closeFile();
      }

      // second step: merge sort

      return mergeSortedFiles(
          tmpFileList,
          new File(outputFile),
          tableTile,
          new Comparator<TxtTable.Row>() {
            public int compare(TxtTable.Row i, TxtTable.Row j) {
              return i.compareTo(j);
            }
          },
          delimiter,
          escape,
          saveKey);
    } catch (IOException e) {
      throw new DiffException(e);
    }
  }
コード例 #2
0
ファイル: TableSort.java プロジェクト: simba2013/tableDiff
  /**
   * Merges several individually sorted table files into one sorted output file (k-way merge).
   *
   * <p>Every input file is opened as a {@code TxtTable} and placed in a priority queue ordered by
   * {@code cmp} applied to each table's next row ({@code peek()}). The smallest row is repeatedly
   * popped and written; a table that becomes empty is closed and its file is deleted.
   *
   * <p>The output starts with {@code tableTitle}, preceded by {@code "ID_ID"} and the delimiter
   * when {@code saveKey} is set. With {@code saveKey} each row's content is written unchanged;
   * otherwise it is passed through {@code removeFirstElement} first (presumably stripping the
   * leading key column - confirm against that helper).
   *
   * <p>The writer is created without an explicit charset, so the platform default is used.
   *
   * @param files sorted input files; each must contain at least one row
   * @param outputfile file to write the merged result to
   * @param tableTitle header line written before the data rows
   * @param cmp ordering of rows
   * @param delimiter field delimiter; {@code null} selects {@code TxtTable.DEFAULT_DELIMITER}
   * @param escape escape character passed to the table reader
   * @param saveKey whether the key column is kept in the output
   * @return number of data rows written
   * @throws IOException if reading an input or writing the output fails
   */
  public long mergeSortedFiles(
      List<File> files,
      File outputfile,
      String tableTitle,
      final Comparator<TxtTable.Row> cmp,
      String delimiter,
      char escape,
      boolean saveKey)
      throws IOException {
    PriorityQueue<TxtTable> pq =
        new PriorityQueue<TxtTable>(
            11,
            new Comparator<TxtTable>() {
              public int compare(TxtTable i, TxtTable j) {
                return cmp.compare(i.peek(), j.peek());
              }
            });

    if (delimiter == null) delimiter = TxtTable.DEFAULT_DELIMITER;

    // long, not int: the method returns long and inputs may exceed Integer.MAX_VALUE rows.
    long rowcounter = 0;

    // Table that is currently open but NOT inside pq; tracked so that finally can close it.
    TxtTable current = null;
    BufferedWriter fbw = null;

    try {
      for (File f : files) {
        current =
            new TxtTable(
                f.getAbsolutePath(), new FirstRowKeyGenerator(), null, true, delimiter, escape);

        current.reload();
        pq.add(current);
        current = null; // now owned by pq
      }

      fbw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputfile)));

      if (saveKey) fbw.write("ID_ID" + delimiter);

      fbw.write(tableTitle);
      fbw.newLine();

      while (!pq.isEmpty()) {
        current = pq.poll();
        TxtTable.Row r = current.pop();

        if (saveKey) fbw.write(r.getContent());
        else fbw.write(removeFirstElement(r.getContent(), delimiter));

        fbw.newLine();

        ++rowcounter;

        if (current.empty()) {
          TxtTable exhausted = current;
          current = null; // hand off first so a failing close is not retried in finally
          exhausted.closeFile();
          exhausted.getFile().delete();
        } else {
          pq.add(current); // add it back
          current = null;
        }
      }
    } finally {
      try {
        if (fbw != null) fbw.close();
      } finally {
        // Close whatever is still open even if closing the writer failed.
        if (current != null) current.closeFile();
        for (TxtTable table : pq) table.closeFile();
      }
    }

    return rowcounter;
  }