/**
 * Creates a sorter that uses the configuration object returned by
 * {@code ComparisonConfig.getInstance()}.
 *
 * @throws DiffException declared by this constructor; presumably raised when
 *         the configuration instance cannot be obtained (confirm against
 *         {@code ComparisonConfig})
 */
public TableSort() throws DiffException {
    this.config = ComparisonConfig.getInstance();
}
public long sort( String inputFile, String outputFile, String tmpdirectory, long numberLinePerFile, int maxNumberOfFile, boolean saveKey, boolean keepTmp, String delimiter, char escape, KeyGenerator keyGenerator, Filter filter) throws DiffException { if (maxNumberOfFile > MAX_FILE_NUMBER) { throw new DiffException("Maxium number of files can't exceed " + MAX_FILE_NUMBER); } try { List<File> tmpFileList = new ArrayList<File>(); // first step: read input table, split into sorted temp tables TxtTable table = new TxtTable(inputFile, keyGenerator, filter, false, delimiter, escape); long curSplitRowNo = 0; int fileCount = 0; table.setPrimaryFields(config.getLeftPrimaryFields()); String tableTile = table.getTitleRow().getContent(); TxtTable.Row row = table.getNextRow(); MemTable tmpTable = new MemTable(); while (row != null && fileCount < maxNumberOfFile) { tmpTable.addRow(row); curSplitRowNo++; row = table.getNextRow(); if (curSplitRowNo >= numberLinePerFile || row == null) { File newtmpfile = File.createTempFile("sort_tmp", "flatfile", new File(tmpdirectory)); if (!keepTmp) newtmpfile.deleteOnExit(); tmpFileList.add(newtmpfile); tmpTable.sortAndSave(newtmpfile); fileCount++; if (row != null) { tmpTable = new MemTable(); curSplitRowNo = 0; } } } if (fileCount == maxNumberOfFile) { throw new DiffException("File count exceed maximum allowed count."); } table.closeFile(); // second step: merge sort return mergeSortedFiles( tmpFileList, new File(outputFile), tableTile, new Comparator<TxtTable.Row>() { public int compare(TxtTable.Row i, TxtTable.Row j) { return i.compareTo(j); } }, delimiter, escape, saveKey); } catch (IOException e) { throw new DiffException(e); } }
/**
 * Creates a sorter and then initialises its configuration from the given file.
 *
 * <p>Delegates to the no-argument constructor first, then calls
 * {@code init(configFile)} on the configuration object it obtained.
 *
 * @param configFile configuration file handed to {@code config.init}
 * @throws DiffException declared by this constructor; presumably propagated from
 *         the no-argument constructor or from {@code init} (confirm against
 *         {@code ComparisonConfig})
 */
public TableSort(String configFile) throws DiffException {
    this();
    this.config.init(configFile);
}