/**
 * Copies a block-compressed VCF file line by line, replacing every {@code '|'}
 * character with {@code '/'}.
 *
 * <p>Each input line is trimmed; lines that are empty after trimming are dropped.
 * Every retained line is written followed by a single {@code '\n'}. The input and
 * output paths are hard-coded below.
 *
 * <p>Any exception is printed to standard error and the method then returns
 * normally. Both streams are closed on every path.
 *
 * @param args command-line arguments (ignored)
 */
public static void main(String[] args) {
    String inFile = "/psychipc01/disk2/references/1000Genome/release/20130502_v5a/ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz";
    String outFile = "/psychipc01/disk2/references/1000Genome/release/20130502_v5a/ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes1.vcf.gz";

    // try-with-resources closes in reverse declaration order (writer first, then
    // reader) and, unlike explicit close() calls at the end, also runs when
    // reading or writing fails part-way through.
    try (BlockCompressedInputStream br = new BlockCompressedInputStream(new File(inFile));
         BlockCompressedOutputStream bw = new BlockCompressedOutputStream(new File(outFile))) {

        // NOTE(review): getBytes() uses the platform default charset. It is kept
        // as-is because how readLine() decodes bytes is not visible here; confirm
        // before pinning an explicit charset on only one side of the round trip.
        byte[] newline = "\n".getBytes();

        String line;
        while ((line = br.readLine()) != null) {
            line = line.trim();
            if (line.isEmpty()) {
                continue;
            }
            // Plain character replacement; no regular expression is needed.
            bw.write(line.replace('|', '/').getBytes());
            bw.write(newline);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
@Override public void run() { if (myStartSite >= mySiteCount) { return; } BlockCompressedInputStream reader = getReader(); try { reader.seek(myIndex.virtualOffset(mySeekIndex)); int numSites = Math.min(myNumLinesPerInterval, mySiteCount - myStartSite); byte[][] result = new byte[numSites][]; for (int i = 0; i < numSites; i++) { result[i] = parseLine(reader.readLine(), myTaxaCount, myStartSite + i, myIsOneLetter); CompletableFuture<byte[]> future = myFutureQueue.remove(myStartSite + i); if (future != null) { future.complete(result[i]); } } myGenoCache.put(myProcessBlock, result); // This get to prevent early eviction from cache myGenoCache.getIfPresent(myProcessBlock); myCurrentlyProcessingBlocks.remove(myProcessBlock); for (int i = 0; i < numSites; i++) { CompletableFuture<byte[]> future = myFutureQueue.remove(myStartSite + i); if (future != null) { future.complete(result[i]); } } myStartSite += myNumLinesPerInterval; if (myStartSite >= mySiteCount) { return; } for (int b = 1; b < NUM_LOOK_AHEAD_BLOCKS; b++) { if (myGenoCache.getIfPresent(myProcessBlock + b) != null) { return; } if (!myCurrentlyProcessingBlocks.add(myProcessBlock + b)) { return; } numSites = Math.min(myNumLinesPerInterval, mySiteCount - myStartSite); result = new byte[numSites][]; for (int i = 0; i < numSites; i++) { result[i] = parseLine(reader.readLine(), myTaxaCount, myStartSite + i, myIsOneLetter); } myGenoCache.put(myProcessBlock + b, result); // This get to prevent early eviction from cache myGenoCache.getIfPresent(myProcessBlock + b); myCurrentlyProcessingBlocks.remove(myProcessBlock + b); for (int i = 0; i < numSites; i++) { CompletableFuture<byte[]> future = myFutureQueue.remove(myStartSite + i); if (future != null) { future.complete(result[i]); } } myStartSite += myNumLinesPerInterval; if (myStartSite >= mySiteCount) { return; } } } catch (Exception e) { myLogger.error(e.getMessage(), e); } finally { myReaders.add(reader); } }