예제 #1
0
  /**
   * Copies one block-compressed text file to another, line by line, replacing every {@code |}
   * character with {@code /}.
   *
   * <p>Each input line is trimmed of leading and trailing whitespace; lines that are empty after
   * trimming are dropped. Every retained line is written followed by a single {@code \n}. The
   * replacement is applied to the whole line, not only to particular columns.
   *
   * <p>Input and output paths are hard-coded. Any exception is caught and its stack trace is
   * printed; both streams are closed whether or not the copy succeeds.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String inFile =
        "/psychipc01/disk2/references/1000Genome/release/20130502_v5a/ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz";
    String outFile =
        "/psychipc01/disk2/references/1000Genome/release/20130502_v5a/ALL.chr1.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes1.vcf.gz";

    // try-with-resources closes the writer first and the reader second (reverse declaration
    // order), and does so even when reading or writing fails part-way through.
    try (BlockCompressedInputStream br = new BlockCompressedInputStream(new File(inFile));
        BlockCompressedOutputStream bw = new BlockCompressedOutputStream(new File(outFile))) {

      // Platform-default charset is kept deliberately so the encoding matches whatever
      // readLine() used when decoding. NOTE(review): confirm readLine() decodes with the
      // default charset before switching this to an explicit one.
      byte[] newline = "\n".getBytes();

      String line;
      while ((line = br.readLine()) != null) {
        line = line.trim();
        if (line.isEmpty()) {
          continue;
        }

        // Literal character replacement; equivalent to the regex "[|]" -> "/".
        bw.write(line.replace('|', '/').getBytes());
        bw.write(newline);
      }

    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
    /**
     * Parses the block of sites starting at {@code myStartSite}, stores it in {@code myGenoCache}
     * under {@code myProcessBlock}, and then keeps reading the lines that follow to fill up to
     * {@code NUM_LOOK_AHEAD_BLOCKS - 1} subsequent blocks.
     *
     * <p>For every block the order of operations is: parse the lines, put the result in the cache,
     * touch the cache entry, remove the block from {@code myCurrentlyProcessingBlocks}, complete
     * any futures registered in {@code myFutureQueue} for the block's sites, and advance
     * {@code myStartSite} by {@code myNumLinesPerInterval}.
     *
     * <p>Look-ahead stops as soon as the next block is already cached, cannot be added to
     * {@code myCurrentlyProcessingBlocks}, or {@code myStartSite} reaches {@code mySiteCount}.
     * Exceptions are logged through {@code myLogger}; the reader obtained from
     * {@code getReader()} is always added back to {@code myReaders}.
     */
    @Override
    public void run() {

      if (myStartSite >= mySiteCount) {
        return;
      }

      BlockCompressedInputStream reader = getReader();
      try {

        reader.seek(myIndex.virtualOffset(mySeekIndex));

        // Primary block: each site's future is completed as soon as its line has been parsed.
        int blockSize = Math.min(myNumLinesPerInterval, mySiteCount - myStartSite);
        byte[][] parsed = new byte[blockSize][];
        int offset = 0;
        while (offset < blockSize) {
          parsed[offset] =
              parseLine(reader.readLine(), myTaxaCount, myStartSite + offset, myIsOneLetter);
          CompletableFuture<byte[]> waiter = myFutureQueue.remove(myStartSite + offset);
          if (waiter != null) {
            waiter.complete(parsed[offset]);
          }
          offset++;
        }
        myGenoCache.put(myProcessBlock, parsed);
        // Read the entry back right away to prevent early eviction from the cache.
        myGenoCache.getIfPresent(myProcessBlock);
        myCurrentlyProcessingBlocks.remove(myProcessBlock);
        // Second pass over the same sites, for futures still present in the queue.
        completeQueuedFutures(myStartSite, parsed);
        myStartSite += myNumLinesPerInterval;

        // Look-ahead blocks: the reader is already positioned on the next line, so no seek.
        int ahead = 1;
        while (myStartSite < mySiteCount && ahead < NUM_LOOK_AHEAD_BLOCKS) {

          // Stop if the block is already cached or cannot be added to the processing set.
          if (myGenoCache.getIfPresent(myProcessBlock + ahead) != null
              || !myCurrentlyProcessingBlocks.add(myProcessBlock + ahead)) {
            return;
          }

          blockSize = Math.min(myNumLinesPerInterval, mySiteCount - myStartSite);
          parsed = new byte[blockSize][];
          for (int k = 0; k < blockSize; k++) {
            parsed[k] = parseLine(reader.readLine(), myTaxaCount, myStartSite + k, myIsOneLetter);
          }
          myGenoCache.put(myProcessBlock + ahead, parsed);
          // Read the entry back right away to prevent early eviction from the cache.
          myGenoCache.getIfPresent(myProcessBlock + ahead);
          myCurrentlyProcessingBlocks.remove(myProcessBlock + ahead);
          completeQueuedFutures(myStartSite, parsed);
          myStartSite += myNumLinesPerInterval;
          ahead++;
        }

      } catch (Exception e) {
        myLogger.error(e.getMessage(), e);
      } finally {
        myReaders.add(reader);
      }
    }

    /**
     * Removes from {@code myFutureQueue} the future registered for each site in
     * {@code [firstSite, firstSite + genotypes.length)} and, when one was present, completes it
     * with the corresponding row of {@code genotypes}.
     *
     * @param firstSite site index corresponding to {@code genotypes[0]}
     * @param genotypes parsed rows, one per consecutive site
     */
    private void completeQueuedFutures(int firstSite, byte[][] genotypes) {
      for (int k = 0; k < genotypes.length; k++) {
        CompletableFuture<byte[]> waiter = myFutureQueue.remove(firstSite + k);
        if (waiter != null) {
          waiter.complete(genotypes[k]);
        }
      }
    }