コード例 #1
0
ファイル: VcfIndexTree.java プロジェクト: pcingola/SnpSift
  /**
   * Recursively builds one node of the index tree for the entries listed in 'idxs'.
   *
   * <p>A split coordinate is chosen via {@link #mean}, a slot for this node is reserved
   * with nextEntry(), and the entries are then partitioned into those ending before the
   * split point, those starting after it, and those overlapping it. The first two groups
   * are indexed recursively; the overlapping group is stored in this node.
   *
   * @param idxs Indexes (into vcfIndexChromo) of the entries to be indexed
   * @return Index of the node that was added (-1 if 'idxs' is empty and nothing was added)
   */
  int build(TIntArrayList idxs) {
    if (idxs.isEmpty()) {
      return -1;
    }

    // Choose the split coordinate.
    // Note: If we move the split point by one base, the probability of intersecting
    // an interval is significantly reduced (most entries are SNPs). This reduces
    // the index size, the number of 'file.seek()' operations and speeds up the index.
    int pivot = mean(idxs);
    if (vcfIndexChromo.getStart(idxs.get(0)) < pivot) {
      pivot--;
    }

    // Reserve this node's slot before recursing, so that it precedes its children
    int nodeIdx = nextEntry();

    // Collapse the whole list into this node when the entries form a consecutive
    // file block and either there are only a few of them (fewer than
    // COLLAPSE_MAX_NUM_ENTRIES) or the block is small (size below maxBlockSize).
    // The block size is only computed when the entry-count test fails.
    boolean collapse =
        consecutiveFileBlock(idxs)
            && (idxs.size() < COLLAPSE_MAX_NUM_ENTRIES
                || consecutiveFileBlockSize(idxs) < maxBlockSize);

    TIntArrayList below = new TIntArrayList();
    TIntArrayList above = new TIntArrayList();
    TIntArrayList overlapping = new TIntArrayList();

    int total = idxs.size();
    for (int pos = 0; pos < total; pos++) {
      int entry = idxs.get(pos);

      if (collapse) {
        // Collapsed block: every entry is stored in this node
        overlapping.add(entry);
        continue;
      }

      if (vcfIndexChromo.getEnd(entry) < pivot) {
        below.add(entry); // Ends strictly before the split point
      } else if (vcfIndexChromo.getStart(entry) > pivot) {
        above.add(entry); // Starts strictly after the split point
      } else {
        overlapping.add(entry); // Spans the split point
      }
    }

    // Index both sides (left first, then right)
    int belowIdx = build(below);
    int aboveIdx = build(above);

    // Fill in this node
    set(nodeIdx, belowIdx, aboveIdx, pivot, overlapping);

    return nodeIdx;
  }
コード例 #2
0
ファイル: VcfIndexTree.java プロジェクト: pcingola/SnpSift
  /**
   * Middle coordinate of the entries indexed by 'idxs'.
   *
   * <p>Despite the name, this is not an arithmetic mean: the start and end coordinates of
   * every entry are collected, sorted, and the element at the middle position of that
   * sorted list is returned (the upper of the two central values, since the list always
   * has an even number of elements).
   *
   * @param idxs Indexes (into vcfIndexChromo) of the entries to consider
   * @return The middle coordinate, or 0 if 'idxs' is empty
   */
  int mean(TIntArrayList idxs) {
    int numEntries = idxs.size();
    if (numEntries == 0) {
      return 0;
    }

    // Two coordinates (start, end) per entry
    int[] coords = new int[2 * numEntries];
    int next = 0;
    for (int k = 0; k < numEntries; k++) {
      int entry = idxs.get(k);
      coords[next++] = vcfIndexChromo.getStart(entry);
      coords[next++] = vcfIndexChromo.getEnd(entry);
    }
    java.util.Arrays.sort(coords);

    // coords.length / 2 == numEntries
    return coords[numEntries];
  }