Exemplo n.º 1
0
  /**
   * Creates a sequence backed by the FASTA file at {@code path} and its companion index, which is
   * expected at {@code path + ".fai"}.
   *
   * @param path location of the FASTA file; the index location is derived from it
   * @throws IOException declared for failures while obtaining the content length or loading the
   *     index
   */
  public FastaIndexedSequence(String path) throws IOException {
    this.path = path;
    this.contentLength = ParsingUtils.getContentLength(path);

    // Deliberately no "index is older than the file it indexes" check here: modification times
    // are meaningless once the files have been copied or moved, which is always the case for the
    // hosted genomes, so such a check only produced spurious warnings.
    this.index = new FastaIndex(path + ".fai");
    this.chromoNamesList = new ArrayList<String>(this.index.getSequenceNames());
  }
Exemplo n.º 2
0
  /**
   * Return the sequence for the query interval as a byte array. Coordinates are "ucsc" style (0
   * based); {@code qstart} is inclusive and {@code qend} is exclusive, so at most {@code qend -
   * qstart} bases are returned.
   *
   * <p>Example: 5 bases per line, 6 bytes per line ({@code *} marks the end-of-line byte):
   *
   * <pre>
   * Bases   0 1 2 3 4 * |  5  6  7  8  9  * | 10 11 12 13 14  *
   * Offset  0 1 2 3 4   |  0  1  2  3  4    |  0  1  2  3  4
   * Bytes   0 1 2 3 4 5 |  6  7  8  9 10 11 | 12 13 14 15 16 17
   * </pre>
   *
   * <p>Query 9 - 13: start line = 9 / 5 = 1, base0 = 1 * 5 = 5, offset = 9 - 5 = 4, start byte =
   * (1 * 6) + 4 = 10. End line = 13 / 5 = 2, base1 = 2 * 5 = 10, offset1 = 13 - 10 = 3, end byte =
   * (2 * 6) + 3 = 15. Bytes 10..14 are read, the end-of-line byte 11 is skipped, and bases 9..12
   * are returned.
   *
   * @param chr name of the sequence to look up in the index
   * @param qstart start of the query interval (0 based, inclusive); negative values are treated as
   *     0
   * @param qend end of the query interval (exclusive); values beyond the indexed sequence size are
   *     clamped to that size
   * @return the bases in the interval with end-of-line bytes removed, or {@code null} if {@code
   *     chr} has no index entry, the clamped interval is empty, or an {@link IOException} occurs
   *     while reading
   */
  public byte[] getSequence(String chr, int qstart, int qend) {

    FastaIndex.FastaSequenceIndexEntry idxEntry = index.getIndexEntry(chr);
    if (idxEntry == null) {
      return null;
    }

    try {

      final int start = Math.max(0, qstart); // qstart should never be < 0
      // Clamp before narrowing: casting the size to int first would wrap for sizes above
      // Integer.MAX_VALUE. The minimum is never larger than qend, so the cast back to int is safe.
      final int end = (int) Math.min((long) idxEntry.getSize(), (long) qend);

      final int bytesPerLine = idxEntry.getBytesPerLine();
      final int basesPerLine = idxEntry.getBasesPerLine();
      final int nEndBytes = bytesPerLine - basesPerLine; // end-of-line bytes per line

      final int startLine = start / basesPerLine;
      final int endLine = end / basesPerLine;

      final int offset = start - startLine * basesPerLine; // column of "start" in its line
      final int offset1 = end - endLine * basesPerLine; // column of "end" in its line

      // Line-to-byte conversion is done in long arithmetic: line * bytesPerLine exceeds the int
      // range for large sequences and would otherwise overflow before being added to position.
      final long position = idxEntry.getPosition();
      final long startByte = position + (long) startLine * bytesPerLine + offset;
      final long endByte =
          Math.min(contentLength, position + (long) endLine * bytesPerLine + offset1);

      if (startByte >= endByte) {
        return null;
      }

      // Read all the bytes in the range.  This will include endline characters
      final byte[] allBytes = readBytes(startByte, endByte);
      final int allBytesLength = allBytes.length;

      // Collects the sequence -- this will be "allBytes" without the endline characters.
      ByteArrayOutputStream bos = new ByteArrayOutputStream(end - start);

      int srcPos = 0;

      // Copy the (partial) first line when the query starts mid-line.
      if (offset > 0) {
        // Never copy more than was actually read; endByte may have been clamped to contentLength.
        int nBases = Math.min(Math.min(end - start, basesPerLine - offset), allBytesLength);
        bos.write(allBytes, srcPos, nBases);
        srcPos += (nBases + nEndBytes);
      }

      // Copy the remaining lines, stepping over the end-of-line bytes after each one.
      while (srcPos < allBytesLength) {
        int nBases = Math.min(basesPerLine, allBytesLength - srcPos);
        bos.write(allBytes, srcPos, nBases);
        srcPos += (nBases + nEndBytes);
      }

      return bos.toByteArray();

    } catch (IOException e) {
      // Read failures are reported on stderr and surfaced to the caller as a null result.
      e.printStackTrace();
      return null;
    }
  }
Exemplo n.º 3
0
 /**
  * Returns the size recorded in the index for the named sequence.
  *
  * @param chrname name of the sequence to look up
  * @return the value of {@code index.getSequenceSize(chrname)}
  */
 @Override
 public int getChromosomeLength(String chrname) {
   final int sequenceSize = index.getSequenceSize(chrname);
   return sequenceSize;
 }