Ejemplo n.º 1
0
  /**
   * Creates a brick from the first record of a GenBank DNA file.
   *
   * <p>The record is stored in {@code richSeq}; its name, length and sequence string are cached in
   * {@code locus}, {@code seqLen} and {@code sSeq}.
   *
   * @param sFile path of the GenBank file to read
   * @throws IllegalArgumentException if the file cannot be opened or no sequence can be read from
   *     it; the underlying failure is attached as the cause
   */
  public BgBrick(String sFile) {
    BufferedReader reader = null;
    try {
      reader = new BufferedReader(new FileReader(sFile));
      richSeq = RichSequence.IOTools.readGenbankDNA(reader, null).nextRichSequence();
    } catch (Exception e) {
      // Report the real failure instead of printing it and then tripping over an unassigned
      // richSeq on the next statement.
      throw new IllegalArgumentException("Unable to read a GenBank sequence from " + sFile, e);
    } finally {
      if (reader != null) {
        try {
          reader.close();
        } catch (java.io.IOException ignored) {
          // Only the first record is needed and it has already been read (or the read failure
          // is already being reported), so a failure to close carries no extra information.
        }
      }
    }

    locus = richSeq.getName();
    seqLen = richSeq.length();
    sSeq = richSeq.seqString();
  }
  /**
   * Loads the FASTA DNA sequences in {@code f} into a fresh {@link HashSequenceDB}.
   *
   * <p>{@code seqsFile} is set to {@code f} and {@code seqDB} is replaced before reading starts, so
   * after a failed read {@code seqDB} holds only the sequences added up to that point.
   *
   * @param f FASTA file containing the DNA sequences
   * @throws FileNotFoundException if {@code f} cannot be opened for reading
   * @throws ChangeVetoException if the database refuses a sequence
   * @throws NoSuchElementException if the sequence iterator is advanced past its end
   * @throws BioException if a sequence cannot be parsed or added
   */
  @Option(
      help = "Input sequences (output the intersecting sequences rather than a new GFF file)",
      optional = true)
  public void setSeqs(File f)
      throws FileNotFoundException, ChangeVetoException, NoSuchElementException, BioException {
    this.seqsFile = f;
    this.seqDB = new HashSequenceDB();

    BufferedReader reader = new BufferedReader(new FileReader(f));
    try {
      SequenceIterator si = RichSequence.IOTools.readFastaDNA(reader, null);
      while (si.hasNext()) {
        seqDB.addSequence(si.nextSequence());
      }
    } finally {
      // Release the file handle whether or not parsing succeeded.
      try {
        reader.close();
      } catch (java.io.IOException ignored) {
        // Closing a reader has nothing left to lose; swallowing this keeps the declared
        // exception list unchanged for existing callers.
      }
    }
  }
Ejemplo n.º 3
0
  /**
   * Draws random sequences (or random fixed-length fragments) from {@code seqFile} and writes them
   * as FASTA.
   *
   * <p>One group of sequences is produced per entry of {@code count}. When output files are given,
   * group {@code k} is written to {@code args[k]} and the number of files must equal the number of
   * counts; when no files are given, every group is written to standard output.
   *
   * <p>Sampling mode:
   *
   * <ul>
   *   <li>{@code sampleWithReplacement} unset and {@code length <= 0}: whole sequences are drawn
   *       without replacement, shared across all groups.
   *   <li>otherwise: sequences are drawn with replacement; if {@code length > 0} a random window of
   *       exactly {@code length} symbols is cut from each drawn sequence, and only sequences at
   *       least that long are eligible.
   * </ul>
   *
   * @param args output file names, one per entry of {@code count}; may be empty to use stdout
   * @throws BioException if a sequence cannot be parsed or written
   * @throws IOException if the input or an output file cannot be opened, read or written
   * @throws IllegalArgumentException if more sequences are requested than can be supplied
   */
  public void main(String[] args) throws BioException, IOException {
    final boolean toFiles = (args != null) && (args.length > 0);

    // The one-to-one pairing of counts and files only applies when files were actually given;
    // checking it unconditionally made the stdout fallback below unreachable.
    if (toFiles && (count.length != args.length)) {
      System.err.println("The number of counts and output file arguments does not match!");
      System.exit(1);
    }

    // Read every input sequence up front and release the file handle as soon as that is done.
    List<Sequence> seqs = new ArrayList<Sequence>();
    BufferedReader seqReader = new BufferedReader(new FileReader(seqFile));
    try {
      RichSequenceIterator seqIterator = RichSequence.IOTools.readFastaDNA(seqReader, null);
      while (seqIterator.hasNext()) {
        seqs.add(seqIterator.nextSequence());
      }
    } finally {
      seqReader.close();
    }

    // Total number of sequences requested; non-positive counts yield empty groups.
    long requested = 0;
    for (int c : count) {
      if (c > 0) {
        requested += c;
      }
    }

    final boolean fragments = length > 0;
    List<List<Sequence>> chosenSeqs = new ArrayList<List<Sequence>>();

    if (!sampleWithReplacement && !fragments) {
      // Whole sequences without replacement: each pick is removed from the shared pool.
      if (requested > seqs.size()) {
        throw new IllegalArgumentException(
            "Cannot sample "
                + requested
                + " sequences without replacement from only "
                + seqs.size());
      }
      for (int c : count) {
        List<Sequence> cseqs = new ArrayList<Sequence>();
        chosenSeqs.add(cseqs);
        for (int k = 0; k < c; k++) {
          cseqs.add(seqs.remove(random.nextInt(seqs.size())));
        }
      }
    } else {
      /*
       * if you want to sample from sequences with replacement
       * or if the wanted length is specified
       */
      List<Sequence> candidates = seqs;
      if (fragments) {
        // A window of `length` symbols only fits in sequences that are at least that long.
        candidates = new ArrayList<Sequence>();
        for (Sequence candidate : seqs) {
          if (candidate.length() >= length) {
            candidates.add(candidate);
          }
        }
      }
      if ((requested > 0) && candidates.isEmpty()) {
        throw new IllegalArgumentException(
            fragments
                ? "No input sequence is at least " + length + " symbols long"
                : "No input sequences to sample from");
      }

      for (int c : count) {
        int fragmentIndex = 0;
        List<Sequence> cseqs = new ArrayList<Sequence>();
        chosenSeqs.add(cseqs);

        for (int k = 0; k < c; k++) {
          Sequence randomSeq = candidates.get(random.nextInt(candidates.size()));
          if (fragments) {
            // Symbol coordinates are 1-based and inclusive at both ends, so a window of
            // `length` symbols starting at startPos ends at startPos + length - 1.
            int startPos = 1 + random.nextInt(1 + randomSeq.length() - length);
            cseqs.add(
                new SimpleSequence(
                    randomSeq.subList(startPos, startPos + length - 1),
                    null,
                    randomSeq.getName() + "_" + fragmentIndex++,
                    Annotation.EMPTY_ANNOTATION));
          } else {
            cseqs.add(randomSeq);
          }
        }
      }
    }

    // Open the outputs only after sampling succeeded, and always close the files we opened.
    OutputStream[] outputStreams = new OutputStream[toFiles ? args.length : 1];
    try {
      if (toFiles) {
        for (int f = 0; f < args.length; f++) {
          outputStreams[f] = new BufferedOutputStream(new FileOutputStream(args[f]));
        }
      } else {
        outputStreams[0] = System.out;
      }

      int i = 0;
      for (List<Sequence> seqList : chosenSeqs) {
        // With files there is one stream per group; without, every group shares stdout.
        OutputStream out = outputStreams[toFiles ? i : 0];
        for (Sequence seq : seqList) {
          Sequence s;
          if (uniqueNames) {
            // NOTE(review): the suffix is the group index, so a sequence drawn twice into the
            // same group still gets the same name — confirm whether that is intended.
            s =
                new SimpleSequence(
                    seq.subList(1, seq.length()),
                    null,
                    seq.getName() + "_" + i,
                    Annotation.EMPTY_ANNOTATION);
          } else {
            s = seq;
          }
          RichSequence.IOTools.writeFasta(out, s, null);
          out.flush();
        }
        i++;
      }
    } finally {
      if (toFiles) {
        for (OutputStream os : outputStreams) {
          if (os != null) {
            try {
              os.close();
            } catch (IOException ignored) {
              // Every record is flushed right after it is written, so a failing close cannot
              // lose data; ignoring it lets the remaining streams be closed too.
            }
          }
        }
      }
    }
  }
  /**
   * Intersects the locations described by several GFF files and writes the shared blocks.
   *
   * <p>Only sequence identifiers present in every input file are considered. For each of them the
   * per-file locations are intersected (and, if {@code negate} is set, subtracted from the full
   * extent of the sequence). Each resulting block is written to standard output either as a GFF
   * record, when {@code outputFormat} is {@code Format.GFF}, or as a FASTA record cut from
   * {@code seqDB}.
   *
   * @param args names of the GFF files to intersect; at least one is required
   * @throws IllegalArgumentException if no GFF file is given
   * @throws IllegalStateException if sequences are needed ({@code negate} or sequence output) but
   *     {@code seqDB} has not been set
   * @throws Exception if a GFF file or sequence cannot be read, validated or written
   */
  public void main(String[] args) throws Exception {
    if ((args == null) || (args.length == 0)) {
      throw new IllegalArgumentException("At least one GFF file is required");
    }

    List<Map<String, Location>> locs = new ArrayList<Map<String, Location>>();
    for (String fileName : args) {
      locs.add(GFFUtils.gffToLocationMap(new File(fileName)));
    }

    // Keep only the sequence identifiers that occur in every input file.
    Set<String> seqIds;
    {
      Iterator<Map<String, Location>> i = locs.iterator();
      seqIds = new HashSet<String>(i.next().keySet());
      while (i.hasNext()) {
        seqIds.retainAll(i.next().keySet());
      }
    }

    if (validate && (seqDB != null)) {
      for (Map<String, Location> ls : locs) {
        WriteCoveredSequences.validateGFFSequenceIdentifiersAgainstSequences(ls, seqDB);
      }
    }

    PrintWriter pw = null;
    GFFWriter gffw = null;
    GFFEntrySet gffEntries = null;

    if (outputFormat == Format.GFF) {
      pw = new PrintWriter(new OutputStreamWriter(System.out));
      gffw = new GFFWriter(pw);
    } else {
      gffEntries = new GFFEntrySet();
    }

    for (String id : seqIds) {
      Iterator<Map<String, Location>> i = locs.iterator();
      Location l = i.next().get(id);
      while (i.hasNext()) {
        l = LocationTools.intersection(l, i.next().get(id));
      }

      if (negate) {
        // The complement is taken against the whole sequence, so its length must be known.
        if (seqDB == null) {
          throw new IllegalStateException(
              "Input sequences are required to negate the intersection");
        }
        l = LocationTools.subtract(new RangeLocation(1, seqDB.getSequence(id).length()), l);
      }

      // One record is reused for every block of this sequence; only start/end change.
      SimpleGFFRecord r = new SimpleGFFRecord();
      r.setSeqName(id);
      r.setFeature("block");
      r.setSource("nmintersectseq");
      r.setStrand(StrandedFeature.POSITIVE);

      for (Iterator<?> bi = l.blockIterator(); bi.hasNext(); ) {
        Location bloc = (Location) bi.next();
        r.setStart(bloc.getMin());
        r.setEnd(bloc.getMax());
        r.setComment("");
        r.setGroupAttributes(new HashMap<Object, Object>());
        if (gffw != null) {
          gffw.recordLine(r);
        } else {
          if (seqDB == null) {
            throw new IllegalStateException(
                "Input sequences are required to output the intersecting sequences");
          }
          Sequence seq =
              new SimpleSequence(
                  seqDB.getSequence(id).subList(bloc.getMin(), bloc.getMax()),
                  null,
                  String.format("%s_|%d-%d|", id, bloc.getMin(), bloc.getMax()),
                  Annotation.EMPTY_ANNOTATION);
          RichSequence.IOTools.writeFasta(System.out, seq, null);
        }
      }
    }
    if (pw != null) {
      pw.flush();
    }

    // gffEntries only exists in sequence-output mode; in GFF mode it is null and must not be
    // handed to annotateSequences.
    // NOTE(review): nothing in this method adds records to gffEntries, so this call appears to
    // have no visible effect here — confirm whether it can be dropped.
    if ((seqDB != null) && (gffEntries != null)) {
      System.err.println("Writing output sequences...");
      GFFTools.annotateSequences(seqDB, gffEntries);
    }
  }