Esempio n. 1
0
  /**
   * Draws random samples of sequences from the FASTA file {@code seqFile} and writes each sample
   * in FASTA format.
   *
   * <p>One sample is drawn per entry of {@code count}, the entry giving the number of sequences in
   * that sample. Sampling is controlled by fields of the enclosing object:
   *
   * <ul>
   *   <li>{@code length > 0}: each draw picks a random sequence (with replacement) and cuts a
   *       random fragment of exactly {@code length} symbols from it; sequences shorter than
   *       {@code length} are excluded from the pool.
   *   <li>{@code sampleWithReplacement}: whole sequences are drawn and may be drawn repeatedly.
   *   <li>otherwise: whole sequences are drawn and removed from the pool, so no sequence is
   *       chosen twice, not even across samples.
   * </ul>
   *
   * <p>When {@code uniqueNames} is set, every written sequence gets a running number appended to
   * its name.
   *
   * @param args output file names, one per entry of {@code count}; when {@code null} or empty, at
   *     most one sample may be requested and it is written to standard output
   * @throws BioException if the input cannot be parsed, or if the pool of sequences is exhausted
   *     before all requested sequences have been drawn
   * @throws IOException if the input file cannot be read or an output cannot be written
   */
  public void main(String[] args) throws BioException, IOException {
    boolean writeToFiles = (args != null) && (args.length > 0);

    // With output files there must be exactly one file per sample. Without them everything goes
    // to standard output, which can take a single sample only.
    if (writeToFiles ? (count.length != args.length) : (count.length > 1)) {
      System.err.println("The number of counts and output file arguments does not match!");
      System.exit(1);
    }

    // Load the whole pool up front; the reader is closed even when parsing fails.
    List<Sequence> seqs = new ArrayList<Sequence>();
    BufferedReader seqReader = new BufferedReader(new FileReader(seqFile));
    try {
      RichSequenceIterator seqIterator = RichSequence.IOTools.readFastaDNA(seqReader, null);
      while (seqIterator.hasNext()) {
        Sequence seq = seqIterator.nextSequence();
        if ((length > 0) && (seq.length() < length)) {
          // Such a sequence cannot yield a fragment of the wanted length.
          System.err.println(
              "Skipping sequence " + seq.getName() + ": shorter than the wanted length " + length);
          continue;
        }
        seqs.add(seq);
      }
    } finally {
      seqReader.close();
    }

    // Fragments of a fixed length are always sampled with replacement of the source sequence.
    boolean removeChosen = !sampleWithReplacement && (length <= 0);

    List<List<Sequence>> chosenSeqs = new ArrayList<List<Sequence>>();
    for (int c : count) {
      List<Sequence> cseqs = new ArrayList<Sequence>();
      chosenSeqs.add(cseqs);
      int fragmentIndex = 0;

      for (int remaining = c; remaining > 0; remaining--) {
        if (seqs.isEmpty()) {
          throw new BioException(
              "Not enough sequences in " + seqFile + " to draw the requested samples");
        }
        int randSeqIndex = random.nextInt(seqs.size());

        if (removeChosen) {
          cseqs.add(seqs.remove(randSeqIndex));
        } else if (length > 0) {
          Sequence randomSeq = seqs.get(randSeqIndex);
          // subList takes 1-based, inclusive coordinates: start lies in [1, len - length + 1]
          // and the fragment covers exactly `length` symbols.
          int startPos = 1 + random.nextInt(1 + randomSeq.length() - length);
          cseqs.add(
              new SimpleSequence(
                  randomSeq.subList(startPos, startPos + length - 1),
                  null,
                  randomSeq.getName() + "_" + fragmentIndex++,
                  Annotation.EMPTY_ANNOTATION));
        } else {
          cseqs.add(seqs.get(randSeqIndex));
        }
      }
    }

    OutputStream[] outputStreams = new OutputStream[writeToFiles ? args.length : 1];
    try {
      if (writeToFiles) {
        for (int i = 0; i < args.length; i++) {
          outputStreams[i] = new BufferedOutputStream(new FileOutputStream(args[i]));
        }
      } else {
        outputStreams[0] = System.out;
      }

      // Running number over all written sequences, so that a sequence drawn several times
      // still ends up with distinct names.
      int uniqueSuffix = 0;
      int outputIndex = 0;
      for (List<Sequence> seqList : chosenSeqs) {
        OutputStream out = outputStreams[outputIndex++];
        for (Sequence seq : seqList) {
          Sequence s;
          if (uniqueNames) {
            s =
                new SimpleSequence(
                    seq.subList(1, seq.length()),
                    null,
                    seq.getName() + "_" + uniqueSuffix++,
                    Annotation.EMPTY_ANNOTATION);
          } else {
            s = seq;
          }
          RichSequence.IOTools.writeFasta(out, s, null);
        }
        out.flush();
      }
    } finally {
      // Close the files that were opened; standard output is left open for the caller.
      if (writeToFiles) {
        IOException closeFailure = null;
        for (OutputStream out : outputStreams) {
          if (out == null) {
            continue;
          }
          try {
            out.close();
          } catch (IOException e) {
            if (closeFailure == null) {
              closeFailure = e;
            }
          }
        }
        if (closeFailure != null) {
          throw closeFailure;
        }
      }
    }
  }
  /**
   * Intersects the locations described by several GFF files and writes the shared regions to
   * standard output.
   *
   * <p>Each argument is read with {@code GFFUtils.gffToLocationMap} into a map from sequence
   * identifier to location. Only identifiers present in every file are processed. For each of
   * them the locations of all files are intersected; when {@code negate} is set the result is
   * subtracted from the range 1..sequence length, which requires {@code seqDB}. Every block of
   * the resulting location is then written either as a GFF line (feature {@code "block"}, source
   * {@code "nmintersectseq"}, positive strand) when {@code outputFormat} is {@code Format.GFF},
   * or otherwise as a FASTA record cut out of the matching sequence in {@code seqDB}.
   *
   * @param args paths of the GFF files to intersect; at least one is required
   * @throws IllegalArgumentException if no GFF file is given
   * @throws IllegalStateException if {@code seqDB} is needed (negation or sequence output) but
   *     is not set
   * @throws Exception if reading the GFF files, looking up sequences or writing the output fails
   */
  public void main(String[] args) throws Exception {
    if ((args == null) || (args.length == 0)) {
      throw new IllegalArgumentException("At least one GFF file is required");
    }

    List<Map<String, Location>> locs = new ArrayList<Map<String, Location>>();
    for (String fileName : args) {
      locs.add(GFFUtils.gffToLocationMap(new File(fileName)));
    }

    // Keep only the sequence identifiers that occur in every input file.
    Set<String> seqIds = new HashSet<String>(locs.get(0).keySet());
    for (Map<String, Location> ls : locs.subList(1, locs.size())) {
      seqIds.retainAll(ls.keySet());
    }

    if (validate && (seqDB != null)) {
      for (Map<String, Location> ls : locs) {
        WriteCoveredSequences.validateGFFSequenceIdentifiersAgainstSequences(ls, seqDB);
      }
    }

    PrintWriter pw = null;
    GFFWriter gffw = null;
    GFFEntrySet gffEntries = null;

    if (outputFormat == Format.GFF) {
      pw = new PrintWriter(new OutputStreamWriter(System.out));
      gffw = new GFFWriter(pw);
    } else {
      gffEntries = new GFFEntrySet();
    }

    for (String id : seqIds) {
      Location l = locs.get(0).get(id);
      for (Map<String, Location> ls : locs.subList(1, locs.size())) {
        l = LocationTools.intersection(l, ls.get(id));
      }

      if (negate) {
        if (seqDB == null) {
          throw new IllegalStateException(
              "A sequence database is required to negate the intersection");
        }
        // Complement of the intersection within the full extent of the sequence.
        l = LocationTools.subtract(new RangeLocation(1, seqDB.getSequence(id).length()), l);
      }

      // One record is reused for all blocks of this sequence; only the coordinates change.
      SimpleGFFRecord r = new SimpleGFFRecord();
      r.setSeqName(id);
      r.setFeature("block");
      r.setSource("nmintersectseq");
      r.setStrand(StrandedFeature.POSITIVE);

      for (Iterator<?> bi = l.blockIterator(); bi.hasNext(); ) {
        Location bloc = (Location) bi.next();
        r.setStart(bloc.getMin());
        r.setEnd(bloc.getMax());
        r.setComment("");
        r.setGroupAttributes(new HashMap<Object, Object>());
        if (gffw != null) {
          gffw.recordLine(r);
        } else {
          if (seqDB == null) {
            throw new IllegalStateException(
                "A sequence database is required to write the intersected sequences");
          }
          Sequence seq =
              new SimpleSequence(
                  seqDB.getSequence(id).subList(bloc.getMin(), bloc.getMax()),
                  null,
                  String.format("%s_|%d-%d|", id, bloc.getMin(), bloc.getMax()),
                  Annotation.EMPTY_ANNOTATION);
          RichSequence.IOTools.writeFasta(System.out, seq, null);
        }
      }
    }
    if (pw != null) {
      pw.flush();
    }

    // gffEntries only exists for non-GFF output; in GFF mode it is null and must not be handed
    // to annotateSequences (presumably it does not accept a null entry set; verify against
    // GFFTools).
    if ((seqDB != null) && (gffEntries != null)) {
      System.err.println("Writing output sequences...");
      GFFTools.annotateSequences(seqDB, gffEntries);
    }
  }