예제 #1
0
  public void parseInputFile(File inputFile) throws IOException {
    geneFeatures.clear();
    otherRecords.clear();

    try {
      GFFEntrySet gffEntries = GFFTools.readGFF(inputFile);

      Iterator itr = gffEntries.lineIterator();
      int count = 0;
      int intronFeatures = 0;
      LinkedList<GFFRecord> cdsRecs = new LinkedList<GFFRecord>();

      while (itr.hasNext()) {
        Object val = itr.next();
        if (val instanceof GFFRecord) {
          GFFRecord rec = (GFFRecord) val;
          count += 1;

          if (rec.getFeature().endsWith("gene")) {
            GeneFeatures gf = new GeneFeatures(rec);
            geneFeatures.put(gf.id, gf);
          } else if (rec.getFeature().equals("CDS")) {
            cdsRecs.addLast(rec);
          } else {
            otherRecords.add(rec);
          }
        }
      }

      for (GFFRecord rec : cdsRecs) {
        Map<String, List<String>> attrs = decodeAttrMap(rec);
        if (geneFeatures.containsKey(attrs.get("Parent").get(0))) {
          geneFeatures.get(attrs.get("Parent").get(0)).addCDS(rec, attrs);
        } else {
          System.err.println("Unknown CDS Parent: " + attrs.get("Parent").get(0));
        }
      }

      for (String k : geneFeatures.keySet()) {
        GeneFeatures gf = geneFeatures.get(k);
        if (gf.cds != null && gf.cds.size() > 1) {
          intronFeatures++;
        }
      }

      System.err.println("# GFF Records: " + count);
      System.err.println("# Gene Feature Sets: " + geneFeatures.size());
      System.err.println("\t# Intron-Features: " + intronFeatures);

    } catch (ParserException e) {
      e.printStackTrace();
    } catch (BioException e) {
      e.printStackTrace();
    }
  }
  /**
   * @param args
   * @throws Exception
   */
  public void main(String[] args) throws Exception {
    List<Map<String, Location>> locs = new ArrayList<Map<String, Location>>();
    for (String fileName : args) {
      locs.add(GFFUtils.gffToLocationMap(new File(fileName)));
    }

    Set<String> seqIds;
    {
      Iterator<Map<String, Location>> i = locs.iterator();
      seqIds = new HashSet<String>(i.next().keySet());
      while (i.hasNext()) {
        seqIds.retainAll(i.next().keySet());
      }
    }

    if (validate && (seqDB != null)) {
      for (Map<String, Location> ls : locs) {
        WriteCoveredSequences.validateGFFSequenceIdentifiersAgainstSequences(ls, seqDB);
      }
    }

    PrintWriter pw = null;
    GFFWriter gffw = null;
    GFFEntrySet gffEntries = null;

    if (outputFormat == Format.GFF) {
      pw = new PrintWriter(new OutputStreamWriter(System.out));
      gffw = new GFFWriter(pw);
    } else {
      gffEntries = new GFFEntrySet();
    }

    for (String id : seqIds) {
      Iterator<Map<String, Location>> i = locs.iterator();
      Location l = i.next().get(id);
      while (i.hasNext()) {
        l = LocationTools.intersection(l, i.next().get(id));
      }

      if (negate) {
        l = LocationTools.subtract(new RangeLocation(1, seqDB.getSequence(id).length()), l);
      }

      SimpleGFFRecord r = new SimpleGFFRecord();
      r.setSeqName(id);
      r.setFeature("block");
      r.setSource("nmintersectseq");
      r.setStrand(StrandedFeature.POSITIVE);

      for (Iterator<?> bi = l.blockIterator(); bi.hasNext(); ) {
        Location bloc = (Location) bi.next();
        r.setStart(bloc.getMin());
        r.setEnd(bloc.getMax());
        r.setComment("");
        r.setGroupAttributes(new HashMap<Object, Object>());
        if (gffw != null) {
          gffw.recordLine(r);
        } else {
          Sequence seq =
              new SimpleSequence(
                  seqDB.getSequence(id).subList(bloc.getMin(), bloc.getMax()),
                  null,
                  String.format("%s_|%d-%d|", id, bloc.getMin(), bloc.getMax()),
                  Annotation.EMPTY_ANNOTATION);
          RichSequence.IOTools.writeFasta(System.out, seq, null);
        }
      }
    }
    if (pw != null) pw.flush();

    if (seqDB != null) {
      System.err.println("Writing output sequences...");
      GFFTools.annotateSequences(seqDB, gffEntries);
    }
  }