/**
 * Reads a GFF file and rebuilds the {@code geneFeatures} and
 * {@code otherRecords} collections from its records.
 *
 * <p>Both collections are cleared first. Records whose feature name ends
 * with {@code "gene"} become {@code GeneFeatures} entries keyed by their id,
 * records whose feature is exactly {@code "CDS"} are attached to the gene
 * named by their {@code Parent} attribute, and every other record is added
 * to {@code otherRecords}. CDS records are attached only after the whole
 * file has been read, so a CDS line may precede its gene line in the file.
 *
 * <p>A CDS record that has no {@code Parent} attribute, or whose parent is
 * not a known gene, is reported on {@code System.err} and skipped. Summary
 * counts are also printed to {@code System.err}.
 *
 * <p>{@code ParserException} and {@code BioException} are not propagated:
 * their stack traces are printed and the method returns normally.
 *
 * @param inputFile the GFF file to read
 * @throws IOException declared for I/O failures raised while reading the file
 */
public void parseInputFile(File inputFile) throws IOException {
    geneFeatures.clear();
    otherRecords.clear();
    try {
        GFFEntrySet gffEntries = GFFTools.readGFF(inputFile);
        int count = 0;
        int intronFeatures = 0;
        LinkedList<GFFRecord> cdsRecs = new LinkedList<GFFRecord>();

        // First pass: classify every record; CDS records are deferred until
        // all genes are known.
        for (Iterator<?> itr = gffEntries.lineIterator(); itr.hasNext(); ) {
            Object val = itr.next();
            if (!(val instanceof GFFRecord)) {
                continue; // non-record lines are not counted
            }
            GFFRecord rec = (GFFRecord) val;
            count += 1;
            String feature = rec.getFeature();
            if (feature.endsWith("gene")) {
                GeneFeatures gf = new GeneFeatures(rec);
                geneFeatures.put(gf.id, gf);
            } else if (feature.equals("CDS")) {
                cdsRecs.addLast(rec);
            } else {
                otherRecords.add(rec);
            }
        }

        // Second pass: attach each CDS to its parent gene.
        for (GFFRecord rec : cdsRecs) {
            Map<String, List<String>> attrs = decodeAttrMap(rec);
            List<String> parents = (attrs == null) ? null : attrs.get("Parent");
            if (parents == null || parents.isEmpty()) {
                // Without this guard a CDS lacking a Parent attribute would
                // abort the whole parse with a NullPointerException.
                System.err.println("CDS without Parent attribute: "
                        + rec.getSeqName() + ":" + rec.getStart() + "-" + rec.getEnd());
                continue;
            }
            String parentId = parents.get(0);
            if (geneFeatures.containsKey(parentId)) {
                geneFeatures.get(parentId).addCDS(rec, attrs);
            } else {
                System.err.println("Unknown CDS Parent: " + parentId);
            }
        }

        // A gene with more than one CDS segment is counted as an intron feature.
        for (GeneFeatures gf : geneFeatures.values()) {
            if (gf.cds != null && gf.cds.size() > 1) {
                intronFeatures++;
            }
        }

        System.err.println("# GFF Records: " + count);
        System.err.println("# Gene Feature Sets: " + geneFeatures.size());
        System.err.println("\t# Intron-Features: " + intronFeatures);
    } catch (ParserException e) {
        // Best-effort: report and return normally, as before.
        e.printStackTrace();
    } catch (BioException e) {
        e.printStackTrace();
    }
}
/**
 * Intersects the locations described by one or more GFF files and writes
 * the resulting blocks to standard output.
 *
 * <p>Each argument is read into a map from sequence identifier to location.
 * Only identifiers present in every file are processed. For each of them the
 * locations from all files are intersected; when {@code negate} is set, the
 * intersection is subtracted from the full range {@code 1..length} of the
 * sequence looked up in {@code seqDB}. Every block of the result is then
 * emitted either as a GFF line (feature {@code "block"}, source
 * {@code "nmintersectseq"}, positive strand) when {@code outputFormat} is
 * {@code Format.GFF}, or otherwise as a FASTA entry holding the matching
 * sub-sequence from {@code seqDB}, named {@code <id>_|<min>-<max>|}.
 *
 * <p>{@code seqDB} is dereferenced when {@code negate} is set and when
 * writing FASTA output, so it must be non-null in those modes.
 *
 * @param args names of the GFF files to intersect; at least one is required
 * @throws IllegalArgumentException if no file name is given
 * @throws Exception if reading the inputs or writing the output fails
 */
public void main(String[] args) throws Exception {
    if (args == null || args.length == 0) {
        // Previously this surfaced as a bare NoSuchElementException below.
        throw new IllegalArgumentException("At least one GFF file name is required");
    }

    List<Map<String, Location>> locs = new ArrayList<Map<String, Location>>();
    for (String fileName : args) {
        locs.add(GFFUtils.gffToLocationMap(new File(fileName)));
    }

    // Only sequence ids present in every input can have an intersection.
    Set<String> seqIds = new HashSet<String>(locs.get(0).keySet());
    for (int k = 1; k < locs.size(); k++) {
        seqIds.retainAll(locs.get(k).keySet());
    }

    if (validate && (seqDB != null)) {
        for (Map<String, Location> ls : locs) {
            WriteCoveredSequences.validateGFFSequenceIdentifiersAgainstSequences(ls, seqDB);
        }
    }

    PrintWriter pw = null;
    GFFWriter gffw = null;
    GFFEntrySet gffEntries = null;
    if (outputFormat == Format.GFF) {
        pw = new PrintWriter(new OutputStreamWriter(System.out));
        gffw = new GFFWriter(pw);
    } else {
        gffEntries = new GFFEntrySet();
    }

    for (String id : seqIds) {
        Location l = locs.get(0).get(id);
        for (int k = 1; k < locs.size(); k++) {
            l = LocationTools.intersection(l, locs.get(k).get(id));
        }
        if (negate) {
            // Complement of the intersection within the whole sequence.
            l = LocationTools.subtract(
                    new RangeLocation(1, seqDB.getSequence(id).length()), l);
        }

        // One record per sequence id, re-used for every block of that id.
        SimpleGFFRecord r = new SimpleGFFRecord();
        r.setSeqName(id);
        r.setFeature("block");
        r.setSource("nmintersectseq");
        r.setStrand(StrandedFeature.POSITIVE);

        for (Iterator<?> bi = l.blockIterator(); bi.hasNext(); ) {
            Location bloc = (Location) bi.next();
            r.setStart(bloc.getMin());
            r.setEnd(bloc.getMax());
            r.setComment("");
            r.setGroupAttributes(new HashMap<Object, Object>());
            if (gffw != null) {
                gffw.recordLine(r);
            } else {
                Sequence seq = new SimpleSequence(
                        seqDB.getSequence(id).subList(bloc.getMin(), bloc.getMax()),
                        null,
                        String.format("%s_|%d-%d|", id, bloc.getMin(), bloc.getMax()),
                        Annotation.EMPTY_ANNOTATION);
                RichSequence.IOTools.writeFasta(System.out, seq, null);
            }
        }
    }

    if (pw != null) {
        pw.flush();
    }

    // gffEntries only exists in non-GFF mode; passing null here in GFF mode
    // would fail after all output had already been written.
    if (seqDB != null && gffEntries != null) {
        System.err.println("Writing output sequences...");
        GFFTools.annotateSequences(seqDB, gffEntries);
    }
}