/**
 * Parses a GFF file and rebuilds the gene-feature index held by this object.
 *
 * <p>Both {@code geneFeatures} and {@code otherRecords} are cleared first. Every
 * record whose feature type ends with {@code "gene"} becomes a new
 * {@code GeneFeatures} entry keyed by its id; records of type {@code "CDS"} are
 * held back until all genes have been seen and are then attached to the gene
 * named by the first value of their {@code Parent} attribute; every other
 * record is appended to {@code otherRecords}. Summary counts are written to
 * {@code System.err}.
 *
 * <p>CDS records that have no usable {@code Parent} value, or whose parent is
 * not a known gene, are reported on {@code System.err} and skipped.
 *
 * <p>Parser-level failures ({@code ParserException}, {@code BioException}) are
 * printed and swallowed, so after such a failure the collections may be only
 * partially populated.
 *
 * @param inputFile the GFF file to read
 * @throws IOException if reading the file fails
 */
public void parseInputFile(File inputFile) throws IOException {
    geneFeatures.clear();
    otherRecords.clear();
    try {
        GFFEntrySet gffEntries = GFFTools.readGFF(inputFile);
        Iterator<?> itr = gffEntries.lineIterator();
        int count = 0;
        int intronFeatures = 0;
        LinkedList<GFFRecord> cdsRecs = new LinkedList<GFFRecord>();

        // First pass: index genes, defer CDS records, collect everything else.
        while (itr.hasNext()) {
            Object val = itr.next();
            if (!(val instanceof GFFRecord)) {
                // The line iterator can also yield non-record entries; ignore them.
                continue;
            }
            GFFRecord rec = (GFFRecord) val;
            count += 1;
            String feature = rec.getFeature();
            if (feature.endsWith("gene")) {
                GeneFeatures gf = new GeneFeatures(rec);
                geneFeatures.put(gf.id, gf);
            } else if (feature.equals("CDS")) {
                cdsRecs.addLast(rec);
            } else {
                otherRecords.add(rec);
            }
        }

        // Second pass: attach each CDS to its parent gene. This is done after the
        // first pass so a CDS may precede its gene in the file.
        for (GFFRecord rec : cdsRecs) {
            Map<String, List<String>> attrs = decodeAttrMap(rec);
            List<String> parents = (attrs == null) ? null : attrs.get("Parent");
            if (parents == null || parents.isEmpty()) {
                // Previously this case crashed the whole parse with an
                // NullPointerException / IndexOutOfBoundsException.
                System.err.println("CDS without Parent attribute: "
                        + rec.getSeqName() + ":" + rec.getStart() + "-" + rec.getEnd());
                continue;
            }
            String parentId = parents.get(0);
            GeneFeatures parent = geneFeatures.get(parentId);
            if (parent != null) {
                parent.addCDS(rec, attrs);
            } else {
                System.err.println("Unknown CDS Parent: " + parentId);
            }
        }

        // A gene with more than one CDS segment is counted as an intron feature.
        for (GeneFeatures gf : geneFeatures.values()) {
            if (gf.cds != null && gf.cds.size() > 1) {
                intronFeatures++;
            }
        }

        System.err.println("# GFF Records: " + count);
        System.err.println("# Gene Feature Sets: " + geneFeatures.size());
        System.err.println("\t# Intron-Features: " + intronFeatures);
    } catch (ParserException e) {
        // Best-effort parse: report and keep whatever was collected so far.
        e.printStackTrace();
    } catch (BioException e) {
        e.printStackTrace();
    }
}
/**
 * Splits a single {@code key=value[,value...]} attribute string into its key
 * and its list of URL-decoded values.
 *
 * <p>The text after the first {@code '='} is split on literal commas first and
 * each piece is then URL-decoded (UTF-8) on its own, so an escaped comma
 * ({@code %2C}) inside a value stays part of that value instead of being
 * treated as a separator. The key is returned as-is, without decoding.
 *
 * <p>NOTE(review): {@link URLDecoder} also converts {@code '+'} into a space;
 * confirm that this is the desired treatment for the input data.
 *
 * @param str the raw attribute string, e.g. {@code "Parent=gene1,gene2"}
 * @return the key paired with its decoded values, or {@code null} if
 *         {@code str} contains no {@code '='} or a value cannot be decoded
 */
public static Pair<String, List<String>> decodeKeyValues(String str) {
    int index = str.indexOf("=");
    if (index == -1) {
        return null;
    }
    String k = str.substring(0, index);
    LinkedList<String> vlist = new LinkedList<String>();
    try {
        // Split BEFORE decoding: an unescaped comma is the value separator,
        // while "%2C" is a comma that belongs to the value itself.
        String[] rawValues = str.substring(index + 1).split(",");
        for (String raw : rawValues) {
            vlist.addLast(URLDecoder.decode(raw, "UTF-8"));
        }
        return new Pair<String, List<String>>(k, vlist);
    } catch (UnsupportedEncodingException e) {
        System.err.println("BAD STRING " + str);
        e.printStackTrace();
        return null;
    } catch (IllegalArgumentException e) {
        // Raised by URLDecoder for malformed percent-escapes such as "%zz".
        System.err.println("BAD STRING " + str);
        return null;
    }
}
/**
 * Appends a CDS record to this gene's list of coding segments.
 *
 * @param rec   the record to add; its feature type must be {@code "CDS"}
 * @param attrs the record's decoded attributes, or {@code null} to have them
 *              decoded from {@code rec} here
 * @throws IllegalArgumentException if {@code rec} is not a CDS record, if it
 *         has no {@code Parent} attribute, or if its {@code Parent} values do
 *         not include this gene's id
 */
public void addCDS(GFFRecord rec, Map<String, List<String>> attrs) {
    boolean isCds = rec.getFeature().equals("CDS");
    if (!isCds) {
        throw new IllegalArgumentException();
    }

    // Decode lazily: callers that already hold the attribute map pass it in.
    Map<String, List<String>> attributes = (attrs != null) ? attrs : decodeAttrMap(rec);

    // The record must name this gene among its parents.
    if (!attributes.containsKey("Parent") || !attributes.get("Parent").contains(id)) {
        throw new IllegalArgumentException();
    }

    cds.addLast(rec);
}