/** * Delete an exon. Deletes both the transcript -> exon and exon -> transcript relationships. * * @param exon - Exon to be deleted */ public void deleteExon(Exon exon) { Collection<CVTerm> partOfCvterms = conf.getCVTermsForClass("PartOf"); Collection<CVTerm> exonCvterms = conf.getCVTermsForClass("Exon"); Collection<CVTerm> transcriptCvterms = conf.getCVTermsForClass("Transcript"); // delete transcript -> exon child relationship for (FeatureRelationship fr : feature.getChildFeatureRelationships()) { if (!partOfCvterms.contains(fr.getType())) { continue; } if (!exonCvterms.contains(fr.getSubjectFeature().getType())) { continue; } if (fr.getSubjectFeature().equals(exon.getFeature())) { boolean ok = feature.getChildFeatureRelationships().remove(fr); break; } } // delete transcript -> exon parent relationship for (FeatureRelationship fr : exon.getFeature().getParentFeatureRelationships()) { if (!partOfCvterms.contains(fr.getType())) { continue; } if (!transcriptCvterms.contains(fr.getObjectFeature().getType())) { continue; } if (fr.getSubjectFeature().equals(exon.getFeature())) { boolean ok = exon.getFeature().getParentFeatureRelationships().remove(fr); break; } } }
/**
 * Tests the given location against every exon returned by {@code getExons()}.
 *
 * @param location location to test
 * @param compareStrands passed through unchanged to each exon's own overlap test
 * @return {@code true} if at least one exon reports an overlap; {@code false} otherwise,
 *     including when there are no exons
 */
@Override
public boolean overlaps(FeatureLocation location, boolean compareStrands) {
  boolean anyExonOverlaps = false;
  for (Exon candidate : getExons()) {
    if (candidate.overlaps(location, compareStrands)) {
      // One hit is enough; the remaining exons are not consulted.
      anyExonOverlaps = true;
      break;
    }
  }
  return anyExonOverlaps;
}
/**
 * Handles the end of the element: parses the accumulated character data as an integer and
 * stores it as the strand of the exon currently on the content handler's stack, then delegates
 * to the superclass implementation.
 *
 * @param theContentHandler handler whose current stack object is cast to an {@code Exon}
 * @param namespaceURI forwarded to the superclass
 * @param localName forwarded to the superclass
 * @param qualifiedName forwarded to the superclass
 * @throws NumberFormatException if the character data is not a parsable integer
 */
public void handleEndElement(
    OtterContentHandler theContentHandler,
    String namespaceURI,
    String localName,
    String qualifiedName) {
  // Fetch (and cast) the target first so a wrong stack object fails before any parsing.
  Exon currentExon = (Exon) theContentHandler.getStackObject();
  int parsedStrand = Integer.parseInt(getCharacters());
  currentExon.setStrand(parsedStrand);

  super.handleEndElement(theContentHandler, namespaceURI, localName, qualifiedName);
}
/**
 * Returns the residues for this feature.
 *
 * <p>Resolution order:
 *
 * <ol>
 *   <li>residues stored directly on the underlying feature, if non-null;
 *   <li>the superclass result, when there are no exons;
 *   <li>otherwise the concatenation of the non-null residues of the exons, taken in the order
 *       produced by {@code BioObjectUtil.createSortedFeatureListByLocation}.
 * </ol>
 *
 * @return the residues, or {@code null} if the exons contributed no characters
 */
@Override
public String getResidues() {
  String storedResidues = feature.getResidues();
  if (storedResidues != null) {
    return storedResidues;
  }

  if (getExons() == null || getExons().isEmpty()) {
    return super.getResidues();
  }

  List<Exon> orderedExons = BioObjectUtil.createSortedFeatureListByLocation(getExons());
  StringBuilder spliced = new StringBuilder();
  for (Exon exon : orderedExons) {
    if (exon.getResidues() == null) {
      // Exons without residues are skipped rather than appended as the text "null".
      continue;
    }
    spliced.append(exon.getResidues());
  }

  if (spliced.length() == 0) {
    return null;
  }
  return spliced.toString();
}
/**
 * Convenience method. Writes a list of features to a tab-delimited text file.
 *
 * <p>The first line is the literal text {@code Header row}. Each feature then produces one line
 * with these columns: name, identifier, chromosome, strand ({@code +}, {@code -}, or a single
 * space for anything else), start, end, exon count, exon starts, exon ends. Exon starts and ends
 * are each written as a list in which every value is followed by a comma.
 *
 * <p>This is a best-effort dump: failures to open or write the file are reported on standard
 * error and are not propagated to the caller.
 *
 * @param features features to write, in output order
 * @param outputfile path of the file to write; an existing file is overwritten
 */
public static void dumpFeatures(List<IGVFeature> features, String outputfile) {
  PrintWriter pw = null;
  try {
    pw = new PrintWriter(new FileWriter(outputfile));
    pw.println("Header row");

    for (IGVFeature gene : features) {
      pw.print(gene.getName() + "\t");
      pw.print(gene.getIdentifier() + "\t");
      pw.print(gene.getChr() + "\t");

      if (gene.getStrand() == Strand.POSITIVE) {
        pw.print("+\t");
      } else if (gene.getStrand() == Strand.NEGATIVE) {
        pw.print("-\t");
      } else {
        pw.print(" \t");
      }

      pw.print(gene.getStart() + "\t");
      pw.print(gene.getEnd() + "\t");

      // NOTE(review): guards against implementations that return null for a feature without
      // exons; such a feature is written exactly like one with an empty exon list.
      List<Exon> regions = gene.getExons();
      int exonCount = (regions == null) ? 0 : regions.size();
      pw.print(exonCount + "\t");

      if (regions != null) {
        for (Exon exon : regions) {
          pw.print(exon.getStart() + ",");
        }
      }
      pw.print("\t");
      if (regions != null) {
        for (Exon exon : regions) {
          pw.print(exon.getEnd() + ",");
        }
      }
      pw.println();
    }

    // PrintWriter never throws on write failures; it only records them. Ask explicitly so a
    // truncated dump (disk full, file removed, ...) does not go unnoticed.
    if (pw.checkError()) {
      System.err.println("Error while writing features to " + outputfile);
    }
  } catch (IOException e) {
    // Raised when the output file cannot be opened.
    e.printStackTrace();
  } finally {
    if (pw != null) {
      pw.close();
    }
  }
}
/** * Add an exon. If the exon's bounds are beyond the transcript's bounds, the transcript's bounds * are adjusted accordingly. Sets the exon's transcript to this transcript object. * * @param exon - Exon to be added */ public void addExon(Exon exon) { CVTerm partOfCvterm = conf.getDefaultCVTermForClass("PartOf"); // if the exon's bounds are beyond the transcript's bounds, need to adjust the transcript's // bounds if (exon.getFeatureLocation().getFmin() < getFeatureLocation().getFmin()) { getFeatureLocation().setFmin(exon.getFeatureLocation().getFmin()); } if (exon.getFeatureLocation().getFmax() > getFeatureLocation().getFmax()) { getFeatureLocation().setFmax(exon.getFeatureLocation().getFmax()); } // if the transcript's bounds are beyond the gene's bounds, need to adjust the gene's bounds if (getGene() != null) { if (getFmin() < getGene().getFmin()) { getGene().setFmin(getFmin()); } if (getFmax() > getGene().getFmax()) { getGene().setFmax(getFmax()); } } // add exon int rank = 0; // TODO: do we need to figure out the rank? feature .getChildFeatureRelationships() .add(new FeatureRelationship(partOfCvterm, feature, exon.getFeature(), rank)); exon.setTranscript(this); }
/**
 * Maps a coordinate local to this feature onto the source sequence by walking the exons in the
 * order produced by {@code BioObjectUtil.createSortedFeatureListByLocation}.
 *
 * <p>Exon lengths are accumulated until the running total reaches {@code localCoordinate}; the
 * remaining offset is then applied to that exon - subtracted from {@code fmax - 1} when this
 * feature's strand is {@code -1}, added to {@code fmin} otherwise. The comparison is inclusive:
 * a local coordinate equal to the cumulative length through an exon resolves against that exon
 * (for example {@code fmin + length} on the forward strand), not against the next one.
 *
 * <p>When there are no exons the superclass conversion is used.
 *
 * @param localCoordinate coordinate relative to this feature
 * @return the source coordinate, or {@code -1} if the exons are exhausted before the coordinate
 *     is reached
 */
@Override
public int convertLocalCoordinateToSourceCoordinate(int localCoordinate) {
  List<Exon> orderedExons = BioObjectUtil.createSortedFeatureListByLocation(getExons());
  if (orderedExons.isEmpty()) {
    return super.convertLocalCoordinateToSourceCoordinate(localCoordinate);
  }

  int lengthBefore = 0; // combined length of the exons already walked past
  for (Exon exon : orderedExons) {
    int exonLength = exon.getLength();
    if (lengthBefore + exonLength >= localCoordinate) {
      int offsetInExon = localCoordinate - lengthBefore;
      if (getFeatureLocation().getStrand() == -1) {
        return exon.getFeatureLocation().getFmax() - offsetInExon - 1;
      }
      return exon.getFeatureLocation().getFmin() + offsetInExon;
    }
    lengthBefore += exonLength;
  }
  return -1;
}
/**
 * Maps a source-sequence coordinate onto a coordinate local to this feature by walking the exons
 * in the order produced by {@code BioObjectUtil.createSortedFeatureListByLocation}.
 *
 * <p>An exon matches when {@code fmin <= sourceCoordinate <= fmax} (both ends inclusive). For a
 * match the result is the combined length of the exons walked so far plus the offset inside the
 * exon, measured from {@code fmax} (minus one) when this feature's strand is {@code -1} and from
 * {@code fmin} otherwise. The walk does not stop at the first match, so if several exons match,
 * the last one determines the result.
 *
 * <p>When there are no exons the superclass conversion is used.
 *
 * @param sourceCoordinate coordinate on the source sequence
 * @return the local coordinate, or {@code -1} if no exon contains the source coordinate
 */
@Override
public int convertSourceCoordinateToLocalCoordinate(int sourceCoordinate) {
  List<Exon> orderedExons = BioObjectUtil.createSortedFeatureListByLocation(getExons());
  if (orderedExons.isEmpty()) {
    return super.convertSourceCoordinateToLocalCoordinate(sourceCoordinate);
  }

  int result = -1;
  int lengthBefore = 0; // combined length of the exons already walked past
  for (Exon exon : orderedExons) {
    boolean outsideExon =
        exon.getFeatureLocation().getFmin() > sourceCoordinate
            || exon.getFeatureLocation().getFmax() < sourceCoordinate;
    if (!outsideExon) {
      result =
          getFeatureLocation().getStrand() == -1
              ? lengthBefore + (exon.getFeatureLocation().getFmax() - sourceCoordinate) - 1
              : lengthBefore + (sourceCoordinate - exon.getFeatureLocation().getFmin());
    }
    // Always advance, matched or not: later exons may still overwrite the result.
    lengthBefore += exon.getLength();
  }
  return result;
}
public static void main(String args[]) { double totalTime; double startTime = System.currentTimeMillis(); parseArguments(args); // preprocess input files to find the chromosomal boundaries in terms of line number /* * All files should be sorted in chromosomal order */ System.out.println("Preprocessing input files to find chromosome boundaries..."); System.out.println("\tExons file.."); ArrayList<Integer> exonFileChrBoundaries = getChromosomalBoundaries(exonFileName, 0); System.out.println("\tExpression file.."); ArrayList<Integer> exprFileChrBoundaries = getChromosomalBoundaries(expressionFileName, 2); System.out.println("\tMapped reads file.."); ArrayList<Integer> readsFileChrBoundaries = getChromosomalBoundaries(mappedReadsFileName, 2); try { BufferedReader inputExons = new BufferedReader(new FileReader(exonFileName)); BufferedReader inputExpr = new BufferedReader(new FileReader(expressionFileName)); BufferedReader inputSAMData = new BufferedReader(new FileReader(mappedReadsFileName)); for (int chromosome : chromosomes) { int arrayPosition = chromosome - chromosomes.get(0) + 1; System.out.println("Chromosome " + chromosome); System.out.println("Reading exons file...."); int numberOfLines = exonFileChrBoundaries.get(arrayPosition) - exonFileChrBoundaries.get(arrayPosition - 1); double currentTime = System.currentTimeMillis(); Exons = Exon.readExon(inputExons, chromosome, numberOfLines); double totalExonReadTime = (System.currentTimeMillis() - currentTime) / 1000F; Exon.sortExons(Exons); System.out.println("Reading expression file...."); numberOfLines = exprFileChrBoundaries.get(arrayPosition) - exprFileChrBoundaries.get(arrayPosition - 1); ArrayList<Expression> Expressions = new ArrayList<Expression>(); currentTime = System.currentTimeMillis(); Expressions = Expression.readExon(inputExpr, chromosome, numberOfLines); double totalExprReadTime = (System.currentTimeMillis() - currentTime) / 1000F; int numberOfExpr = Expressions.size(); System.out.println("Calculating 
FPKMs...."); currentTime = System.currentTimeMillis(); Exon.getFPKM(Expressions, Exons); double totalFPKMCalcTime = (System.currentTimeMillis() - currentTime) / 1000F; Expressions.removeAll(Expressions); // explicitly deleting to free up memory System.out.println("Reading mapped reads SAM file...."); numberOfLines = readsFileChrBoundaries.get(arrayPosition) - readsFileChrBoundaries.get(arrayPosition - 1); ArrayList<MappedReads> mappedReads = new ArrayList<MappedReads>(); currentTime = System.currentTimeMillis(); mappedReads = MappedReads.readMappedReads(inputSAMData, chromosome, numberOfLines); double totalReadsReadTime = (System.currentTimeMillis() - currentTime) / 1000F; MappedReads.sort(mappedReads); int numberOfReads = mappedReads.size(); System.out.println("Reading reference genome file...."); String referenceFileName = referenceDirectory + "/chr" + chromosome + ".fa"; currentTime = System.currentTimeMillis(); RandomAccessFile inputReference = new RandomAccessFile(referenceFileName, "r"); for (Exon e : Exons) { e.getReferenceSequence(inputReference); } double totalRefCalcTime = (System.currentTimeMillis() - currentTime) / 1000F; System.out.println("Calculating SNPs...."); currentTime = System.currentTimeMillis(); Exon.getSNPs(Exons, mappedReads); double totalSNPsCalcTime = (System.currentTimeMillis() - currentTime) / 1000F; mappedReads.removeAll(mappedReads); System.out.println("Calculating States...."); currentTime = System.currentTimeMillis(); HiddenMarkovModel.getStates(Exons); double totalStateCalcTime = (System.currentTimeMillis() - currentTime) / 1000F; // Print output if (outputFileName.equals("")) { // print to stdout for (Exon e : Exons) System.out.println(e); } else { Writer output = new BufferedWriter(new FileWriter(outputFileName)); for (Exon e : Exons) output.write(e + "\n"); output.close(); } // prints the timing metrics to std out if (printTimingMetrics) { double endTime = System.currentTimeMillis(); totalTime = (endTime - startTime) / 1000F; 
System.out.println("Total Time: " + totalTime); System.out.println( "Time for reading exons file : " + totalExonReadTime + ", " + Exons.size()); System.out.println( "Time for reading expression file : " + totalExprReadTime + ", " + numberOfExpr); System.out.println( "Time for reading mapped reads file: " + totalReadsReadTime + ", " + numberOfReads); System.out.println("Time for getting reference seq : " + totalRefCalcTime); System.out.println("Time for calculating FPKM : " + totalFPKMCalcTime); System.out.println("Time for calculating Num of SNPs : " + totalSNPsCalcTime); System.out.println("Time for calculating States : " + totalStateCalcTime); } } } catch (Exception e) { System.err.println("Exception: " + e.getMessage()); e.printStackTrace(); } }