/**
 * Collects the name of every symbol in a symbol list, in list order.
 *
 * <p>Positions are read with {@code symbolAt} from 1 up to and including {@code sl.length()}.
 *
 * @param sl the symbol list to read
 * @return a new list holding {@code getName()} of each symbol; empty when the list has no symbols
 */
public ArrayList<String> symbolListToArrayList(SymbolList sl) {
  ArrayList<String> names = new ArrayList<String>();
  int lastPosition = sl.length();
  int position = 1;
  while (position <= lastPosition) {
    names.add(sl.symbolAt(position).getName());
    position++;
  }
  return names;
} /*symbolListToArrayList*/
/**
 * Renders a symbol list as a string containing one character per symbol.
 *
 * @param sl the symbols to tokenize; its alphabet must be the very same object as this
 *     tokenization's alphabet (compared by identity)
 * @return the concatenated single-character tokens, in list order
 * @throws IllegalAlphabetException if the alphabets differ, or if any symbol cannot be tokenized
 *     (the underlying {@code IllegalSymbolException} is attached as the cause)
 */
public String tokenizeSymbolList(SymbolList sl) throws IllegalAlphabetException {
  if (sl.getAlphabet() != getAlphabet()) {
    throw new IllegalAlphabetException(
        "Alphabet " + sl.getAlphabet().getName() + " does not match " + getAlphabet().getName());
  }

  StringBuffer tokens = new StringBuffer();
  Iterator symbols = sl.iterator();
  while (symbols.hasNext()) {
    Symbol current = (Symbol) symbols.next();
    try {
      Character token = _tokenizeSymbol(current);
      tokens.append(token.charValue());
    } catch (IllegalSymbolException cause) {
      throw new IllegalAlphabetException(cause, "Couldn't tokenize");
    }
  }
  return tokens.toString();
}
/**
 * Randomly mutates the symbols of {@code seq} in place.
 *
 * <p>Every position from 1 to {@code seq.length()} inclusive is visited. Position {@code i} uses
 * the mutation probability at index {@code i - 1} of {@code getMutationProbs()}; positions beyond
 * the end of that array reuse its last entry. When a position is selected, its symbol is replaced
 * by one sampled from the mutation-spectrum distribution conditioned on the current symbol.
 *
 * <p>An empty probability array is not handled specially and results in an
 * {@code ArrayIndexOutOfBoundsException} for a non-empty sequence.
 *
 * @param seq the sequence to mutate; it is edited directly
 * @return the same {@code seq} instance, after any edits
 * @throws ChangeVetoException if an edit is vetoed
 * @throws IllegalAlphabetException if an edit is rejected because of its alphabet
 * @throws IllegalSymbolException if a symbol is not accepted by the spectrum or the sequence
 */
public SymbolList mutate(SymbolList seq)
    throws ChangeVetoException, IllegalAlphabetException, IllegalSymbolException {
  // Fetch once: the array does not change while this method runs.
  double[] mutationProbs = getMutationProbs();
  int maxIndex = mutationProbs.length - 1;
  OrderNDistribution d = getMutationSpectrum();
  Random r = new Random();

  // SymbolList positions are 1-based and inclusive, so the bound must be <= length();
  // the previous "<" bound meant the final symbol could never be mutated.
  for (int i = 1; i <= seq.length(); i++) {
    int index = Math.min(i - 1, maxIndex);
    double mutProb = mutationProbs[index];
    if (r.nextDouble() < mutProb) {
      Edit e =
          new Edit(i, seq.getAlphabet(), d.getDistribution(seq.symbolAt(i)).sampleSymbol());
      seq.edit(e);
    }
  }
  return seq;
}
public static void main(String[] args) throws Exception { // TODO Auto-generated method stub // TODO Auto-generated method stub VITERBI3 app = new VITERBI3(); String serDirName = app.getSerDir(); String aliDirName = app.getAliDir(); String[] stateNames = app.getStates(); double[] strProb = app.getStartProbabilities(); double[][] tranProb = app.getTransitionProbability(); app.checkTransitionMatrixNormality(tranProb); double scoreThr = app.getScoreThreshold(); int minLenForTrhoughs = app.getMinLengthThreshold(); int minLenForPeakDis = app.getMinPeakLengths(); int lengthThresholdForBlocks = app.getBlockLengthThreshold(); double backGroundAlpha[] = { 0.7, 0.8, 1 }; // {0.15106438458196936,0.20973711740901407,0.22900628496706044} {0.8, 0.9, 1} double greenStateAlpha[] = { 0.9, 2.3, 1 }; // {0.1713002602078479,11.729438447100865,1.3763120488468623}; {0.9,2.3,1} double greenFollowingMixedAlpha[] = { 0.8, 2.3, 0.9 }; // {0.16327302239493097,9.219505194760753,1.8283941303615243} {0.8,2.3,0.9} double greenToRedEdgeAlpah[] = { 1.3, 1.4, 1.2 }; // {0.4799631840073914,0.4944929532552769,0.2857067163030646}; {1.3,1.4,1.2} double redStateAlpha[] = { 2.5, 1, 0.8 }; // {71.25302335371902,0.201959003605188,0.20299272598532336} {5,1.1,0.9} double redToGreenEdgeAlpha[] = { 5, 1.1, 0.9 }; // {0.39728078185044136,0.3710506533397517,0.2490053255191496}{1.4,1.3,1.2} // double redFollowingMixedAlpha [] = {10,0.1,0.1}; // //{4.5200496933818615,0.1772646945273629,0.17979061871698085}; double junctionStateAlpha[] = {0.7, 0.8, 1}; LinkedHashMap<String, double[]> statesAndDirPar = new LinkedHashMap<String, double[]>(); // statesAndDirPar.put("P", x); statesAndDirPar.put("M", backGroundAlpha); statesAndDirPar.put("r", redStateAlpha); statesAndDirPar.put("E", redToGreenEdgeAlpha); statesAndDirPar.put("G", greenStateAlpha); statesAndDirPar.put("g", greenFollowingMixedAlpha); statesAndDirPar.put("e", greenToRedEdgeAlpah); statesAndDirPar.put("R", redStateAlpha); statesAndDirPar.put("J", 
junctionStateAlpha); File serDir = new File(serDirName); for (File aSerFile : serDir.listFiles()) { String oneFileName = aSerFile.getName(); if (oneFileName.startsWith(".")) { continue; } BLOCK block = new BLOCK(aSerFile, aliDirName); String blockId = block.blockId; int blockLength = app.getBlockLength(blockId); if (blockLength < lengthThresholdForBlocks) { // System.out.println(" found sequence :" + blockId + " with too short length :" + // blockLength); continue; } Matrix2D m = block.blockMatrix; Sequence seq = block.blockSeq; ArrayList<Integer> rowsWithSumZero = block.missingData; SimpleAlphabet observedSeqAlphabet = block.blockObservedSeqAlphabet; SimpleSymbolList symbolList = block.blockSimpleSymbolList; // System.out.println("seq " + blockId + " is processed"); MarkovModel mm = BLOCK.makeMarkovModel( observedSeqAlphabet, tranProb, strProb, statesAndDirPar, "dirichletMM"); DP dp = new SingleDP(mm); SymbolList[] symList = {symbolList}; StatePath viterbiPath = dp.viterbi(symList, ScoreType.PROBABILITY); SymbolList symbolsInViterbi = viterbiPath.symbolListForLabel(StatePath.STATES); ArrayList<String> viterbiPathAsAnArrayList = app.symbolListToArrayList(symbolsInViterbi); System.out.println("veterbi path length for " + blockId + " is " + viterbiPath.length()); ArrayList<String> ViterbiPath = new ArrayList<String>(); for (int i = 1; i <= symbolsInViterbi.length(); i++) { Symbol oneSym = symbolsInViterbi.symbolAt(i); ViterbiPath.add(oneSym.getName()); // System.out.print(oneSym.getName()); } System.out.println(); boolean isFiltering = app.getFilterMeaningLessPicks(); app.filterMeaningLessPicks(ViterbiPath, isFiltering); ArrayList<Double> viterbiTranslatedToNumbersAndMappedToAlingment = app.mapViterbiPathToAlignment(ViterbiPath, seq, rowsWithSumZero); String outputFileName = "/Users/hk3/Desktop/Main/Composure_Droshophila_Model/ANALYSIS_EISENLAB/2L/ViterbiPaths/VITERBI2_" + blockId + ".txt"; app.printoutViterbiPath(viterbiTranslatedToNumbersAndMappedToAlingment, 
outputFileName); SingleDPMatrix forwardMatrix = (SingleDPMatrix) dp.forwardMatrix(new SymbolList[] {symbolList}, ScoreType.PROBABILITY); double score = forwardMatrix.getScore(); // System.err.printf("Forward: %g%n", score); System.out.println(); SingleDPMatrix backwardMatrix = (SingleDPMatrix) dp.backwardMatrix(new SymbolList[] {symbolList}, ScoreType.PROBABILITY); ArrayList<Double> posteriorScores = app.getPosteriorDecodingScores(forwardMatrix, backwardMatrix); // posteriorScores = app.fillInShortTroughs(posteriorScores, scoreThr, minLenForTrhoughs); // posteriorScores = app.filterOutShortPeaks(posteriorScores, scoreThr, minLenForPeakDis); // posteriorScores = app.getEnhancerRegions(posteriorScores, scoreThr); ArrayList<Double> PDScoresMapedToalignment = app.mapPDScoresToAlignment(posteriorScores, seq, rowsWithSumZero); String posteriorOutputFileName = "/Users/hk3/Desktop/Main/Composure_Droshophila_Model/ANALYSIS_EISENLAB/2L/ViterbiPaths/Posterior_" + blockId + ".txt"; app.printoutViterbiPath(PDScoresMapedToalignment, posteriorOutputFileName); // writte chains into gff files String chainDir = app.getChainDir(); app.wirteChainsIntoAGffFile(PDScoresMapedToalignment, blockId, chainDir, scoreThr); } } /*main*/
private void dnaCommand(HttpServletRequest req, DazzleResponse resp, DazzleDataSource dds) throws IOException, DataSourceException, ServletException, DazzleException { DazzleReferenceSource drs = (DazzleReferenceSource) dds; List segments = DazzleTools.getSegments(dds, req, resp); if (segments.size() == 0) { throw new DazzleException( DASStatus.STATUS_BAD_COMMAND_ARGUMENTS, "No segments specified for dna command"); } // Fetch and validate the requests. Map segmentResults = new HashMap(); for (Iterator i = segments.iterator(); i.hasNext(); ) { Segment seg = (Segment) i.next(); try { Sequence seq = drs.getSequence(seg.getReference()); if (seq.getAlphabet() != DNATools.getDNA()) { throw new DazzleException( DASStatus.STATUS_SERVER_ERROR, "Sequence " + seg.toString() + " is not in the DNA alphabet"); } if (seg.isBounded()) { if (seg.getMin() < 1 || seg.getMax() > seq.length()) { throw new DazzleException( DASStatus.STATUS_BAD_COORDS, "Segment " + seg.toString() + " doesn't fit sequence of length " + seq.length()); } } segmentResults.put(seg, seq); } catch (NoSuchElementException ex) { throw new DazzleException(DASStatus.STATUS_BAD_REFERENCE, ex); } catch (DataSourceException ex) { throw new DazzleException(DASStatus.STATUS_SERVER_ERROR, ex); } } // // Looks okay -- generate the response document // XMLWriter xw = resp.startDasXML("DASDNA", "dasdna.dtd"); try { xw.openTag("DASDNA"); for (Iterator i = segmentResults.entrySet().iterator(); i.hasNext(); ) { Map.Entry me = (Map.Entry) i.next(); Segment seg = (Segment) me.getKey(); Sequence seq = (Sequence) me.getValue(); xw.openTag("SEQUENCE"); xw.attribute("id", seg.getReference()); xw.attribute("version", drs.getLandmarkVersion(seg.getReference())); if (seg.isBounded()) { xw.attribute("start", "" + seg.getStart()); xw.attribute("stop", "" + seg.getStop()); } else { xw.attribute("start", "" + 1); xw.attribute("stop", "" + seq.length()); } SymbolList syms = seq; if (seg.isBounded()) { syms = syms.subList(seg.getMin(), 
seg.getMax()); } if (seg.isInverted()) { syms = DNATools.reverseComplement(syms); } xw.openTag("DNA"); xw.attribute("length", "" + syms.length()); for (int pos = 1; pos <= syms.length(); pos += 60) { int maxPos = Math.min(syms.length(), pos + 59); xw.println(syms.subStr(pos, maxPos)); } xw.closeTag("DNA"); xw.closeTag("SEQUENCE"); } xw.closeTag("DASDNA"); xw.close(); } catch (Exception ex) { throw new DazzleException(ex, "Error writing DNA document"); } }
/** * Calculate the predicted properties of this polypeptide. * * @return a <code>PeptideProperties</code> object containing the predicted properties of this * polypeptide. */ public PeptideProperties calculateStats() { if (this.getResidues() == null) { logger.warn("No residues for '" + this.getUniqueName() + "'"); return null; } String residuesString = new String(this.getResidues()); SymbolList residuesSymbolList = null; PeptideProperties pp = new PeptideProperties(); try { SymbolTokenization proteinTokenization = ProteinTools.getTAlphabet().getTokenization("token"); residuesSymbolList = new SimpleSymbolList(proteinTokenization, residuesString); if (residuesSymbolList.length() == 0) { logger.error( String.format( "Polypeptide feature '%s' has zero-length residues", this.getUniqueName())); return pp; } try { // if the sequence ends with a termination symbol (*), we need to remove it if (residuesSymbolList.symbolAt(residuesSymbolList.length()) == ProteinTools.ter()) { if (residuesSymbolList.length() == 1) { logger.error( String.format( "Polypeptide feature '%s' only has termination symbol", this.getUniqueName())); return pp; } residuesSymbolList = residuesSymbolList.subList(1, residuesSymbolList.length() - 1); } } catch (IndexOutOfBoundsException exception) { throw new RuntimeException(exception); } } catch (BioException e) { logger.error("Can't translate into a protein sequence", e); return pp; } pp.setAminoAcids(residuesSymbolList.length()); try { double isoElectricPoint = new IsoelectricPointCalc().getPI(residuesSymbolList, false, false); pp.setIsoelectricPoint(isoElectricPoint); } catch (Exception e) { logger.error( String.format("Error computing protein isoelectric point for '%s'", residuesSymbolList), e); } double mass2 = calculateMass(residuesSymbolList); if (mass2 != -1) { // mass = mass2; pp.setMass(mass2); } double charge = calculateCharge(residuesString); pp.setCharge(charge); return pp; }
/**
 * Returns the input subsequence matched by the previous match.
 *
 * <p>The result is the sublist of the input running from {@code start()} through
 * {@code end() - 1}, both inclusive. For a matcher m with input sequence s, this corresponds to
 * s.substring(m.start(), m.end()) in string terms.
 *
 * <p>Some patterns, for example a*, can match an empty stretch of the input; the documented
 * intent is that an empty result is returned in that case.
 *
 * @return The (possibly empty) subsequence matched by the previous match, in SymbolList form.
 */
public SymbolList group() {
  int first = start();
  int last = end() - 1; // end() is exclusive; subList takes an inclusive end
  return sl.subList(first, last);
}
/**
 * Returns the input subsequence captured by the given group during the previous match operation.
 *
 * <p>Capturing groups are indexed from left to right, starting at one. Group zero denotes the
 * entire pattern, so the expression m.group(0) is equivalent to m.group(). For a matcher m, input
 * sequence s, and group index g, the result corresponds to s.substring(m.start(g), m.end(g)) in
 * string terms.
 *
 * <p>If the match was successful but the group specified failed to match any part of the input
 * sequence (the underlying matcher reports -1 for both its start and its end), then null is
 * returned. Some groups, for example (a*), can match an empty stretch of the input; the
 * documented intent is that an empty result is returned in that case.
 *
 * @param group the index of a capturing group in this matcher's pattern
 * @return The (possibly empty) subsequence captured by the group during the previous match, or
 *     null if the group failed to match part of the input.
 * @throws IndexOutOfBoundsException presumably when the pattern has no group with the given
 *     index, as reported by the underlying matcher
 */
public SymbolList group(int group) throws IndexOutOfBoundsException {
  int rawStart = matcher.start(group);
  int rawEnd = matcher.end(group);
  boolean groupDidNotParticipate = (rawStart == -1) && (rawEnd == -1);
  if (groupDidNotParticipate) {
    return null;
  }
  // end(group) is exclusive; subList takes an inclusive end.
  return sl.subList(start(group), end(group) - 1);
}