Beispiel #1
0
 /**
  * Copies the names of the symbols in a symbol list into a new list of strings.
  *
  * @param sl the symbol list to read; positions 1 through {@code sl.length()} are visited
  * @return a new list holding {@code getName()} of each symbol, in positional order
  */
 public ArrayList<String> symbolListToArrayList(SymbolList sl) {
   ArrayList<String> names = new ArrayList<String>();
   final int lastPosition = sl.length();
   int position = 1;
   while (position <= lastPosition) {
     names.add(sl.symbolAt(position).getName());
     position++;
   }
   return names;
 } /*symbolListToArrayList*/
  /**
   * Converts a symbol list into a string, one character per symbol.
   *
   * @param sl the symbols to tokenize
   * @return the characters produced by {@code _tokenizeSymbol} for each symbol, concatenated in
   *     iteration order
   * @throws IllegalAlphabetException if the list's alphabet is not the same instance as {@code
   *     getAlphabet()}, or if a symbol cannot be tokenized
   */
  public String tokenizeSymbolList(SymbolList sl) throws IllegalAlphabetException {
    if (sl.getAlphabet() != getAlphabet()) {
      throw new IllegalAlphabetException(
          "Alphabet " + sl.getAlphabet().getName() + " does not match " + getAlphabet().getName());
    }

    StringBuilder tokens = new StringBuilder();
    Iterator symbols = sl.iterator();
    while (symbols.hasNext()) {
      Symbol current = (Symbol) symbols.next();
      try {
        Character token = _tokenizeSymbol(current);
        tokens.append(token.charValue());
      } catch (IllegalSymbolException cause) {
        // Callers only expect IllegalAlphabetException, so the symbol failure is wrapped.
        throw new IllegalAlphabetException(cause, "Couldn't tokenize");
      }
    }
    return tokens.toString();
  }
  /**
   * Mutates the given sequence in place, one position at a time.
   *
   * <p>Position {@code i} (1-based) is mutated with the probability stored at index {@code i - 1}
   * of {@code getMutationProbs()}; positions past the end of that array reuse its last entry. When
   * a position is selected, the replacement symbol is sampled from the distribution that {@code
   * getMutationSpectrum()} provides for the symbol currently at that position, and applied with
   * {@code seq.edit}.
   *
   * @param seq the sequence to mutate; it is modified directly
   * @return the same {@code seq} instance that was passed in
   * @throws ChangeVetoException propagated from the calls made while sampling or editing
   * @throws IllegalAlphabetException propagated from the calls made while sampling or editing
   * @throws IllegalSymbolException propagated from the calls made while sampling or editing
   */
  public SymbolList mutate(SymbolList seq)
      throws ChangeVetoException, IllegalAlphabetException, IllegalSymbolException {

    // Fetched once: the probabilities do not depend on the loop variable.
    double[] mutationProbs = getMutationProbs();
    int maxIndex = mutationProbs.length - 1;
    OrderNDistribution d = getMutationSpectrum();
    Random r = new Random();

    // Positions are 1-based and inclusive, so the final position is seq.length(); the previous
    // bound (i < seq.length()) never gave the last symbol a chance to mutate.
    for (int i = 1; i <= seq.length(); i++) {
      int index = Math.min(i - 1, maxIndex);
      double mutProb = mutationProbs[index];

      if (r.nextDouble() < mutProb) {
        Edit e = new Edit(i, seq.getAlphabet(), d.getDistribution(seq.symbolAt(i)).sampleSymbol());
        seq.edit(e);
      }
    }

    return seq;
  }
Beispiel #4
0
  /**
   * Runs Viterbi and posterior decoding over every serialized block in the configured directory.
   *
   * <p>For each non-hidden file in the directory returned by {@code getSerDir()}, a {@code BLOCK}
   * is loaded; blocks shorter than {@code getBlockLengthThreshold()} are skipped. For the rest, a
   * Markov model is built from the configured start/transition probabilities and the per-state
   * parameter arrays defined below, and then:
   *
   * <ul>
   *   <li>the Viterbi state path is computed, optionally filtered, mapped to the alignment and
   *       written to {@code VITERBI2_<blockId>.txt};
   *   <li>posterior decoding scores are computed from the forward and backward matrices, mapped
   *       to the alignment and written to {@code Posterior_<blockId>.txt};
   *   <li>chains are written to a GFF file in the directory returned by {@code getChainDir()}.
   * </ul>
   *
   * @param args ignored
   * @throws IllegalStateException if the serialized-block directory cannot be listed
   * @throws Exception if any configuration, modelling or output step fails
   */
  public static void main(String[] args) throws Exception {
    VITERBI3 app = new VITERBI3();

    String serDirName = app.getSerDir();
    String aliDirName = app.getAliDir();
    // stateNames is not used below, and the two minimum-length thresholds are only referenced by
    // the disabled post-processing steps further down.
    // NOTE(review): the calls are kept because these getters may load or validate configuration
    // -- confirm, then remove the unused ones.
    String[] stateNames = app.getStates();
    double[] strProb = app.getStartProbabilities();
    double[][] tranProb = app.getTransitionProbability();
    app.checkTransitionMatrixNormality(tranProb);

    double scoreThr = app.getScoreThreshold();
    int minLenForTroughs = app.getMinLengthThreshold();
    int minLenForPeakDis = app.getMinPeakLengths();
    int lengthThresholdForBlocks = app.getBlockLengthThreshold();

    // Per-state parameter arrays. The trailing comments are alternative values left by the
    // original author and are preserved verbatim.
    double[] backGroundAlpha = {
      0.7, 0.8, 1
    }; // {0.15106438458196936,0.20973711740901407,0.22900628496706044}  {0.8, 0.9, 1}
    double[] greenStateAlpha = {
      0.9, 2.3, 1
    }; // {0.1713002602078479,11.729438447100865,1.3763120488468623}; {0.9,2.3,1}
    double[] greenFollowingMixedAlpha = {
      0.8, 2.3, 0.9
    }; // {0.16327302239493097,9.219505194760753,1.8283941303615243} {0.8,2.3,0.9}
    double[] greenToRedEdgeAlpha = {
      1.3, 1.4, 1.2
    }; // {0.4799631840073914,0.4944929532552769,0.2857067163030646}; {1.3,1.4,1.2}
    double[] redStateAlpha = {
      2.5, 1, 0.8
    }; // {71.25302335371902,0.201959003605188,0.20299272598532336} {5,1.1,0.9}
    double[] redToGreenEdgeAlpha = {
      5, 1.1, 0.9
    }; // {0.39728078185044136,0.3710506533397517,0.2490053255191496}{1.4,1.3,1.2}
    // double redFollowingMixedAlpha  []  = {10,0.1,0.1};
    // //{4.5200496933818615,0.1772646945273629,0.17979061871698085};
    double[] junctionStateAlpha = {0.7, 0.8, 1};

    // Insertion order is preserved by LinkedHashMap. States "r" and "R" deliberately share
    // redStateAlpha here, as in the original code.
    LinkedHashMap<String, double[]> statesAndDirPar = new LinkedHashMap<String, double[]>();
    statesAndDirPar.put("M", backGroundAlpha);
    statesAndDirPar.put("r", redStateAlpha);
    statesAndDirPar.put("E", redToGreenEdgeAlpha);
    statesAndDirPar.put("G", greenStateAlpha);
    statesAndDirPar.put("g", greenFollowingMixedAlpha);
    statesAndDirPar.put("e", greenToRedEdgeAlpha);
    statesAndDirPar.put("R", redStateAlpha);
    statesAndDirPar.put("J", junctionStateAlpha);

    // Both result files go to the same hard-coded directory.
    String outputDir =
        "/Users/hk3/Desktop/Main/Composure_Droshophila_Model/ANALYSIS_EISENLAB/2L/ViterbiPaths/";

    File serDir = new File(serDirName);
    // File.listFiles() returns null (not an empty array) when the path is not a directory or
    // cannot be read; fail with a clear message instead of a NullPointerException.
    File[] serFiles = serDir.listFiles();
    if (serFiles == null) {
      throw new IllegalStateException(
          "Cannot list serialized block directory: " + serDirName);
    }

    for (File aSerFile : serFiles) {
      String oneFileName = aSerFile.getName();
      if (oneFileName.startsWith(".")) {
        // Skip hidden files.
        continue;
      }
      BLOCK block = new BLOCK(aSerFile, aliDirName);
      String blockId = block.blockId;
      int blockLength = app.getBlockLength(blockId);
      if (blockLength < lengthThresholdForBlocks) {
        // System.out.println(" found sequence :" + blockId + " with too short length :" +
        // blockLength);
        continue;
      }
      Sequence seq = block.blockSeq;
      ArrayList<Integer> rowsWithSumZero = block.missingData;
      SimpleAlphabet observedSeqAlphabet = block.blockObservedSeqAlphabet;
      SimpleSymbolList symbolList = block.blockSimpleSymbolList;

      // System.out.println("seq " + blockId + " is processed");

      MarkovModel mm =
          BLOCK.makeMarkovModel(
              observedSeqAlphabet, tranProb, strProb, statesAndDirPar, "dirichletMM");
      DP dp = new SingleDP(mm);

      SymbolList[] symList = {symbolList};

      // --- Viterbi decoding ---
      StatePath viterbiPath = dp.viterbi(symList, ScoreType.PROBABILITY);
      SymbolList symbolsInViterbi = viterbiPath.symbolListForLabel(StatePath.STATES);
      // The helper collects the state names in positional order; its result was previously
      // discarded and rebuilt by an equivalent hand-written loop.
      ArrayList<String> viterbiStates = app.symbolListToArrayList(symbolsInViterbi);
      System.out.println("veterbi path length for " + blockId + " is " + viterbiPath.length());

      System.out.println();
      boolean isFiltering = app.getFilterMeaningLessPicks();
      app.filterMeaningLessPicks(viterbiStates, isFiltering);

      ArrayList<Double> viterbiTranslatedToNumbersAndMappedToAlingment =
          app.mapViterbiPathToAlignment(viterbiStates, seq, rowsWithSumZero);
      String outputFileName = outputDir + "VITERBI2_" + blockId + ".txt";
      app.printoutViterbiPath(viterbiTranslatedToNumbersAndMappedToAlingment, outputFileName);

      // --- Posterior decoding ---
      SingleDPMatrix forwardMatrix =
          (SingleDPMatrix) dp.forwardMatrix(symList, ScoreType.PROBABILITY);
      System.out.println();
      SingleDPMatrix backwardMatrix =
          (SingleDPMatrix) dp.backwardMatrix(symList, ScoreType.PROBABILITY);

      ArrayList<Double> posteriorScores =
          app.getPosteriorDecodingScores(forwardMatrix, backwardMatrix);
      // Disabled post-processing steps:
      // posteriorScores = app.fillInShortTroughs(posteriorScores, scoreThr, minLenForTroughs);
      // posteriorScores = app.filterOutShortPeaks(posteriorScores, scoreThr, minLenForPeakDis);
      // posteriorScores = app.getEnhancerRegions(posteriorScores, scoreThr);
      ArrayList<Double> PDScoresMapedToalignment =
          app.mapPDScoresToAlignment(posteriorScores, seq, rowsWithSumZero);
      String posteriorOutputFileName = outputDir + "Posterior_" + blockId + ".txt";
      app.printoutViterbiPath(PDScoresMapedToalignment, posteriorOutputFileName);

      // Write chains into GFF files.
      String chainDir = app.getChainDir();
      app.wirteChainsIntoAGffFile(PDScoresMapedToalignment, blockId, chainDir, scoreThr);
    }
  } /*main*/
Beispiel #5
0
  /**
   * Handles the {@code dna} command: validates every requested segment, then writes a
   * {@code DASDNA} XML document containing one {@code SEQUENCE} element per segment.
   *
   * <p>All segments are fetched and checked before the response document is started, so a bad
   * segment produces an error status rather than a partial document. Sequences are emitted in the
   * order the segments were requested, in lines of at most 60 symbols.
   *
   * @param req the incoming request, used to resolve the requested segments
   * @param resp the response on which the XML document is started
   * @param dds the data source; it is cast to {@code DazzleReferenceSource}
   * @throws DazzleException if no segments are given, a reference cannot be resolved, a sequence
   *     is not in the DNA alphabet, a bounded segment does not fit its sequence, or writing the
   *     document fails
   * @throws IOException declared for callers; see the signature
   * @throws DataSourceException declared for callers; see the signature
   * @throws ServletException declared for callers; see the signature
   */
  private void dnaCommand(HttpServletRequest req, DazzleResponse resp, DazzleDataSource dds)
      throws IOException, DataSourceException, ServletException, DazzleException {

    DazzleReferenceSource drs = (DazzleReferenceSource) dds;

    List segments = DazzleTools.getSegments(dds, req, resp);
    if (segments.size() == 0) {
      throw new DazzleException(
          DASStatus.STATUS_BAD_COMMAND_ARGUMENTS, "No segments specified for dna command");
    }

    // Fetch and validate the requests.
    //
    // An insertion-ordered map keeps the response in request order; a plain HashMap would emit
    // the SEQUENCE elements in arbitrary order. The class is fully qualified because this
    // method cannot add to the file's import list.
    Map segmentResults = new java.util.LinkedHashMap();
    for (Iterator i = segments.iterator(); i.hasNext(); ) {
      Segment seg = (Segment) i.next();

      try {
        Sequence seq = drs.getSequence(seg.getReference());
        if (seq.getAlphabet() != DNATools.getDNA()) {
          throw new DazzleException(
              DASStatus.STATUS_SERVER_ERROR,
              "Sequence " + seg.toString() + " is not in the DNA alphabet");
        }
        if (seg.isBounded()) {
          if (seg.getMin() < 1 || seg.getMax() > seq.length()) {
            throw new DazzleException(
                DASStatus.STATUS_BAD_COORDS,
                "Segment " + seg.toString() + " doesn't fit sequence of length " + seq.length());
          }
        }
        segmentResults.put(seg, seq);
      } catch (NoSuchElementException ex) {
        throw new DazzleException(DASStatus.STATUS_BAD_REFERENCE, ex);
      } catch (DataSourceException ex) {
        throw new DazzleException(DASStatus.STATUS_SERVER_ERROR, ex);
      }
    }

    //
    // Looks okay -- generate the response document
    //

    XMLWriter xw = resp.startDasXML("DASDNA", "dasdna.dtd");

    try {
      xw.openTag("DASDNA");
      for (Iterator i = segmentResults.entrySet().iterator(); i.hasNext(); ) {
        Map.Entry me = (Map.Entry) i.next();
        Segment seg = (Segment) me.getKey();
        Sequence seq = (Sequence) me.getValue();

        xw.openTag("SEQUENCE");
        xw.attribute("id", seg.getReference());
        xw.attribute("version", drs.getLandmarkVersion(seg.getReference()));
        if (seg.isBounded()) {
          xw.attribute("start", "" + seg.getStart());
          xw.attribute("stop", "" + seg.getStop());
        } else {
          // An unbounded segment covers the whole sequence.
          xw.attribute("start", "" + 1);
          xw.attribute("stop", "" + seq.length());
        }

        SymbolList syms = seq;
        if (seg.isBounded()) {
          syms = syms.subList(seg.getMin(), seg.getMax());
        }
        if (seg.isInverted()) {
          syms = DNATools.reverseComplement(syms);
        }

        xw.openTag("DNA");
        xw.attribute("length", "" + syms.length());

        // Emit the sequence in lines of at most 60 symbols (1-based, inclusive bounds).
        for (int pos = 1; pos <= syms.length(); pos += 60) {
          int maxPos = Math.min(syms.length(), pos + 59);
          xw.println(syms.subStr(pos, maxPos));
        }

        xw.closeTag("DNA");
        xw.closeTag("SEQUENCE");
      }
      xw.closeTag("DASDNA");
      xw.close();
    } catch (Exception ex) {
      throw new DazzleException(ex, "Error writing DNA document");
    }
  }
Beispiel #6
0
  /**
   * Computes the predicted properties of this polypeptide from its residues.
   *
   * <p>The residues are parsed with the protein alphabet's {@code "token"} tokenization. A
   * trailing termination symbol is dropped before the amino-acid count, isoelectric point and
   * mass are computed. The charge is computed from the raw residue string, i.e. before any
   * terminator is removed.
   *
   * @return {@code null} if this polypeptide has no residues; an otherwise empty
   *     <code>PeptideProperties</code> if the residues are zero-length, consist only of the
   *     termination symbol, or cannot be parsed; else the populated properties (the isoelectric
   *     point is left unset if its calculation fails, and the mass is left unset if {@code
   *     calculateMass} returns -1)
   */
  public PeptideProperties calculateStats() {
    if (this.getResidues() == null) {
      logger.warn("No residues for '" + this.getUniqueName() + "'");
      return null;
    }
    String residuesString = new String(this.getResidues());

    PeptideProperties stats = new PeptideProperties();
    SymbolList symbols;
    try {
      SymbolTokenization tokenization = ProteinTools.getTAlphabet().getTokenization("token");
      symbols = new SimpleSymbolList(tokenization, residuesString);

      int residueCount = symbols.length();
      if (residueCount == 0) {
        logger.error(
            String.format(
                "Polypeptide feature '%s' has zero-length residues", this.getUniqueName()));
        return stats;
      }

      try {
        // A trailing termination symbol (*) is not part of the peptide and must be dropped.
        boolean endsWithTerminator = symbols.symbolAt(residueCount) == ProteinTools.ter();
        if (endsWithTerminator) {
          if (residueCount == 1) {
            logger.error(
                String.format(
                    "Polypeptide feature '%s' only has termination symbol", this.getUniqueName()));
            return stats;
          }
          symbols = symbols.subList(1, residueCount - 1);
        }
      } catch (IndexOutOfBoundsException outOfRange) {
        throw new RuntimeException(outOfRange);
      }
    } catch (BioException e) {
      logger.error("Can't translate into a protein sequence", e);
      return stats;
    }

    stats.setAminoAcids(symbols.length());

    try {
      stats.setIsoelectricPoint(new IsoelectricPointCalc().getPI(symbols, false, false));
    } catch (Exception e) {
      // A failed pI calculation is logged and the remaining properties are still filled in.
      logger.error(
          String.format("Error computing protein isoelectric point for '%s'", symbols), e);
    }

    double mass = calculateMass(symbols);
    if (mass != -1) {
      stats.setMass(mass);
    }

    stats.setCharge(calculateCharge(residuesString));

    return stats;
  }
Beispiel #7
0
 /**
  * Returns the input subsequence matched by the previous match.
  *
  * <p>For a matcher m with input sequence s, the expressions m.group() and s.substring(m.start(),
  * m.end()) are equivalent. Note that some patterns, for example a*, match the empty SymbolList.
  * This method returns an empty SymbolList ({@code SymbolList.EMPTY_LIST}) when the pattern
  * successfully matches the empty string in the input.
  *
  * @return The (possibly empty) subsequence matched by the previous match, in SymbolList form.
  */
 public SymbolList group() {
   int first = start();
   int last = end() - 1; // subList takes an inclusive end position
   if (last < first) {
     // Empty match: end() == start(), so the inclusive range would be inverted and is not a
     // valid sublist request. Return the shared empty list instead, as the Javadoc promises.
     return SymbolList.EMPTY_LIST;
   }
   return sl.subList(first, last);
 }
Beispiel #8
0
 /**
  * Returns the input subsequence captured by the given group during the previous match operation.
  *
  * <p>For a matcher m, input sequence s, and group index g, the expressions m.group(g) and
  * s.substring(m.start(g), m.end(g)) are equivalent. Capturing groups are indexed from left to
  * right, starting at one. Group zero denotes the entire pattern, so the expression m.group(0) is
  * equivalent to m.group(). If the match was successful but the group specified failed to match
  * any part of the input sequence, then null is returned. Note that some groups, for example (a*),
  * match the empty string. This method returns an empty SymbolList ({@code
  * SymbolList.EMPTY_LIST}) when such a group successfully matches the empty string in the input.
  *
  * @param group the index of the capturing group
  * @return The (possibly empty) subsequence captured by the group during the previous match, or
  *     null if the group failed to match part of the input.
  * @throws IndexOutOfBoundsException declared for callers; presumably raised for an invalid
  *     group index by the underlying matcher
  */
 public SymbolList group(int group) throws IndexOutOfBoundsException {
   // The underlying matcher reports -1 for both bounds when the group took no part in the match.
   if ((matcher.start(group) == -1) && (matcher.end(group) == -1)) {
     return null;
   }
   int first = start(group);
   int last = end(group) - 1; // subList takes an inclusive end position
   if (last < first) {
     // The group matched the empty string: the inclusive range would be inverted, which is not
     // a valid sublist request. Return the shared empty list instead.
     return SymbolList.EMPTY_LIST;
   }
   return sl.subList(first, last);
 }