Java FiniteAlphabet Examples, org.biojava.bio.symbol.FiniteAlphabet Java Examples

Example #1

0

Show file

File: AbstractDistribution.java Project: sbliven/biojava-legacy

  /**
   * Retrieve the weight of a symbol in this distribution.
   *
   * <p>Performs the standard munge to handle ambiguity symbols. The actual weights for each atomic
   * symbol are calculated by the getWeightImpl functions. The cases are:
   *
   * <ul>
   *   <li>an {@code AtomicSymbol} is passed straight to {@code getWeightImpl};
   *   <li>a symbol whose match set is empty (a gap) is validated against this distribution's
   *       alphabet, and the result is 1.0 minus the summed weight of every symbol in that
   *       alphabet;
   *   <li>a symbol with a non-empty finite match set yields the sum of {@code getWeightImpl} over
   *       the matched atomic symbols;
   *   <li>a symbol whose match set is not a {@code FiniteAlphabet} is rejected.
   * </ul>
   *
   * @param sym the Symbol to find the probability of
   * @return the probability that one of the symbols matching sym was emitted
   * @throws IllegalSymbolException if the symbols matched by sym are not a finite set, or if for
   *     any reason the symbols within sym are not recognized by this state
   */
  public final double getWeight(Symbol sym) throws IllegalSymbolException {
    if (sym instanceof AtomicSymbol) {
      return getWeightImpl((AtomicSymbol) sym);
    }

    Alphabet ambA = sym.getMatches();

    // Check finiteness BEFORE casting: casting first would surface an infinite match set as a
    // ClassCastException and leave this documented failure mode unreachable.
    if (!(ambA instanceof FiniteAlphabet)) {
      throw new IllegalSymbolException(
          "Can't find weight for infinite set of symbols matched by " + sym.getName());
    }
    FiniteAlphabet fa = (FiniteAlphabet) ambA;

    if (fa.size() == 0) { // a gap
      getAlphabet().validate(sym);

      // NOTE(review): this cast assumes the distribution's own alphabet is finite - confirm.
      double totalWeight = 0.0;
      for (Iterator i = ((FiniteAlphabet) getAlphabet()).iterator(); i.hasNext(); ) {
        Symbol s = (Symbol) i.next();
        totalWeight += getWeight(s);
      }
      return 1.0 - totalWeight;
    }

    double sum = 0.0;
    for (Iterator i = fa.iterator(); i.hasNext(); ) {
      Object obj = i.next();
      if (!(obj instanceof AtomicSymbol)) {
        throw new BioError("Assertion Failure: Not an instance of AtomicSymbol: " + obj);
      }
      sum += getWeightImpl((AtomicSymbol) obj);
    }
    return sum;
  }

Example #2

0

Show file

File: AbstractDistribution.java Project: sbliven/biojava-legacy

 /**
  * Assigns a weight to a symbol, spreading it over the matched atomic symbols when the symbol is
  * not itself atomic.
  *
  * <p>An atomic symbol receives the weight directly through {@code setWeightImpl}. For any other
  * symbol, each atomic symbol in its match set receives a share of the weight proportional to
  * that symbol's weight in the null model, relative to the null model's weight for the whole
  * symbol.
  *
  * @param sym the symbol whose weight is being set
  * @param weight the weight to assign
  * @throws IllegalSymbolException propagated from the null model lookup or {@code setWeightImpl}
  * @throws ChangeVetoException propagated from {@code setWeightImpl}
  */
 private void doSetWeight(Symbol sym, double weight)
     throws IllegalSymbolException, ChangeVetoException {
   // Simple case first: nothing to apportion.
   if (sym instanceof AtomicSymbol) {
     setWeightImpl((AtomicSymbol) sym, weight);
     return;
   }

   // Ambiguous symbol: apportion the weight among its atomic members
   // in the same ratio as the null model.
   FiniteAlphabet matched = (FiniteAlphabet) sym.getMatches();
   double nullTotal = this.getNullModel().getWeight(sym);
   Iterator members = matched.iterator();
   while (members.hasNext()) {
     AtomicSymbol member = (AtomicSymbol) members.next();
     double nullShare = this.getNullModel().getWeight(member);
     setWeightImpl(member, weight * nullShare / nullTotal);
   }
 }

Example #3

0

Show file

File: BlastLikeSearchBuilder.java Project: sbliven/biojava-legacy

  /**
   * <code>makeSubHit</code> creates a new sub-hit from the values currently held in <code>
   * subHitData</code>.
   *
   * <p>If no token parser has been set yet, one is resolved from the "subjectSequenceType" entry
   * of the sub-hit data or, failing that, from the "program" entry of the result pre-annotation.
   * Strands are taken from the explicit strand entries and then overridden by frame entries where
   * present. Start/end coordinates are swapped for any sequence on the negative strand. The
   * "querySequence" and "subjectSequence" entries are removed from <code>subHitData</code> before
   * the remainder is turned into the sub-hit's annotation.
   *
   * <p>Numeric entries are parsed with <code>Integer.parseInt</code> and <code>
   * Double.parseDouble</code>; unchecked exceptions raised by those parsers are not caught here.
   *
   * @return a <code>SeqSimilaritySearchSubHit</code>.
   * @exception BioException if the sequence type cannot be determined, if the query or subject
   *     sequence is missing, or if another error occurs while building the sub-hit.
   */
  private SeqSimilaritySearchSubHit makeSubHit() throws BioException {
    // Try to get a valid TokenParser
    if (tokenParser == null) {
      String identifier;

      if (subHitData.containsKey("subjectSequenceType")) {
        // Try explicit sequence type first
        identifier = (String) subHitData.get("subjectSequenceType");
      } else if (resultPreAnnotation.containsKey("program")) {
        // Otherwise try to resolve from the program name (only
        // works for Blast)
        identifier = (String) resultPreAnnotation.get("program");
      } else {
        throw new BioException("Failed to determine sequence type");
      }

      FiniteAlphabet alpha = AlphabetResolver.resolveAlphabet(identifier);
      tokenParser = alpha.getTokenization("token");
    }

    // BLASTP output has the strands set null (protein sequences)
    Strand qStrand = null;
    Strand sStrand = null;

    // Override where an explicit strand is given (FASTA DNA,
    // BLASTN)
    if (subHitData.containsKey("queryStrand")) {
      if (subHitData.get("queryStrand").equals("plus")) {
        qStrand = StrandedFeature.POSITIVE;
      } else {
        qStrand = StrandedFeature.NEGATIVE;
      }
    }

    if (subHitData.containsKey("subjectStrand")) {
      if (subHitData.get("subjectStrand").equals("plus")) {
        sStrand = StrandedFeature.POSITIVE;
      } else {
        sStrand = StrandedFeature.NEGATIVE;
      }
    }

    // Override where a frame is given as this contains strand
    // information (BLASTX for query, TBLASTN for hit, TBLASTX for
    // both)
    if (subHitData.containsKey("queryFrame")) {
      if (((String) subHitData.get("queryFrame")).startsWith("plus")) {
        qStrand = StrandedFeature.POSITIVE;
      } else {
        qStrand = StrandedFeature.NEGATIVE;
      }
    }

    if (subHitData.containsKey("subjectFrame")) {
      if (((String) subHitData.get("subjectFrame")).startsWith("plus")) {
        sStrand = StrandedFeature.POSITIVE;
      } else {
        sStrand = StrandedFeature.NEGATIVE;
      }
    }

    // Get start/end
    int qStart = Integer.parseInt((String) subHitData.get("querySequenceStart"));
    int qEnd = Integer.parseInt((String) subHitData.get("querySequenceEnd"));
    int sStart = Integer.parseInt((String) subHitData.get("subjectSequenceStart"));
    int sEnd = Integer.parseInt((String) subHitData.get("subjectSequenceEnd"));

    // The start/end coordinates from BioJava XML don't follow the
    // BioJava paradigm of start < end, with orientation given by
    // the strand property. Rather, they present start/end as
    // displayed in BLAST output, with the coordinates being
    // inverted on the reverse strand. We account for this here.
    if (qStrand == StrandedFeature.NEGATIVE) {
      int swap = qStart;
      qStart = qEnd;
      qEnd = swap;
    }

    if (sStrand == StrandedFeature.NEGATIVE) {
      int swap = sStart;
      sStart = sEnd;
      sEnd = swap;
    }

    // Get scores
    double sc = Double.NaN;
    double ev = Double.NaN;
    double pv = Double.NaN;

    if (subHitData.containsKey("score")) {
      sc = Double.parseDouble((String) subHitData.get("score"));
    }

    if (subHitData.containsKey("expectValue")) {
      String val = (String) subHitData.get("expectValue");
      // Blast sometimes uses invalid formatting such as 'e-156'
      // rather than '1e-156'
      if (val.startsWith("e")) {
        ev = Double.parseDouble("1" + val);
      } else {
        ev = Double.parseDouble(val);
      }
    }

    if (subHitData.containsKey("pValue")) {
      pv = Double.parseDouble((String) subHitData.get("pValue"));
    }

    Map labelMap = new SmallMap();

    // Note that the following is removing the raw sequences.
    // A missing sequence is rejected explicitly: passing null on would
    // otherwise end up being tokenized as the literal text "null".
    String querySeq = (String) subHitData.remove("querySequence");
    if (querySeq == null) {
      throw new BioException("Sub-hit data does not contain a query sequence");
    }
    labelMap.put(
        SeqSimilaritySearchSubHit.QUERY_LABEL, new SimpleSymbolList(tokenParser, querySeq));

    String subjectSeq = (String) subHitData.remove("subjectSequence");
    if (subjectSeq == null) {
      throw new BioException("Sub-hit data does not contain a subject sequence");
    }
    labelMap.put(hitData.get("subjectId"), new SimpleSymbolList(tokenParser, subjectSeq));

    return new SimpleSeqSimilaritySearchSubHit(
        sc,
        ev,
        pv,
        qStart,
        qEnd,
        qStrand,
        sStart,
        sEnd,
        sStrand,
        new SimpleAlignment(labelMap),
        AnnotationFactory.makeAnnotation(subHitData));
  }