Example #1
0
  /**
   * Builds a two-sequence alignment from rows {@code x} and {@code y} of the underlying alignment.
   *
   * <p>A column is kept only if at least one of the two sequences has a state below the data
   * type's state count in that column. Afterwards, leading and trailing columns in which either
   * sequence has a state at or above the state count are trimmed away.
   *
   * @param x index of the first sequence in the underlying alignment
   * @param y index of the second sequence in the underlying alignment
   * @return the trimmed pairwise alignment, or {@code null} if no column survives the trimming
   */
  public Alignment getPairAlignment(int x, int y) {

    SimpleAlignment pairAlignment = new SimpleAlignment();

    StringBuilder sequence0 = new StringBuilder();
    StringBuilder sequence1 = new StringBuilder();

    DataType dataType = alignment.getDataType();
    int stateCount = dataType.getStateCount();

    int siteCount = alignment.getSiteCount();
    for (int i = 0; i < siteCount; i++) {

      int s0 = alignment.getState(x, i);
      int s1 = alignment.getState(y, i);

      char c0 = dataType.getChar(s0);
      char c1 = dataType.getChar(s1);

      // drop columns where neither sequence has a state below the state count
      if (s0 < stateCount || s1 < stateCount) {
        sequence0.append(c0);
        sequence1.append(c1);
      }
    }

    // Both buffers are appended to in lock-step, so they always have the same length.
    int length = sequence0.length();

    // trim hanging ends on left; the bound check stops the scan from running off the end
    // when no column has both sequences below the state count (or when nothing was kept)
    int left = 0;
    while (left < length
        && ((dataType.getState(sequence0.charAt(left)) >= stateCount)
            || (dataType.getState(sequence1.charAt(left)) >= stateCount))) {
      left += 1;
    }

    // trim hanging ends on right; never scan past the left boundary
    int right = length - 1;
    while (right >= left
        && ((dataType.getState(sequence0.charAt(right)) >= stateCount)
            || (dataType.getState(sequence1.charAt(right)) >= stateCount))) {
      right -= 1;
    }

    // nothing left after trimming
    if (right < left) {
      return null;
    }

    String sequenceString0 = sequence0.substring(left, right + 1);
    String sequenceString1 = sequence1.substring(left, right + 1);

    pairAlignment.addSequence(new Sequence(alignment.getTaxon(x), sequenceString0));
    pairAlignment.addSequence(new Sequence(alignment.getTaxon(y), sequenceString1));

    return pairAlignment;
  }
  /**
   * Computes, for the branch above {@code node}, how the branch length is apportioned among the
   * discrete states, according to the current {@code mode}.
   *
   * <ul>
   *   <li>{@code MARKOV_JUMP_PROCESS}: the values are read directly from the trait.
   *   <li>{@code PARSIMONY}: half the branch length is credited to every Fitch-parsimony state at
   *       the node and at its parent, then the result is normalised so that the entries sum to
   *       the branch length.
   *   <li>{@code NODE_STATES}: half the branch length is credited to the state at the node and
   *       half to the state at its parent.
   * </ul>
   *
   * @param tree the tree containing the branch
   * @param node the node below the branch of interest
   * @return an array of the total amount of time spent in each of the discrete states along the
   *     branch above the given node, or {@code null} if the mode is none of the above
   */
  private double[] getProcessValues(final Tree tree, final NodeRef node) {

    double[] processValues = null;
    double branchTime = tree.getBranchLength(node);

    if (mode == Mode.MARKOV_JUMP_PROCESS) {
      processValues = (double[]) trait.getTrait(tree, node);
    } else if (mode == Mode.PARSIMONY) {
      // an approximation to dwell times using parsimony, assuming
      // the state changes midpoint on the tree. Does a weighted
      // average of the equally parsimonious state reconstructions
      // at the top and bottom of each branch.

      if (treeChanged) {
        // only re-run the parsimony reconstruction when the tree has changed
        fitchParsimony.initialize(tree);
        treeChanged = false;
      }
      int[] states = fitchParsimony.getStates(tree, node);
      int[] parentStates = fitchParsimony.getStates(tree, tree.getParent(node));

      processValues = new double[fitchParsimony.getPatterns().getStateCount()];

      for (int state : states) {
        processValues[state] += branchTime / 2;
      }
      for (int state : parentStates) {
        processValues[state] += branchTime / 2;
      }

      // normalize by the number of equally parsimonious states at each end of the branch;
      // processValues should add up to the total branch length. The divisor must be a
      // floating-point value: integer division would truncate whenever the combined number
      // of states is odd and the entries would then sum to more than the branch length.
      final double normalizer = (states.length + parentStates.length) / 2.0;
      for (int i = 0; i < processValues.length; i++) {
        processValues[i] /= normalizer;
      }
    } else if (mode == Mode.NODE_STATES) {
      // this array should be size #states NOT #rates
      processValues = new double[dataType.getStateCount()];

      // if the states are being sampled - then there is only one possible state at each
      // end of the branch.
      int state = ((int[]) trait.getTrait(tree, node))[traitIndex];
      processValues[state] += branchTime / 2;
      int parentState = ((int[]) trait.getTrait(tree, tree.getParent(node)))[traitIndex];
      processValues[parentState] += branchTime / 2;
    }

    return processValues;
  }
  AbstractCovarionModel(
      String name, DataType dataType, Parameter frequencies, Parameter hiddenFrequencies) {
    super(name);

    this.dataType = dataType;

    setStateCount(dataType.getStateCount());

    updateMatrix = true;

    freqModel = new CovarionFrequencyModel(dataType, frequencies, hiddenFrequencies);
    addModel(freqModel);
  }
  /**
   * Parses a general substitution model element and builds the matching model.
   *
   * <p>The data type is taken from {@code DataTypeUtils.getDataType(xo)}, then from a {@code
   * DataType} child, then from the frequency model; it must be the same instance as the frequency
   * model's data type. With {@code K} states, the model that is built depends on the rates
   * element:
   *
   * <ul>
   *   <li>no {@code relativeTo} setting and {@code K(K-1)} rates: {@code
   *       SVSComplexSubstitutionModel};
   *   <li>no {@code relativeTo} setting and {@code K(K-1)/2} rates: {@code
   *       SVSGeneralSubstitutionModel};
   *   <li>a {@code relativeTo} setting and {@code K(K-1)/2 - 1} rates: {@code
   *       GeneralSubstitutionModel}.
   * </ul>
   *
   * @param xo the XML element to parse
   * @return the constructed substitution model
   * @throws XMLParseException if the frequency model or rates parameter is missing, the data
   *     types do not match, a parameter has the wrong dimension, or {@code relativeTo} is out of
   *     range
   */
  public Object parseXMLObject(XMLObject xo) throws XMLParseException {

    FrequencyModel freqModel = null;

    if (xo.hasChildNamed(FREQUENCIES)) {
      XMLObject cxo = xo.getChild(FREQUENCIES);
      freqModel = (FrequencyModel) cxo.getChild(FrequencyModel.class);
    }

    // Every path below dereferences the frequency model, so fail with a parse error
    // instead of a NullPointerException when it is absent.
    if (freqModel == null) {
      throw new XMLParseException("No frequency model found in " + getParserName());
    }

    DataType dataType = DataTypeUtils.getDataType(xo);

    if (dataType == null) {
      dataType = (DataType) xo.getChild(DataType.class);
    }

    if (dataType == null) {
      dataType = freqModel.getDataType();
    }

    if (dataType != freqModel.getDataType()) {
      throw new XMLParseException(
          "Data type of "
              + getParserName()
              + " element does not match that of its frequencyModel.");
    }

    XMLObject cxo = xo.getChild(RATES);
    Parameter ratesParameter = (cxo == null) ? null : (Parameter) cxo.getChild(Parameter.class);

    // Checked here, before the parameter is first used; the check used to sit at the end of
    // the method where it could never be reached with a null parameter.
    if (ratesParameter == null) {
      throw new XMLParseException("No rates parameter found in " + getParserName());
    }

    int states = dataType.getStateCount();
    Logger.getLogger("dr.evomodel")
        .info("  General Substitution Model (stateCount=" + states + ")");

    boolean hasRelativeRates =
        cxo.hasChildNamed(RELATIVE_TO)
            || (cxo.hasAttribute(RELATIVE_TO) && cxo.getIntegerAttribute(RELATIVE_TO) > 0);

    // K(K-1) off-diagonal rates for a non-reversible model, half of that for a reversible one
    int nonReversibleRateCount = ((dataType.getStateCount() - 1) * dataType.getStateCount());
    int reversibleRateCount = (nonReversibleRateCount / 2);

    boolean isNonReversible = ratesParameter.getDimension() == nonReversibleRateCount;
    boolean hasIndicator = xo.hasChildNamed(INDICATOR);

    if (!hasRelativeRates) {
      Parameter indicatorParameter = null;

      if (ratesParameter.getDimension() != reversibleRateCount
          && ratesParameter.getDimension() != nonReversibleRateCount) {
        throw new XMLParseException(
            "Rates parameter in "
                + getParserName()
                + " element should have "
                + (reversibleRateCount)
                + " dimensions for reversible model or "
                + nonReversibleRateCount
                + " dimensions for non-reversible. "
                + "However parameter dimension is "
                + ratesParameter.getDimension());
      }

      if (hasIndicator) { // this is using BSSVS
        cxo = xo.getChild(INDICATOR);
        indicatorParameter = (Parameter) cxo.getChild(Parameter.class);

        if (indicatorParameter.getDimension() != ratesParameter.getDimension()) {
          throw new XMLParseException(
              "Rates and indicator parameters in "
                  + getParserName()
                  + " element must be the same dimension.");
        }

        boolean randomize =
            xo.getAttribute(
                dr.evomodelxml.substmodel.ComplexSubstitutionModelParser.RANDOMIZE, false);
        if (randomize) {
          BayesianStochasticSearchVariableSelection.Utils.randomize(
              indicatorParameter, dataType.getStateCount(), !isNonReversible);
        }
      }

      if (isNonReversible) {
        Logger.getLogger("dr.evomodel").info("  Using BSSVS Complex Substitution Model");
        return new SVSComplexSubstitutionModel(
            getParserName(), dataType, freqModel, ratesParameter, indicatorParameter);
      } else {
        Logger.getLogger("dr.evomodel").info("  Using BSSVS General Substitution Model");
        return new SVSGeneralSubstitutionModel(
            getParserName(), dataType, freqModel, ratesParameter, indicatorParameter);
      }

    } else {
      // if we have relativeTo attribute then we use the old GeneralSubstitutionModel

      if (ratesParameter.getDimension() != reversibleRateCount - 1) {
        throw new XMLParseException(
            "Rates parameter in "
                + getParserName()
                + " element should have "
                + (reversibleRateCount - 1)
                + " dimensions. However parameter dimension is "
                + ratesParameter.getDimension());
      }

      // hasRelativeRates is always true on this branch; convert the 1-based attribute
      // into a 0-based rate index.
      int relativeTo = cxo.getIntegerAttribute(RELATIVE_TO) - 1;

      if (relativeTo < 0 || relativeTo >= reversibleRateCount) {
        throw new XMLParseException(
            RELATIVE_TO + " must be between 1 and " + reversibleRateCount + " (inclusive)");
      }

      // Map the rate index onto the (row, col) pair of states it refers to, walking the
      // upper triangle row by row: row 0 holds states-1 rates, row 1 holds states-2, ...
      int t = relativeTo;
      int s = states - 1;
      int row = 0;
      while (t >= s) {
        t -= s;
        s -= 1;
        row += 1;
      }
      int col = t + row + 1;

      Logger.getLogger("dr.evomodel")
          .info("  Rates relative to " + dataType.getCode(row) + "<->" + dataType.getCode(col));

      return new GeneralSubstitutionModel(
          getParserName(), dataType, freqModel, ratesParameter, relativeTo);
    }
  }
/** Parses a GeneralSubstitutionModel or one of its more specific descendants. */
public class GeneralSubstitutionModelParser extends AbstractXMLObjectParser {

  public static final String GENERAL_SUBSTITUTION_MODEL = "generalSubstitutionModel";
  public static final String DATA_TYPE = "dataType";
  public static final String RATES = "rates";
  public static final String RELATIVE_TO = "relativeTo";
  public static final String FREQUENCIES = "frequencies";
  public static final String INDICATOR = "rateIndicator";

  public static final String SVS_GENERAL_SUBSTITUTION_MODEL = "svsGeneralSubstitutionModel";
  public static final String SVS_COMPLEX_SUBSTITUTION_MODEL = "svsComplexSubstitutionModel";

  /** Returns the primary element name handled by this parser. */
  public String getParserName() {
    return GENERAL_SUBSTITUTION_MODEL;
  }

  /** Returns every element name handled by this parser, the primary name first. */
  public String[] getParserNames() {
    return new String[] {
      getParserName(), SVS_GENERAL_SUBSTITUTION_MODEL, SVS_COMPLEX_SUBSTITUTION_MODEL
    };
  }

  /**
   * Parses a general substitution model element and builds the matching model.
   *
   * <p>The data type is taken from {@code DataTypeUtils.getDataType(xo)}, then from a {@code
   * DataType} child, then from the frequency model; it must be the same instance as the frequency
   * model's data type. With {@code K} states, the model that is built depends on the rates
   * element:
   *
   * <ul>
   *   <li>no {@code relativeTo} setting and {@code K(K-1)} rates: {@code
   *       SVSComplexSubstitutionModel};
   *   <li>no {@code relativeTo} setting and {@code K(K-1)/2} rates: {@code
   *       SVSGeneralSubstitutionModel};
   *   <li>a {@code relativeTo} setting and {@code K(K-1)/2 - 1} rates: {@code
   *       GeneralSubstitutionModel}.
   * </ul>
   *
   * @param xo the XML element to parse
   * @return the constructed substitution model
   * @throws XMLParseException if the frequency model or rates parameter is missing, the data
   *     types do not match, a parameter has the wrong dimension, or {@code relativeTo} is out of
   *     range
   */
  public Object parseXMLObject(XMLObject xo) throws XMLParseException {

    FrequencyModel freqModel = null;

    if (xo.hasChildNamed(FREQUENCIES)) {
      XMLObject cxo = xo.getChild(FREQUENCIES);
      freqModel = (FrequencyModel) cxo.getChild(FrequencyModel.class);
    }

    // Every path below dereferences the frequency model, so fail with a parse error
    // instead of a NullPointerException when it is absent.
    if (freqModel == null) {
      throw new XMLParseException("No frequency model found in " + getParserName());
    }

    DataType dataType = DataTypeUtils.getDataType(xo);

    if (dataType == null) {
      dataType = (DataType) xo.getChild(DataType.class);
    }

    if (dataType == null) {
      dataType = freqModel.getDataType();
    }

    if (dataType != freqModel.getDataType()) {
      throw new XMLParseException(
          "Data type of "
              + getParserName()
              + " element does not match that of its frequencyModel.");
    }

    XMLObject cxo = xo.getChild(RATES);
    Parameter ratesParameter = (cxo == null) ? null : (Parameter) cxo.getChild(Parameter.class);

    // Checked here, before the parameter is first used; the check used to sit at the end of
    // the method where it could never be reached with a null parameter.
    if (ratesParameter == null) {
      throw new XMLParseException("No rates parameter found in " + getParserName());
    }

    int states = dataType.getStateCount();
    Logger.getLogger("dr.evomodel")
        .info("  General Substitution Model (stateCount=" + states + ")");

    boolean hasRelativeRates =
        cxo.hasChildNamed(RELATIVE_TO)
            || (cxo.hasAttribute(RELATIVE_TO) && cxo.getIntegerAttribute(RELATIVE_TO) > 0);

    // K(K-1) off-diagonal rates for a non-reversible model, half of that for a reversible one
    int nonReversibleRateCount = ((dataType.getStateCount() - 1) * dataType.getStateCount());
    int reversibleRateCount = (nonReversibleRateCount / 2);

    boolean isNonReversible = ratesParameter.getDimension() == nonReversibleRateCount;
    boolean hasIndicator = xo.hasChildNamed(INDICATOR);

    if (!hasRelativeRates) {
      Parameter indicatorParameter = null;

      if (ratesParameter.getDimension() != reversibleRateCount
          && ratesParameter.getDimension() != nonReversibleRateCount) {
        throw new XMLParseException(
            "Rates parameter in "
                + getParserName()
                + " element should have "
                + (reversibleRateCount)
                + " dimensions for reversible model or "
                + nonReversibleRateCount
                + " dimensions for non-reversible. "
                + "However parameter dimension is "
                + ratesParameter.getDimension());
      }

      if (hasIndicator) { // this is using BSSVS
        cxo = xo.getChild(INDICATOR);
        indicatorParameter = (Parameter) cxo.getChild(Parameter.class);

        if (indicatorParameter.getDimension() != ratesParameter.getDimension()) {
          throw new XMLParseException(
              "Rates and indicator parameters in "
                  + getParserName()
                  + " element must be the same dimension.");
        }

        boolean randomize =
            xo.getAttribute(
                dr.evomodelxml.substmodel.ComplexSubstitutionModelParser.RANDOMIZE, false);
        if (randomize) {
          BayesianStochasticSearchVariableSelection.Utils.randomize(
              indicatorParameter, dataType.getStateCount(), !isNonReversible);
        }
      }

      if (isNonReversible) {
        Logger.getLogger("dr.evomodel").info("  Using BSSVS Complex Substitution Model");
        return new SVSComplexSubstitutionModel(
            getParserName(), dataType, freqModel, ratesParameter, indicatorParameter);
      } else {
        Logger.getLogger("dr.evomodel").info("  Using BSSVS General Substitution Model");
        return new SVSGeneralSubstitutionModel(
            getParserName(), dataType, freqModel, ratesParameter, indicatorParameter);
      }

    } else {
      // if we have relativeTo attribute then we use the old GeneralSubstitutionModel

      if (ratesParameter.getDimension() != reversibleRateCount - 1) {
        throw new XMLParseException(
            "Rates parameter in "
                + getParserName()
                + " element should have "
                + (reversibleRateCount - 1)
                + " dimensions. However parameter dimension is "
                + ratesParameter.getDimension());
      }

      // hasRelativeRates is always true on this branch; convert the 1-based attribute
      // into a 0-based rate index.
      int relativeTo = cxo.getIntegerAttribute(RELATIVE_TO) - 1;

      if (relativeTo < 0 || relativeTo >= reversibleRateCount) {
        throw new XMLParseException(
            RELATIVE_TO + " must be between 1 and " + reversibleRateCount + " (inclusive)");
      }

      // Map the rate index onto the (row, col) pair of states it refers to, walking the
      // upper triangle row by row: row 0 holds states-1 rates, row 1 holds states-2, ...
      int t = relativeTo;
      int s = states - 1;
      int row = 0;
      while (t >= s) {
        t -= s;
        s -= 1;
        row += 1;
      }
      int col = t + row + 1;

      Logger.getLogger("dr.evomodel")
          .info("  Rates relative to " + dataType.getCode(row) + "<->" + dataType.getCode(col));

      return new GeneralSubstitutionModel(
          getParserName(), dataType, freqModel, ratesParameter, relativeTo);
    }
  }

  // ************************************************************************
  // AbstractXMLObjectParser implementation
  // ************************************************************************

  /** Returns a one-line, human-readable description of what this parser produces. */
  public String getParserDescription() {
    return "A general reversible model of sequence substitution for any data type.";
  }

  /**
   * Returns the type reported for objects produced by this parser.
   *
   * <p>NOTE(review): this reports the parser class itself, whereas {@link #parseXMLObject}
   * returns substitution model instances — confirm whether the model type was intended here.
   */
  public Class getReturnType() {
    return GeneralSubstitutionModelParser.class;
  }

  /** Returns the syntax rules describing the XML accepted by this parser. */
  public XMLSyntaxRule[] getSyntaxRules() {
    return rules;
  }

  // Accepted XML: an optional data type (attribute or child element, not both), a frequencies
  // element, a rates element wrapping a parameter, an optional rate indicator element wrapping
  // a parameter, and an optional boolean randomize attribute.
  private final XMLSyntaxRule[] rules = {
    new XORRule(
        new StringAttributeRule(
            DataType.DATA_TYPE,
            "The type of sequence data",
            DataType.getRegisteredDataTypeNames(),
            false),
        new ElementRule(DataType.class),
        true),
    new ElementRule(FREQUENCIES, FrequencyModel.class),
    new ElementRule(RATES, new XMLSyntaxRule[] {new ElementRule(Parameter.class)}),
    new ElementRule(
        INDICATOR,
        new XMLSyntaxRule[] {
          new ElementRule(Parameter.class),
        },
        true),
    AttributeRule.newBooleanRule(ComplexSubstitutionModelParser.RANDOMIZE, true),
  };
}
Example #6
0
  /**
   * Renders an alignment as the text of a NEXUS {@code data} block.
   *
   * <p>{@code nchar} is the length of the longest sequence; sequences shorter than that are
   * padded on the right with the gap character so that every matrix row has the same width.
   *
   * @param alignment the alignment to export
   * @return the NEXUS text, starting with {@code #NEXUS} and ending with {@code end;}
   * @throws IOException never thrown by this implementation; kept for interface compatibility
   * @throws IllegalArgumentException if no data type can be determined because the alignment
   *     has no sequences (or none of them reports a data type)
   * @throws RuntimeException if the sequences do not all share the same data type
   */
  public String exportAlignment(Alignment alignment) throws IOException, IllegalArgumentException {

    DataType dataType = null;
    int seqLength = 0;

    final int sequenceCount = alignment.getSequenceCount();

    // First pass: find the longest sequence and make sure all sequences share one data type.
    for (int i = 0; i < sequenceCount; i++) {

      Sequence sequence = alignment.getSequence(i);

      seqLength = Math.max(seqLength, sequence.getLength());

      if (dataType == null) {
        dataType = sequence.getDataType();
      } else if (dataType != sequence.getDataType()) {
        throw new RuntimeException("Sequences must have the same data type.");
      } // END: dataType check
    } // END: sequences loop

    // The format line needs a data type; report its absence explicitly rather than
    // failing with a NullPointerException below.
    if (dataType == null) {
      throw new IllegalArgumentException(
          "Cannot export an alignment without sequences that have a data type.");
    }

    StringBuilder buffer = new StringBuilder();

    buffer.append("#NEXUS\n");
    buffer.append("begin data;\n");
    buffer.append(
        "\tdimensions"
            + " "
            + "ntax="
            + alignment.getTaxonCount()
            + " "
            + "nchar="
            + seqLength
            + ";\n");
    buffer.append(
        "\tformat datatype="
            + dataType.getDescription()
            + " missing="
            + DataType.UNKNOWN_CHARACTER
            + " gap="
            + DataType.GAP_CHARACTER
            + ";\n");
    buffer.append("\tmatrix\n");

    // Each sequence is currently written on a single row spanning all characters.
    final int maxRowLength = seqLength;
    // Integer ceiling of seqLength / maxRowLength; zero rows when there are no characters.
    final int rowCount = (maxRowLength > 0) ? (seqLength + maxRowLength - 1) / maxRowLength : 0;

    for (int n = 0; n < rowCount; n++) {
      final int rowStart = n * maxRowLength;
      final int rowEnd = Math.min(rowStart + maxRowLength, seqLength);

      for (int i = 0; i < sequenceCount; i++) {

        Sequence sequence = alignment.getSequence(i);

        StringBuilder builder = new StringBuilder("\t");

        appendTaxonName(sequence.getTaxon(), builder);

        String sequenceString = sequence.getSequenceString();

        // The part of this sequence that falls inside the current row; it is shorter than
        // the row (possibly empty) when the sequence ends before the row does.
        final int chunkStart = Math.min(rowStart, sequenceString.length());
        final int chunkEnd = Math.min(rowStart + maxRowLength, sequenceString.length());

        builder.append("\t").append(sequenceString, chunkStart, chunkEnd);

        // Pad with gaps up to the row width so that all rows have equal length.
        final int shortBy = (rowEnd - rowStart) - (chunkEnd - chunkStart);
        for (int j = 0; j < shortBy; j++) {
          builder.append(DataType.GAP_CHARACTER);
        }

        buffer.append(builder).append("\n");
      } // END: sequences loop
    }
    buffer.append(";\nend;");

    return buffer.toString();
  } // END: exportAlignment