public Alignment getPairAlignment(int x, int y) { SimpleAlignment pairAlignment = new SimpleAlignment(); StringBuffer sequence0 = new StringBuffer(); StringBuffer sequence1 = new StringBuffer(); DataType dataType = alignment.getDataType(); int stateCount = dataType.getStateCount(); for (int i = 0; i < alignment.getSiteCount(); i++) { int s0 = alignment.getState(x, i); int s1 = alignment.getState(y, i); char c0 = dataType.getChar(s0); char c1 = dataType.getChar(s1); if (s0 < stateCount || s1 < stateCount) { sequence0.append(c0); sequence1.append(c1); } } // trim hanging ends on left int left = 0; while ((dataType.getState(sequence0.charAt(left)) >= stateCount) || (dataType.getState(sequence1.charAt(left)) >= stateCount)) { left += 1; } // trim hanging ends on right int right = sequence0.length() - 1; while ((dataType.getState(sequence0.charAt(right)) >= stateCount) || (dataType.getState(sequence1.charAt(right)) >= stateCount)) { right -= 1; } if (right < left) return null; String sequenceString0 = sequence0.substring(left, right + 1); String sequenceString1 = sequence1.substring(left, right + 1); pairAlignment.addSequence(new Sequence(alignment.getTaxon(x), sequenceString0)); pairAlignment.addSequence(new Sequence(alignment.getTaxon(y), sequenceString1)); return pairAlignment; }
/**
 * Computes, for the branch above {@code node}, how the branch length is apportioned between the
 * discrete states. The source of the values depends on the current {@code mode}:
 *
 * <ul>
 *   <li>{@code MARKOV_JUMP_PROCESS}: the values are read directly from {@code trait}.
 *   <li>{@code PARSIMONY}: an approximation from the Fitch parsimony state sets at both ends of
 *       the branch, assuming the state changes at the branch midpoint.
 *   <li>{@code NODE_STATES}: half of the branch is assigned to the state at the node and half to
 *       the state at its parent, both read from {@code trait}.
 * </ul>
 *
 * @param tree the tree containing the node
 * @param node the node below the branch of interest; its parent is looked up in the
 *     {@code PARSIMONY} and {@code NODE_STATES} modes
 * @return an array of the total amount of time spent in each of the discrete states along the
 *     branch above the given node, or {@code null} if {@code mode} matches none of the handled
 *     modes
 */
private double[] getProcessValues(final Tree tree, final NodeRef node) {

    double[] processValues = null;
    double branchTime = tree.getBranchLength(node);

    if (mode == Mode.MARKOV_JUMP_PROCESS) {
        processValues = (double[]) trait.getTrait(tree, node);
    } else if (mode == Mode.PARSIMONY) {
        // an approximation to dwell times using parsimony, assuming
        // the state changes midpoint on the tree. Does a weighted
        // average of the equally parsimonious state reconstructions
        // at the top and bottom of each branch.

        if (treeChanged) {
            // the parsimony reconstruction is only recomputed after the tree has changed
            fitchParsimony.initialize(tree);
            treeChanged = false;
        }

        int[] states = fitchParsimony.getStates(tree, node);
        int[] parentStates = fitchParsimony.getStates(tree, tree.getParent(node));

        processValues = new double[fitchParsimony.getPatterns().getStateCount()];

        for (int state : states) {
            processValues[state] += branchTime / 2;
        }
        for (int state : parentStates) {
            processValues[state] += branchTime / 2;
        }

        // normalize by the number of equally parsimonious states at each end of the branch;
        // processValues should add up to the total branch length. The divisor must be
        // floating point: integer division would truncate whenever the combined number of
        // states is odd and the values would then sum to more than the branch length.
        final double normalization = (states.length + parentStates.length) / 2.0;
        for (int i = 0; i < processValues.length; i++) {
            processValues[i] /= normalization;
        }
    } else if (mode == Mode.NODE_STATES) {
        // this array should be size #states NOT #rates
        processValues = new double[dataType.getStateCount()];

        // if the states are being sampled - then there is only one possible state at each
        // end of the branch.
        int state = ((int[]) trait.getTrait(tree, node))[traitIndex];
        processValues[state] += branchTime / 2;

        int parentState = ((int[]) trait.getTrait(tree, tree.getParent(node)))[traitIndex];
        processValues[parentState] += branchTime / 2;
    }

    return processValues;
}
AbstractCovarionModel( String name, DataType dataType, Parameter frequencies, Parameter hiddenFrequencies) { super(name); this.dataType = dataType; setStateCount(dataType.getStateCount()); updateMatrix = true; freqModel = new CovarionFrequencyModel(dataType, frequencies, hiddenFrequencies); addModel(freqModel); }
public Object parseXMLObject(XMLObject xo) throws XMLParseException { Parameter ratesParameter = null; FrequencyModel freqModel = null; if (xo.hasChildNamed(FREQUENCIES)) { XMLObject cxo = xo.getChild(FREQUENCIES); freqModel = (FrequencyModel) cxo.getChild(FrequencyModel.class); } DataType dataType = DataTypeUtils.getDataType(xo); if (dataType == null) dataType = (DataType) xo.getChild(DataType.class); // if (xo.hasAttribute(DataType.DATA_TYPE)) { // String dataTypeStr = xo.getStringAttribute(DataType.DATA_TYPE); // if (dataTypeStr.equals(Nucleotides.DESCRIPTION)) { // dataType = Nucleotides.INSTANCE; // } else if (dataTypeStr.equals(AminoAcids.DESCRIPTION)) { // dataType = AminoAcids.INSTANCE; // } else if (dataTypeStr.equals(Codons.DESCRIPTION)) { // dataType = Codons.UNIVERSAL; // } else if (dataTypeStr.equals(TwoStates.DESCRIPTION)) { // dataType = TwoStates.INSTANCE; // } // } if (dataType == null) dataType = freqModel.getDataType(); if (dataType != freqModel.getDataType()) { throw new XMLParseException( "Data type of " + getParserName() + " element does not match that of its frequencyModel."); } XMLObject cxo = xo.getChild(RATES); ratesParameter = (Parameter) cxo.getChild(Parameter.class); int states = dataType.getStateCount(); Logger.getLogger("dr.evomodel") .info(" General Substitution Model (stateCount=" + states + ")"); boolean hasRelativeRates = cxo.hasChildNamed(RELATIVE_TO) || (cxo.hasAttribute(RELATIVE_TO) && cxo.getIntegerAttribute(RELATIVE_TO) > 0); int nonReversibleRateCount = ((dataType.getStateCount() - 1) * dataType.getStateCount()); int reversibleRateCount = (nonReversibleRateCount / 2); boolean isNonReversible = ratesParameter.getDimension() == nonReversibleRateCount; boolean hasIndicator = xo.hasChildNamed(INDICATOR); if (!hasRelativeRates) { Parameter indicatorParameter = null; if (ratesParameter.getDimension() != reversibleRateCount && ratesParameter.getDimension() != nonReversibleRateCount) { throw new XMLParseException( "Rates parameter 
in " + getParserName() + " element should have " + (reversibleRateCount) + " dimensions for reversible model or " + nonReversibleRateCount + " dimensions for non-reversible. " + "However parameter dimension is " + ratesParameter.getDimension()); } if (hasIndicator) { // this is using BSSVS cxo = xo.getChild(INDICATOR); indicatorParameter = (Parameter) cxo.getChild(Parameter.class); if (indicatorParameter.getDimension() != ratesParameter.getDimension()) { throw new XMLParseException( "Rates and indicator parameters in " + getParserName() + " element must be the same dimension."); } boolean randomize = xo.getAttribute( dr.evomodelxml.substmodel.ComplexSubstitutionModelParser.RANDOMIZE, false); if (randomize) { BayesianStochasticSearchVariableSelection.Utils.randomize( indicatorParameter, dataType.getStateCount(), !isNonReversible); } } if (isNonReversible) { // if (xo.hasChildNamed(ROOT_FREQ)) { // cxo = xo.getChild(ROOT_FREQ); // FrequencyModel rootFreq = (FrequencyModel) // cxo.getChild(FrequencyModel.class); // // if (dataType != rootFreq.getDataType()) { // throw new XMLParseException("Data type of " + getParserName() + " // element does not match that of its rootFrequencyModel."); // } // // Logger.getLogger("dr.evomodel").info(" Using BSSVS Complex // Substitution Model"); // return new SVSComplexSubstitutionModel(getParserName(), dataType, // freqModel, ratesParameter, indicatorParameter); // // } else { // throw new XMLParseException("Non-reversible model missing " + // ROOT_FREQ + " element"); // } Logger.getLogger("dr.evomodel").info(" Using BSSVS Complex Substitution Model"); return new SVSComplexSubstitutionModel( getParserName(), dataType, freqModel, ratesParameter, indicatorParameter); } else { Logger.getLogger("dr.evomodel").info(" Using BSSVS General Substitution Model"); return new SVSGeneralSubstitutionModel( getParserName(), dataType, freqModel, ratesParameter, indicatorParameter); } } else { // if we have relativeTo attribute then we use the old 
GeneralSubstitutionModel if (ratesParameter.getDimension() != reversibleRateCount - 1) { throw new XMLParseException( "Rates parameter in " + getParserName() + " element should have " + (reversibleRateCount - 1) + " dimensions. However parameter dimension is " + ratesParameter.getDimension()); } int relativeTo = 0; if (hasRelativeRates) { relativeTo = cxo.getIntegerAttribute(RELATIVE_TO) - 1; } if (relativeTo < 0 || relativeTo >= reversibleRateCount) { throw new XMLParseException(RELATIVE_TO + " must be 1 or greater"); } else { int t = relativeTo; int s = states - 1; int row = 0; while (t >= s) { t -= s; s -= 1; row += 1; } int col = t + row + 1; Logger.getLogger("dr.evomodel") .info(" Rates relative to " + dataType.getCode(row) + "<->" + dataType.getCode(col)); } if (ratesParameter == null) { if (reversibleRateCount == 1) { // simplest model for binary traits... } else { throw new XMLParseException("No rates parameter found in " + getParserName()); } } return new GeneralSubstitutionModel( getParserName(), dataType, freqModel, ratesParameter, relativeTo); } }
/** Parses a GeneralSubstitutionModel or one of its more specific descendants. */ public class GeneralSubstitutionModelParser extends AbstractXMLObjectParser { public static final String GENERAL_SUBSTITUTION_MODEL = "generalSubstitutionModel"; public static final String DATA_TYPE = "dataType"; public static final String RATES = "rates"; public static final String RELATIVE_TO = "relativeTo"; public static final String FREQUENCIES = "frequencies"; public static final String INDICATOR = "rateIndicator"; public static final String SVS_GENERAL_SUBSTITUTION_MODEL = "svsGeneralSubstitutionModel"; public static final String SVS_COMPLEX_SUBSTITUTION_MODEL = "svsComplexSubstitutionModel"; public String getParserName() { return GENERAL_SUBSTITUTION_MODEL; } public String[] getParserNames() { return new String[] { getParserName(), SVS_GENERAL_SUBSTITUTION_MODEL, SVS_COMPLEX_SUBSTITUTION_MODEL }; } public Object parseXMLObject(XMLObject xo) throws XMLParseException { Parameter ratesParameter = null; FrequencyModel freqModel = null; if (xo.hasChildNamed(FREQUENCIES)) { XMLObject cxo = xo.getChild(FREQUENCIES); freqModel = (FrequencyModel) cxo.getChild(FrequencyModel.class); } DataType dataType = DataTypeUtils.getDataType(xo); if (dataType == null) dataType = (DataType) xo.getChild(DataType.class); // if (xo.hasAttribute(DataType.DATA_TYPE)) { // String dataTypeStr = xo.getStringAttribute(DataType.DATA_TYPE); // if (dataTypeStr.equals(Nucleotides.DESCRIPTION)) { // dataType = Nucleotides.INSTANCE; // } else if (dataTypeStr.equals(AminoAcids.DESCRIPTION)) { // dataType = AminoAcids.INSTANCE; // } else if (dataTypeStr.equals(Codons.DESCRIPTION)) { // dataType = Codons.UNIVERSAL; // } else if (dataTypeStr.equals(TwoStates.DESCRIPTION)) { // dataType = TwoStates.INSTANCE; // } // } if (dataType == null) dataType = freqModel.getDataType(); if (dataType != freqModel.getDataType()) { throw new XMLParseException( "Data type of " + getParserName() + " element does not match that of its 
frequencyModel."); } XMLObject cxo = xo.getChild(RATES); ratesParameter = (Parameter) cxo.getChild(Parameter.class); int states = dataType.getStateCount(); Logger.getLogger("dr.evomodel") .info(" General Substitution Model (stateCount=" + states + ")"); boolean hasRelativeRates = cxo.hasChildNamed(RELATIVE_TO) || (cxo.hasAttribute(RELATIVE_TO) && cxo.getIntegerAttribute(RELATIVE_TO) > 0); int nonReversibleRateCount = ((dataType.getStateCount() - 1) * dataType.getStateCount()); int reversibleRateCount = (nonReversibleRateCount / 2); boolean isNonReversible = ratesParameter.getDimension() == nonReversibleRateCount; boolean hasIndicator = xo.hasChildNamed(INDICATOR); if (!hasRelativeRates) { Parameter indicatorParameter = null; if (ratesParameter.getDimension() != reversibleRateCount && ratesParameter.getDimension() != nonReversibleRateCount) { throw new XMLParseException( "Rates parameter in " + getParserName() + " element should have " + (reversibleRateCount) + " dimensions for reversible model or " + nonReversibleRateCount + " dimensions for non-reversible. 
" + "However parameter dimension is " + ratesParameter.getDimension()); } if (hasIndicator) { // this is using BSSVS cxo = xo.getChild(INDICATOR); indicatorParameter = (Parameter) cxo.getChild(Parameter.class); if (indicatorParameter.getDimension() != ratesParameter.getDimension()) { throw new XMLParseException( "Rates and indicator parameters in " + getParserName() + " element must be the same dimension."); } boolean randomize = xo.getAttribute( dr.evomodelxml.substmodel.ComplexSubstitutionModelParser.RANDOMIZE, false); if (randomize) { BayesianStochasticSearchVariableSelection.Utils.randomize( indicatorParameter, dataType.getStateCount(), !isNonReversible); } } if (isNonReversible) { // if (xo.hasChildNamed(ROOT_FREQ)) { // cxo = xo.getChild(ROOT_FREQ); // FrequencyModel rootFreq = (FrequencyModel) // cxo.getChild(FrequencyModel.class); // // if (dataType != rootFreq.getDataType()) { // throw new XMLParseException("Data type of " + getParserName() + " // element does not match that of its rootFrequencyModel."); // } // // Logger.getLogger("dr.evomodel").info(" Using BSSVS Complex // Substitution Model"); // return new SVSComplexSubstitutionModel(getParserName(), dataType, // freqModel, ratesParameter, indicatorParameter); // // } else { // throw new XMLParseException("Non-reversible model missing " + // ROOT_FREQ + " element"); // } Logger.getLogger("dr.evomodel").info(" Using BSSVS Complex Substitution Model"); return new SVSComplexSubstitutionModel( getParserName(), dataType, freqModel, ratesParameter, indicatorParameter); } else { Logger.getLogger("dr.evomodel").info(" Using BSSVS General Substitution Model"); return new SVSGeneralSubstitutionModel( getParserName(), dataType, freqModel, ratesParameter, indicatorParameter); } } else { // if we have relativeTo attribute then we use the old GeneralSubstitutionModel if (ratesParameter.getDimension() != reversibleRateCount - 1) { throw new XMLParseException( "Rates parameter in " + getParserName() + " element 
should have " + (reversibleRateCount - 1) + " dimensions. However parameter dimension is " + ratesParameter.getDimension()); } int relativeTo = 0; if (hasRelativeRates) { relativeTo = cxo.getIntegerAttribute(RELATIVE_TO) - 1; } if (relativeTo < 0 || relativeTo >= reversibleRateCount) { throw new XMLParseException(RELATIVE_TO + " must be 1 or greater"); } else { int t = relativeTo; int s = states - 1; int row = 0; while (t >= s) { t -= s; s -= 1; row += 1; } int col = t + row + 1; Logger.getLogger("dr.evomodel") .info(" Rates relative to " + dataType.getCode(row) + "<->" + dataType.getCode(col)); } if (ratesParameter == null) { if (reversibleRateCount == 1) { // simplest model for binary traits... } else { throw new XMLParseException("No rates parameter found in " + getParserName()); } } return new GeneralSubstitutionModel( getParserName(), dataType, freqModel, ratesParameter, relativeTo); } } // ************************************************************************ // AbstractXMLObjectParser implementation // ************************************************************************ public String getParserDescription() { return "A general reversible model of sequence substitution for any data type."; } public Class getReturnType() { return GeneralSubstitutionModelParser.class; } public XMLSyntaxRule[] getSyntaxRules() { return rules; } private final XMLSyntaxRule[] rules = { new XORRule( new StringAttributeRule( DataType.DATA_TYPE, "The type of sequence data", DataType.getRegisteredDataTypeNames(), false), new ElementRule(DataType.class), true), new ElementRule(FREQUENCIES, FrequencyModel.class), new ElementRule(RATES, new XMLSyntaxRule[] {new ElementRule(Parameter.class)}), new ElementRule( INDICATOR, new XMLSyntaxRule[] { new ElementRule(Parameter.class), }, true), AttributeRule.newBooleanRule(ComplexSubstitutionModelParser.RANDOMIZE, true), }; }
public String exportAlignment(Alignment alignment) throws IOException, IllegalArgumentException { StringBuffer buffer = new StringBuffer(); DataType dataType = null; int seqLength = 0; for (int i = 0; i < alignment.getSequenceCount(); i++) { Sequence sequence = alignment.getSequence(i); if (sequence.getLength() > seqLength) { seqLength = sequence.getLength(); } if (dataType == null) { dataType = sequence.getDataType(); } else if (dataType != sequence.getDataType()) { throw new RuntimeException("Sequences must have the same data type."); } // END: dataType check } // END: sequences loop buffer.append("#NEXUS\n"); buffer.append("begin data;\n"); buffer.append( "\tdimensions" + " " + "ntax=" + alignment.getTaxonCount() + " " + "nchar=" + seqLength + ";\n"); buffer.append( "\tformat datatype=" + dataType.getDescription() + " missing=" + DataType.UNKNOWN_CHARACTER + " gap=" + DataType.GAP_CHARACTER + ";\n"); buffer.append("\tmatrix\n"); int maxRowLength = seqLength; for (int n = 0; n < Math.ceil((double) seqLength / maxRowLength); n++) { for (int i = 0; i < alignment.getSequenceCount(); i++) { Sequence sequence = alignment.getSequence(i); StringBuilder builder = new StringBuilder("\t"); appendTaxonName(sequence.getTaxon(), builder); String sequenceString = sequence.getSequenceString(); builder .append("\t") .append( sequenceString.subSequence( n * maxRowLength, Math.min((n + 1) * maxRowLength, sequenceString.length()))); int shortBy = Math.min(Math.min(n * maxRowLength, seqLength) - sequence.getLength(), maxRowLength); if (shortBy > 0) { for (int j = 0; j < shortBy; j++) { builder.append(DataType.GAP_CHARACTER); } } buffer.append(builder + "\n"); } // END: sequences loop } buffer.append(";\nend;"); return buffer.toString(); } // END: exportAlignment