/** * Initialise a CTMain before usage with a first tree, going through and setting up the hash table * etc. * * @param root Root of the initial tree to initialise with * @param noOfSamples Current number of samples taken */ public void initialize(TreeNode root, int noOfSamples) { // Parameter initialisation this.noOfSamples = noOfSamples; noOfTrees = 0; // TaxaMap initialisation List<TreeNode> leaves = root.getLeaves(); noOfTaxa = leaves.size(); // Hash initialisation hashUtils = new HashUtils(); hashUtils.initialize(noOfTaxa, noOfSamples, C, seed); hashTable = new HashTable(hashUtils.m1); // Taxamap initialisation taxa = new TaxaMap(noOfTaxa); for (int i = 0; i < leaves.size(); i++) { taxa.put(leaves.get(i).name, i); } leafEdgeLengths = new double[noOfTaxa]; // Adds a single star partition, once and for all. BitSet star = new BitSet(noOfTaxa); star.flip(0, noOfTaxa); HashEntry entry = new HashEntry(-1, star, 0.0d); entry.count = noOfSamples + 1; partitions.add(entry); // Majority threshold initialisation updateInterestThreshold(); }
/** * Create partitions from an input tree - recursive so will be called by many nodes, beginning * with root but with calculations actually beginning on leaves. * * @param partitions Partitions in the form of entries in the hash table * @param curInterestPercentage Current percentage of interest that we want splits to occur above * to view in the network later */ private ArrayList<Cluster> constructClusters( LinkedList<HashEntry> partitions, double curInterestPercentage) { ArrayList<Cluster> clusters = new ArrayList<Cluster>(); // Thresholds for the current run of the cluster builder (c.f. the threshold for the partitions // list). double curInterestThreshold = (double) (noOfTrees * (curInterestPercentage / 100.0d)); int majInterestThreshold = (int) (noOfTrees * (50.0 / 100.0d)); for (Iterator<HashEntry> it = partitions.iterator(); it.hasNext(); ) { HashEntry entry = it.next(); // Checks if this partition is still above threshold... partition. // If not: remove it - O(1). if ((double) entry.count <= curInterestThreshold) { // correct the isMajority flag to now refer to majority not if of interest or not.. if (entry.count <= majInterestThreshold) { entry.isMajority = false; } it.remove(); continue; } // Constructs clusters (list of TreeNode's for each set bit) from each partition. if ((double) entry.count > curInterestThreshold) { Cluster cluster = new Cluster(); if (entry.count > majInterestThreshold) { cluster.isMajority = true; } cluster.aboveSplit = entry.partition; cluster.noOfOccurrences = entry.count; cluster.edgeLength = entry.edgeLengthsSum / entry.count; for (int i = 0; i < entry.partition.size(); i++) { if (entry.partition.get(i)) { TreeNode node = new TreeNode(taxa.getName(i)); node.edgeLength = leafEdgeLengths[i] / noOfTrees; cluster.add(node); } } clusters.add(cluster); } } // Sort by number of taxa. // TODO: This might obviously be optimized a bit, e.g. with a PriorityQueue. - Eiriksson Collections.sort(clusters); return clusters; }
/** Constructs the majorityTree. Used to make Network so remember to call this first! */ public CTree constructMajorityTree() { int numOfNode = 0; CTree tree = new CTree(); // Creates the clusters. tree.clusters = constructClusters(partitions, resPercentage); // Begins by constructing the star tree. for (TreeNode node : tree.clusters.get(0)) { TreeNode root = tree.getRoot(); // Retrieves the root, node.parent = root; // Parent of this node -> root. root.children.add(node); // Adds this node as the children of the root. tree.nodeList.add(node); // Adds this node to the list of nodes. assert tree.nodeList.get(0).name.equals("root"); tree.parentList.put(node.name, 0); // Adds "this node -> root" parent mapping. } // Constructs internal nodes for the rest of the majority bi-partitions and rewires them. for (int z = 1; z < tree.clusters.size(); z++) { Cluster cluster = tree.clusters.get(z); // only take the majority ones....! if (cluster.isMajority == true) { // 1. Retrieves the parent of the first node in this cluster. TreeNode parent = tree.nodeList.get(tree.parentList.get(cluster.get(0).name)); // 2. Constructs a new internal node. String nodeName = "int" + Integer.toString(numOfNode); TreeNode internalNode = new TreeNode(nodeName); internalNode.addProperty("noOfOccurrences", cluster.noOfOccurrences); internalNode.edgeLength = cluster.edgeLength; internalNode.parent = parent; // 3. Insert the new node into the node list. tree.nodeList.add(internalNode); assert tree.nodeList.get(tree.nodeList.size() - 1).name.equals(internalNode.name); tree.parentList.put(nodeName, tree.nodeList.size() - 1); // update the clusters node references... // (use a method of storing edges that referenced the positions in the Tree's nodelists for // reference later.) tree.clusters.get(z).nodeRefA = tree.parentList.get(cluster.get(0).name); tree.clusters.get(z).nodeRefB = (tree.nodeList.size() - 1); for (TreeNode node : cluster) { // 4. Makes this node the child of the new internal node. node.parent = internalNode; assert node.parent.name.equals(tree.nodeList.get(tree.nodeList.size() - 1).name); tree.parentList.put(node.name, tree.nodeList.size() - 1); internalNode.children.add(node); // 5. Delete the moved node(s) from the parent's children. // TODO: optimize? probably not. for (int i = 0; i < parent.children.size(); i++) { if (parent.children.get(i).name.equals(node.name)) { parent.children.remove(i); break; } } } // Wires up the internal node. parent.children.add(internalNode); numOfNode++; } } return tree; }
/** * Create partitions from an input tree - recursive so will be called by many nodes, beginning * with root but with calculations actually beginning on leaves. * * @param node Node in tree with above split to add to the hash table */ private BitSet createPartitions(TreeNode node) { if (node.isLeaf()) { // Leaf node. int index = taxa.get(node.name); // leaf simply has it's keys stored in the hash utilities node.addProperty("tableHashKey", hashUtils.a1[index]); node.addProperty("bucketHashKey", hashUtils.a2[index]); // Updates the edge length array. leafEdgeLengths[index] += node.edgeLength; // Create a new partition from scratch to represent it BitSet partition = new BitSet(noOfTaxa); partition.set(index); assert partition.cardinality() == 1 : "There should be exactly a single bit set."; return partition; } else { // An internal node: Traverses the tree in post order. BitSet partition = new BitSet(noOfTaxa); // Get the node's partition representation from its children List<TreeNode> children = node.children; for (TreeNode child : children) { partition.or(createPartitions(child)); } // if this node is NOT to the right side of the root then add it... (Avoid adding splits twice // for CNetworks) if (node.parent != root || root.getRight() != node) { noOfPartitions++; long tableKey = 0; long bucketKey = 0; long tableKey2 = 0; long bucketKey2 = 0; // Calculate the hash keys for this partition for (TreeNode child : children) { tableKey += child.getIntProperty("tableHashKey"); bucketKey += child.getIntProperty("bucketHashKey"); } // if first is one then we need to store the flipped version so we store each split in one // representation only. if (partition.get(0) == true) { // copy to a new partition that is the flipped version BitSet partitionF = new BitSet(noOfTaxa); for (int l = 0; l < noOfTaxa; l++) { if (partition.get(l) == false) partitionF.set(l); } // calculate the hash keys for the flipped partition... for (int k = 0; k < noOfTaxa; k++) { if (partitionF.get(k) == true) { tableKey2 += hashUtils.a1[k]; bucketKey2 += hashUtils.a2[k]; } } // store the properties in the node node.addProperty("tableHashKey", (int) (tableKey2 % hashUtils.m1)); node.addProperty("bucketHashKey", (int) (bucketKey2 % hashUtils.m2)); if (noOfPartitions < noOfTaxa - 2) { // Avoids the addition of the star partition hashTable.put( partitionF, node.edgeLength, node.getIntProperty("tableHashKey"), node.getIntProperty("bucketHashKey"), interestThreshold, partitions); } // remember to still return the original partition with its appropriate keys... node.addProperty("tableHashKey", (int) (tableKey % hashUtils.m1)); node.addProperty("bucketHashKey", (int) (bucketKey % hashUtils.m2)); return partition; } // if first is zero then simply add the partition, recursively calculating the hash else { node.addProperty("tableHashKey", (int) (tableKey % hashUtils.m1)); node.addProperty("bucketHashKey", (int) (bucketKey % hashUtils.m2)); if (noOfPartitions < noOfTaxa - 2) { // Avoids the addition of the star partition hashTable.put( partition, node.edgeLength, node.getIntProperty("tableHashKey"), node.getIntProperty("bucketHashKey"), interestThreshold, partitions); } return partition; } } // still return the partition even if node was not added as was right of root... return partition; } }