// Compute scores for new leaf0 and leaf1, and add them to scoreList. private void computeNewLeafScores(SelectedSet selectedSet, int i, int j) { int NS = selectedSet.getN(); for (int a = 0; a < NS; a++) { Individual individual = selectedSet.getIndividual(a); IGraph iterator = decisionGraphs[i].getGraph(); while (!(iterator instanceof Leaf)) { // Iterator is a variable because we are still traversing the DG. int x = ((Variable) iterator).getVariable(); char alleleX = individual.getAllele(x); if (alleleX == '0') { iterator = ((Variable) iterator).getZero(); } else { iterator = ((Variable) iterator).getOne(); } } int itrPosition = decisionGraphs[i].getLeafs().indexOf((Leaf) iterator); if (itrPosition == j || itrPosition == j + 1) { // We've reached one of the new leafs. int mZero = ((Leaf) iterator).getMZero(); int mOne = ((Leaf) iterator).getMOne(); if (mZero > 0 && mOne > 0) { // It's still "interesting" to split. char alleleI = individual.getAllele(i); // Value of Xi in individual a. for (int split : splitList[i]) { char alleleS = individual.getAllele(split); // Value of Xsplit in individual a. if (alleleI == '0') { if (alleleS == '0') { ((Leaf) iterator).addPossibleSplitFrequency(0, split); // m00[split]++; } else { ((Leaf) iterator).addPossibleSplitFrequency(1, split); // m01[split]++; } } else if (alleleS == '0') { ((Leaf) iterator).addPossibleSplitFrequency(2, split); // m10[split]++; } else { ((Leaf) iterator).addPossibleSplitFrequency(3, split); // m11[split]++; } } } } } for (int a = 0; a <= 1; a++) { Leaf newLeaf = decisionGraphs[i].getLeaf(j + a); // The two new leafs are at positions 'j' and 'j+1'. int mZero = newLeaf.getMZero(); int mOne = newLeaf.getMOne(); if (mZero > 0 && mOne > 0) { for (int s : splitList[i]) { int m00 = newLeaf.getPossibleSplitFrequency(0, s); int m01 = newLeaf.getPossibleSplitFrequency(1, s); int m10 = newLeaf.getPossibleSplitFrequency(2, s); int m11 = newLeaf.getPossibleSplitFrequency(3, s); double scoreGain = bayesianMetric.computeScoreGain(mZero, mOne, m00, m01, m10, m11); newLeaf.setScoreGain(s, scoreGain); newLeaf.updateBestSplit( s, scoreGain); // Responsible for updating the value of the best split score gain in this // leaf. } } } // END: for(int a = 0 ...) } // END: computeNewLeafScores(...)
private void generateDecisionGraphs(SelectedSet selectedSet) { this.initializeBN( selectedSet); // Refresh the BN. Compute and store in decreasing order the first score // gains, corresponding to adding a first edge to the empty BN. int SIZE_GENOME = selectedSet.getIndividual(0).getIndividual().length; while (bestScoreGain > 0) { // Search for the best split and store all the necessary information to effectively // perform it. DecisionGraph bestDecisionGraph = decisionGraphs[bestDecisionGraphPos]; int bestLeafPos = bestDecisionGraph.getBestLeafPos(); int bestSplitPos = bestDecisionGraph .getBestLeaf() .getBestSplit(); // Split Variable k. We are adding the edge (Xk)-->(Xi) to the BN. if (parentList[bestDecisionGraphPos].size() >= maxVertexDegree) // Do NOT perform this split because Xi has reached the maximum number // of parents. { bestDecisionGraph.setBestLeafScoreGain( Double .NEGATIVE_INFINITY); // Ensure that this decision graph will no longer allow splits. } else { if (!(splitList[bestDecisionGraphPos].contains( bestSplitPos))) { // Do NOT perform this split because it creates a cycle in the BN. Leaf bestLeaf = bestDecisionGraph.getLeaf(bestLeafPos); bestLeaf.setScoreGain(bestSplitPos, -1); bestLeaf.resetBestSplit(splitList[bestDecisionGraphPos]); } else { // Perform the best split. performBestSplit( bestDecisionGraphPos, bestLeafPos, bestSplitPos, SIZE_GENOME); // Update the BN and remove non-valid splits. computeNewLeafScores(selectedSet, bestDecisionGraphPos, bestLeafPos); } decisionGraphs[bestDecisionGraphPos].updateBestLeaf(); } updateScoreGain(); } // END: while(bestScoreGain > 0) } // END: generateDecisionGraphs(...)
private void initializeBN(SelectedSet selectedSet) { int NS = selectedSet.getN(); int SIZE = selectedSet.getIndividual(0).getIndividual().length; this.bestScoreGain = Double.NEGATIVE_INFINITY; Individual[] individuals = selectedSet.getIndividuals(); for (int i = 0; i < Problem.n; i++) { // Refresh the Bayesian Network structure. parentList[i] = new HashSet<Integer>( Problem.n); // Initial Capacity = stringSize; Default Load Factor = 0.75 adjacencyList[i] = new HashSet<Integer>( Problem.n); // Initial Capacity = stringSize; Default Load Factor = 0.75 splitList[i] = new HashSet<Integer>( 2 * Problem.n); // Initial Capacity = 2*stringSize; Default Load Factor = 0.75 for (int n = 0; n < Problem.n; n++) { if (n != i) { splitList[i].add(n); } } int mOne = selectedSet.getUniFrequencies(i); // Construct the initial single leaf decision graphs. int mZero = NS - mOne; Leaf newLeaf = new Leaf( 0, -1, mZero, mOne, SIZE); // There is only a single leaf per variable, at depth 0, with side -1. if (mZero > 0 && mOne > 0) { // If mZero = 0 or mOne = 0 there is no need to try any split with this leaf. for (int j = 0; j < NS; j++) { char alleleJI = individuals[j].getAllele(i); // Value of Xi in individual j. for (int s : splitList[i]) { char alleleS = individuals[j].getAllele(s); // Value of Xs in individual j. if (alleleJI == '0') { if (alleleS == '0') { newLeaf.addPossibleSplitFrequency(0, s); // m00[s]++; } else { newLeaf.addPossibleSplitFrequency(1, s); // m01[s]++; } } else if (alleleS == '0') { newLeaf.addPossibleSplitFrequency(2, s); // m10[s]++; } else { newLeaf.addPossibleSplitFrequency(3, s); // m11[s]++; } } } for (int s : splitList[i]) { int m00 = newLeaf.getPossibleSplitFrequency(0, s); int m01 = newLeaf.getPossibleSplitFrequency(1, s); int m10 = newLeaf.getPossibleSplitFrequency(2, s); int m11 = newLeaf.getPossibleSplitFrequency(3, s); double scoreGain = bayesianMetric.computeScoreGain(mZero, mOne, m00, m01, m10, m11); newLeaf.setScoreGain(s, scoreGain); newLeaf.updateBestSplit( s, scoreGain); // Responsible for updating the value of the best split score gain in this // leaf. } } // END: if(mZero > 0 ...) decisionGraphs[i] = new DecisionGraph( newLeaf); // Initially each graph as a single leaf and there are n-1 possible splits. decisionGraphs[i].updateBestLeaf(); double scoreGain = decisionGraphs[i].getBestLeafScoreGain(); if (scoreGain > this .bestScoreGain) { // Update the value of the global best score gain and position (DG). this.bestDecisionGraphPos = i; this.bestScoreGain = scoreGain; } } // END: for(int i = 0 ...) } // END: initializeBN(...)