/**
 * Divides this node's instances between two newly created child nodes.
 *
 * <p>An instance is sent to the left child when its feature vector's value at
 * {@code m_gainRatio.getMaxValuedIndex()} is less than or equal to
 * {@code m_gainRatio.getMaxValuedThreshold()}; every other instance is sent
 * to the right child. The relative order of the indices in
 * {@code m_instIndices} is preserved within each child.
 *
 * @throws IllegalStateException if {@code m_ilist} is {@code null}
 */
public void split() {
    if (m_ilist == null) {
        throw new IllegalStateException("Frozen. Cannot split.");
    }

    final int total = m_instIndices.length;

    // First pass: decide the side of every instance and count the left side,
    // so that both index arrays can be allocated at their exact size.
    final boolean[] goesLeft = new boolean[total];
    int leftCount = 0;
    for (int pos = 0; pos < total; pos++) {
        Instance current = m_ilist.get(m_instIndices[pos]);
        FeatureVector features = (FeatureVector) current.getData();
        boolean belowOrAtThreshold =
            features.value(m_gainRatio.getMaxValuedIndex()) <= m_gainRatio.getMaxValuedThreshold();
        goesLeft[pos] = belowOrAtThreshold;
        if (belowOrAtThreshold) {
            leftCount++;
        }
    }
    final int rightCount = total - leftCount;

    logger.info("leftChild.size=" + leftCount + " rightChild.size=" + rightCount);

    // Second pass: copy each instance index into the array for its side.
    final int[] leftPart = new int[leftCount];
    final int[] rightPart = new int[rightCount];
    int nextLeft = 0;
    int nextRight = 0;
    for (int pos = 0; pos < total; pos++) {
        int instIndex = m_instIndices[pos];
        if (goesLeft[pos]) {
            leftPart[nextLeft++] = instIndex;
        } else {
            rightPart[nextRight++] = instIndex;
        }
    }

    m_leftChild = new Node(m_ilist, this, m_minNumInsts, leftPart);
    m_rightChild = new Node(m_ilist, this, m_minNumInsts, rightPart);
}
/**
 * Creates a childless node over a subset of an instance list.
 *
 * @param ilist       the instance list this node draws from; its data
 *                    alphabet is stored on the node
 * @param parent      the node recorded as this node's parent
 * @param minNumInsts value forwarded to {@code GainRatio.createGainRatio}
 *                    and stored on the node
 * @param instIndices positions within {@code ilist} that belong to this
 *                    node; when {@code null}, every position from
 *                    {@code 0} to {@code ilist.size() - 1} is used
 */
public Node(InstanceList ilist, Node parent, int minNumInsts, int[] instIndices) {
    int[] indices = instIndices;
    if (indices == null) {
        // No explicit subset was supplied: cover the whole list in order.
        indices = new int[ilist.size()];
        for (int k = 0; k < indices.length; k++) {
            indices[k] = k;
        }
    }

    m_gainRatio = GainRatio.createGainRatio(ilist, indices, minNumInsts);

    m_ilist = ilist;
    m_instIndices = indices;
    m_dataDict = ilist.getDataAlphabet();
    m_minNumInsts = minNumInsts;
    m_parent = parent;

    // A freshly built node has no children.
    m_leftChild = null;
    m_rightChild = null;
}
/**
 * Recursively computes a cost for the subtree rooted at this node and drops
 * this node's children when keeping them does not lower that cost.
 *
 * <p>The cost of treating this node as a leaf is {@code getMDL() + 1}. For an
 * internal node, the cost of keeping the split is the base-2 logarithm of
 * {@code m_gainRatio.getNumSplitPointsForBestFeature()}, plus one, plus the
 * costs returned by the left and right children. The smaller of the two is
 * the result; when it is (almost) equal to the leaf cost, both child
 * references are set to {@code null}.
 *
 * @return the smaller of the leaf cost and the split cost for this subtree
 */
public double computeCostAndPrune() {
    final double ownCost = getMDL();
    final double leafCost = ownCost + 1;
    if (isLeaf()) {
        return leafCost;
    }

    // Children are processed first (left, then right), so their own subtrees
    // are already pruned when this node is considered.
    final double leftCost = getLeftChild().computeCostAndPrune();
    final double rightCost = getRightChild().computeCostAndPrune();

    // Dividing the natural logarithm by GainRatio.log2 converts it to base 2.
    final double splitCost =
        Math.log(m_gainRatio.getNumSplitPointsForBestFeature()) / GainRatio.log2;
    final double keepChildrenCost = splitCost + 1 + leftCost + rightCost;

    final double best = Math.min(leafCost, keepChildrenCost);
    if (Maths.almostEquals(best, leafCost)) {
        m_leftChild = null;
        m_rightChild = null;
    }
    return best;
}
/**
 * Returns the entry of this node's data alphabet that corresponds to the
 * index reported by {@code m_gainRatio.getMaxValuedIndex()}.
 *
 * @return the object {@code m_dataDict} holds for that index
 */
public Object getSplitFeature() {
    final Object feature = m_dataDict.lookupObject(m_gainRatio.getMaxValuedIndex());
    return feature;
}