public void initializeExamples(ClusAttrType[] attrs, RowData data) { // first remove all examplelists from attributes (-> ensembles!) for (int i = 0; i < attrs.length; i++) { ClusAttrType at = attrs[i]; if (at.isSparse()) { ((SparseNumericAttrType) at).resetExamples(); } } for (int i = 0; i < data.getNbRows(); i++) { SparseDataTuple tuple = (SparseDataTuple) data.getTuple(i); tuple.addExampleToAttributes(); } }
public void induceRandomForestRecursive(ClusNode node, RowData data) { ClusAttrType[] attrs = getDescriptiveAttributes(); ArrayList<ClusAttrType> attrList = new ArrayList<ClusAttrType>(); for (int i = 0; i < attrs.length; i++) { ClusAttrType at = attrs[i]; if (at.isSparse()) { if (((SparseNumericAttrType) at).getExampleWeight() >= getSettings().getMinimalWeight()) { attrList.add(at); } } else { attrList.add(at); } } Object[] attrArray = attrList.toArray(); induceRandomForestRecursive2(node, data, attrArray); }
public void induce(ClusNode node, RowData data) { if (getSettings().isEnsembleMode() && ((getSettings().getEnsembleMethod() == Settings.ENSEMBLE_RFOREST) || (getSettings().getEnsembleMethod() == Settings.ENSEMBLE_NOBAGRFOREST))) { induceRandomForest(node, data); } else { ClusAttrType[] attrs = getDescriptiveAttributes(); initializeExamples(attrs, data); ArrayList<ClusAttrType> attrList = new ArrayList<ClusAttrType>(); ArrayList<ArrayList> examplelistList = new ArrayList<ArrayList>(); for (int i = 0; i < attrs.length; i++) { ClusAttrType at = attrs[i]; if (at.isSparse()) { if (((SparseNumericAttrType) at).getExampleWeight() >= getSettings().getMinimalWeight()) { attrList.add(at); Object[] exampleArray = ((SparseNumericAttrType) at).getExamples().toArray(); RowData exampleData = new RowData(exampleArray, exampleArray.length); exampleData.sortSparse((SparseNumericAttrType) at, m_FindBestTest.getSortHelper()); ArrayList<SparseDataTuple> exampleList = new ArrayList<SparseDataTuple>(); for (int j = 0; j < exampleData.getNbRows(); j++) { exampleList.add((SparseDataTuple) exampleData.getTuple(j)); } ((SparseNumericAttrType) at).setExamples(exampleList); examplelistList.add(exampleList); } } else { attrList.add(at); examplelistList.add(null); } } Object[] attrArray = attrList.toArray(); Object[] examplelistArray = examplelistList.toArray(); induce(node, data, attrArray, examplelistArray); } }
public void induce(ClusNode node, RowData data, Object[] attrs, Object[] examplelists) { // System.out.println("INDUCE SPARSE with " + attrs.length + " attributes and " + // data.getNbRows() + " examples"); // Initialize selector and perform various stopping criteria if (initSelectorAndStopCrit(node, data)) { makeLeaf(node); return; } // Find best test for (int i = 0; i < attrs.length; i++) { ClusAttrType at = (ClusAttrType) attrs[i]; ArrayList examplelist = (ArrayList) examplelists[i]; if (at instanceof NominalAttrType) m_FindBestTest.findNominal((NominalAttrType) at, data); else if (examplelist == null) { m_FindBestTest.findNumeric((NumericAttrType) at, data); } else { m_FindBestTest.findNumeric((NumericAttrType) at, examplelist); } } // Partition data + recursive calls CurrentBestTestAndHeuristic best = m_FindBestTest.getBestTest(); if (best.hasBestTest()) { node.testToNode(best); // Output best test if (Settings.VERBOSE > 0) System.out.println("Test: " + node.getTestString() + " -> " + best.getHeuristicValue()); // Create children int arity = node.updateArity(); NodeTest test = node.getTest(); RowData[] subsets = new RowData[arity]; for (int j = 0; j < arity; j++) { subsets[j] = data.applyWeighted(test, j); } if (getSettings().showAlternativeSplits()) { filterAlternativeSplits(node, data, subsets); } if (node != m_Root && getSettings().hasTreeOptimize(Settings.TREE_OPTIMIZE_NO_INODE_STATS)) { // Don't remove statistics of root node; code below depends on them node.setClusteringStat(null); node.setTargetStat(null); } for (int j = 0; j < arity; j++) { ClusNode child = new ClusNode(); node.setChild(child, j); child.initClusteringStat(m_StatManager, m_Root.getClusteringStat(), subsets[j]); child.initTargetStat(m_StatManager, m_Root.getTargetStat(), subsets[j]); ArrayList<ClusAttrType> attrList = new ArrayList<ClusAttrType>(); ArrayList<ArrayList> examplelistList = new ArrayList<ArrayList>(); for (int i = 0; i < attrs.length; i++) { ClusAttrType at = (ClusAttrType) attrs[i]; if (at.isSparse()) { ArrayList newExampleList = ((SparseNumericAttrType) at).pruneExampleList(subsets[j]); double exampleWeight = getExampleWeight(newExampleList); if (exampleWeight >= getSettings().getMinimalWeight()) { attrList.add(at); examplelistList.add(newExampleList); } } else { attrList.add(at); examplelistList.add(null); } } Object[] attrArray = attrList.toArray(); Object[] exampleListArray = examplelistList.toArray(); induce(child, subsets[j], attrArray, exampleListArray); } } else { makeLeaf(node); } }