/** * Generates a clusterer. * * @param data the training instances * @throws Exception if something goes wrong */ @Override public void buildClusterer(Instances data) throws Exception { // can clusterer handle the data ? getCapabilities().testWithFail(data); m_replaceMissing = new ReplaceMissingValues(); Instances instances = new Instances(data); instances.setClassIndex(-1); m_replaceMissing.setInputFormat(instances); data = weka.filters.Filter.useFilter(instances, m_replaceMissing); instances = null; // initialize all fields that are not being set via options m_data = data; m_numInstances = m_data.numInstances(); m_numAttributes = m_data.numAttributes(); random = new Random(getSeed()); // initialize the statistics of the input training data input = sIB_ProcessInput(); // object to hold the best partition bestT = new Partition(); // the real clustering double bestL = Double.NEGATIVE_INFINITY; for (int k = 0; k < m_numRestarts; k++) { if (m_verbose) { System.out.format("restart number %s...\n", k); } // initialize the partition and optimize it Partition tmpT = sIB_InitT(input); tmpT = sIB_OptimizeT(tmpT, input); // if a better partition is found, save it if (tmpT.L > bestL) { tmpT.copy(bestT); bestL = bestT.L; } if (m_verbose) { System.out.println("\nPartition status : "); System.out.println("------------------"); System.out.println(tmpT.toString() + "\n"); } } if (m_verbose) { System.out.println("\nBest Partition"); System.out.println("==============="); System.out.println(bestT.toString()); } // save memory m_data = new Instances(m_data, 0); }
/**
 * Adds a partition to {@code partitionMap}, keyed by a {@code ByteBuffer} wrapping
 * {@code partition.getBytes()}, and validates that the key is not already present.
 *
 * <p>If a partition is already mapped to the same key, the existing mapping is left unchanged
 * and an exception is thrown. After a successful add, {@code maxPartitionId} is raised to
 * {@code partition.getId() + 1} when the new partition's id is at or above the current value.
 *
 * @param partition the partition to register
 * @throws IllegalStateException if a partition with the same key is already registered
 */
private void addPartition(Partition partition) {
  ByteBuffer key = ByteBuffer.wrap(partition.getBytes());
  // putIfAbsent, unlike put, does not overwrite: a rejected duplicate must not replace the
  // partition that was registered first.
  if (partitionMap.putIfAbsent(key, partition) != null) {
    throw new IllegalStateException("Duplicate Partition detected: " + partition.toString());
  }
  if (partition.getId() >= maxPartitionId) {
    maxPartitionId = partition.getId() + 1;
  }
}