@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // only use numeric attributes
    Tools.onlyNumericalAttributes(exampleSet, "KernelPCA");
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this);

    Attributes attributes = exampleSet.getAttributes();
    int numberOfExamples = exampleSet.size();

    // calculating means for later zero centering
    exampleSet.recalculateAllAttributeStatistics();
    double[] means = new double[exampleSet.getAttributes().size()];
    int i = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
        means[i] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
        i++;
    }

    // kernel
    Kernel kernel = Kernel.createKernel(this);

    // copying zero centered example values
    ArrayList<double[]> exampleValues = new ArrayList<double[]>(numberOfExamples);
    for (Example columnExample : exampleSet) {
        exampleValues.add(getAttributeValues(columnExample, attributes, means));
    }

    // filling kernel matrix
    Matrix kernelMatrix = new Matrix(numberOfExamples, numberOfExamples);
    for (i = 0; i < numberOfExamples; i++) {
        for (int j = 0; j < numberOfExamples; j++) {
            kernelMatrix.set(i, j, kernel.calculateDistance(exampleValues.get(i), exampleValues.get(j)));
        }
    }

    // calculating eigenvectors
    EigenvalueDecomposition eig = kernelMatrix.eig();
    Model model = new KernelPCAModel(exampleSet, means, eig.getV(), exampleValues, kernel);

    if (exampleSetOutput.isConnected()) {
        exampleSetOutput.deliver(model.apply(exampleSet));
    }
    originalOutput.deliver(exampleSet);
    modelOutput.deliver(model);
}
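// Editor's sketch: the Matrix and EigenvalueDecomposition types above come from the Jama
// library, and kernel.calculateDistance(...) is used here to supply the kernel value for a
// pair of examples. The following self-contained demo (toy data and a hand-rolled linear
// kernel, both assumptions for illustration only) shows the same fill-then-decompose pattern.
import Jama.EigenvalueDecomposition;
import Jama.Matrix;

public class KernelMatrixEigenDemo {

    // toy linear kernel standing in for kernel.calculateDistance(...)
    static double linearKernel(double[] x, double[] y) {
        double sum = 0d;
        for (int d = 0; d < x.length; d++) {
            sum += x[d] * y[d];
        }
        return sum;
    }

    public static void main(String[] args) {
        double[][] data = { { 1, 0 }, { 0, 1 }, { 1, 1 } }; // made-up data, zero centering omitted
        int n = data.length;
        Matrix kernelMatrix = new Matrix(n, n);
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                kernelMatrix.set(i, j, linearKernel(data[i], data[j]));
            }
        }
        EigenvalueDecomposition eig = kernelMatrix.eig();
        // getV() is the eigenvector matrix handed to KernelPCAModel above; for a symmetric
        // input Jama returns the eigenvalues in ascending order.
        double[] eigenvalues = eig.getRealEigenvalues();
        System.out.println("largest eigenvalue: " + eigenvalues[n - 1]);
    }
}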
@Override
public Model learn(ExampleSet exampleSet) throws OperatorException {
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);

    ImprovedNeuralNetModel model = new ImprovedNeuralNetModel(exampleSet);

    List<String[]> hiddenLayers = getParameterList(PARAMETER_HIDDEN_LAYERS);
    int maxCycles = getParameterAsInt(PARAMETER_TRAINING_CYCLES);
    double maxError = getParameterAsDouble(PARAMETER_ERROR_EPSILON);
    double learningRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    double momentum = getParameterAsDouble(PARAMETER_MOMENTUM);
    boolean decay = getParameterAsBoolean(PARAMETER_DECAY);
    boolean shuffle = getParameterAsBoolean(PARAMETER_SHUFFLE);
    boolean normalize = getParameterAsBoolean(PARAMETER_NORMALIZE);
    RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(this);

    model.train(exampleSet, hiddenLayers, maxCycles, maxError, learningRate, momentum, decay,
            shuffle, normalize, randomGenerator, this);
    return model;
}
/**
 * Checks if the arguments are compatible with the attributes specified by
 * {@link #getInputAttributes()}.
 */
private boolean argumentsOk(ExampleTable input) {
    Attribute[] inputA = getInputAttributes();
    for (int i = 0; i < inputA.length; i++) {
        if (!Tools.compatible(arguments[i], inputA[i])) {
            return false;
        }
    }
    return true;
}
@Override
public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // generating assignment
    RandomGenerator random = RandomGenerator.getRandomGenerator(this);
    int[] clusterAssignments = new int[exampleSet.size()];
    int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS);
    for (int i = 0; i < exampleSet.size(); i++) {
        clusterAssignments[i] = random.nextInt(k);
    }

    ClusterModel model = new ClusterModel(exampleSet, k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    model.setClusterAssignments(clusterAssignments, exampleSet);

    // generating cluster attribute
    if (addsClusterAttribute()) {
        Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
        exampleSet.getExampleTable().addAttribute(cluster);
        exampleSet.getAttributes().setCluster(cluster);
        int i = 0;
        for (Example example : exampleSet) {
            example.setValue(cluster, "cluster_" + clusterAssignments[i]);
            i++;
        }
    }
    return model;
}
/**
 * Creates a fresh example set of the given size from the RapidMiner example reader. The alpha
 * values and b are zero, the label will be set if it is known.
 */
public SVMExamples(com.rapidminer.example.ExampleSet exampleSet, Attribute labelAttribute,
        Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    Attribute idAttribute = exampleSet.getAttributes().getId();
    int exampleCounter = 0;
    while (reader.hasNext()) {
        com.rapidminer.example.Example current = reader.next();

        // collect the non-default attribute values of this example
        Map<Integer, Double> attributeMap = new LinkedHashMap<Integer, Double>();
        int a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            double value = current.getValue(attribute);
            if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), value)) {
                attributeMap.put(a, value);
            }
            if ((a + 1) > dim) {
                dim = (a + 1);
            }
            a++;
        }

        // store them as a sparse row, z-scaling where mean/variance information exists
        atts[exampleCounter] = new double[attributeMap.size()];
        index[exampleCounter] = new int[attributeMap.size()];
        Iterator<Map.Entry<Integer, Double>> i = attributeMap.entrySet().iterator();
        int attributeCounter = 0;
        while (i.hasNext()) {
            Map.Entry<Integer, Double> e = i.next();
            Integer indexValue = e.getKey();
            Double attributeValue = e.getValue();
            index[exampleCounter][attributeCounter] = indexValue.intValue();
            double value = attributeValue.doubleValue();
            MeanVariance meanVariance = meanVarianceMap.get(indexValue);
            if (meanVariance != null) {
                if (meanVariance.getVariance() == 0.0d) {
                    value = 0.0d;
                } else {
                    value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
                }
            }
            atts[exampleCounter][attributeCounter] = value;
            attributeCounter++;
        }

        // set the label (+1/-1 for nominal labels) and the id, if present
        if (labelAttribute != null) {
            double label = current.getValue(labelAttribute);
            if (labelAttribute.isNominal()) {
                ys[exampleCounter] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
            } else {
                ys[exampleCounter] = label;
            }
        }
        if (idAttribute != null) {
            ids[exampleCounter] = current.getValueAsString(idAttribute);
        }
        exampleCounter++;
    }
}
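// Editor's sketch: the constructor above packs each example into parallel index/value arrays,
// keeping only non-default values. A minimal, self-contained demo of that dense-to-sparse
// packing with made-up data (this is not RapidMiner API, just the same pattern):
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;

public class SparsePackingDemo {
    public static void main(String[] args) {
        double[] dense = { 0.0, 3.5, 0.0, 1.2 }; // made-up attribute values
        double defaultValue = 0.0;               // stands in for attribute.getDefault()

        // collect only non-default entries, preserving attribute order
        Map<Integer, Double> attributeMap = new LinkedHashMap<>();
        for (int a = 0; a < dense.length; a++) {
            if (dense[a] != defaultValue) { // the real code uses Tools.isDefault, which also handles NaN
                attributeMap.put(a, dense[a]);
            }
        }

        // unpack into the parallel index/value arrays, as in SVMExamples
        int[] index = new int[attributeMap.size()];
        double[] atts = new double[attributeMap.size()];
        int c = 0;
        for (Map.Entry<Integer, Double> e : attributeMap.entrySet()) {
            index[c] = e.getKey();
            atts[c] = e.getValue();
            c++;
        }
        System.out.println(Arrays.toString(index)); // [1, 3]
        System.out.println(Arrays.toString(atts));  // [3.5, 1.2]
    }
}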
public HyperplaneModel(ExampleSet exampleSet, String classNegative, String classPositive, Kernel kernel) {
    super(exampleSet);
    this.coefficientNames = com.rapidminer.example.Tools.getRegularAttributeNames(exampleSet);
    this.classNegative = classNegative;
    this.classPositive = classPositive;
    this.kernel = kernel;
}
/**
 * Iterates over all models and returns the class with maximum likelihood.
 *
 * @param origExampleSet the set of examples to be classified
 */
@Override
public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel)
        throws OperatorException {
    final String attributePrefix = "AdaBoostModelPrediction";
    final int numLabels = predictedLabel.getMapping().size();
    final Attribute[] specialAttributes = new Attribute[numLabels];
    OperatorProgress progress = null;
    if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) {
        progress = getOperator().getProgress();
        progress.setTotal(100);
    }
    for (int i = 0; i < numLabels; i++) {
        specialAttributes[i] = com.rapidminer.example.Tools.createSpecialAttribute(origExampleSet,
                attributePrefix + i, Ontology.NUMERICAL);
        if (progress != null) {
            progress.setCompleted((int) (25.0 * (i + 1) / numLabels));
        }
    }

    Iterator<Example> reader = origExampleSet.iterator();
    int progressCounter = 0;
    while (reader.hasNext()) {
        Example example = reader.next();
        for (int i = 0; i < specialAttributes.length; i++) {
            example.setValue(specialAttributes[i], 0);
        }
        if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) {
            progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25);
        }
    }

    reader = origExampleSet.iterator();
    for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) {
        Model model = this.getModel(modelNr);
        ExampleSet exampleSet = (ExampleSet) origExampleSet.clone();
        exampleSet = model.apply(exampleSet);
        this.updateEstimates(exampleSet, modelNr, specialAttributes);
        PredictionModel.removePredictedLabel(exampleSet);
        if (progress != null) {
            progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50);
        }
    }

    // Turn prediction weights into confidences and a crisp prediction:
    this.evaluateSpecialAttributes(origExampleSet, specialAttributes);

    // Clean up attributes:
    for (int i = 0; i < numLabels; i++) {
        origExampleSet.getAttributes().remove(specialAttributes[i]);
        origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
        if (progress != null) {
            progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75);
        }
    }
    return origExampleSet;
}
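// Editor's sketch: updateEstimates and evaluateSpecialAttributes are not shown above, so the
// demo below only illustrates the accumulate-then-argmax idea the per-label special attributes
// serve; all weights, predictions, and names here are made up for illustration.
public class VoteAccumulationDemo {
    public static void main(String[] args) {
        double[] modelWeights = { 0.8, 0.5, 0.3 }; // made-up base-model weights
        int[] predictedClass = { 1, 0, 1 };        // class index predicted by each model
        int numLabels = 2;

        double[] votes = new double[numLabels];    // plays the role of the special attributes
        for (int m = 0; m < modelWeights.length; m++) {
            votes[predictedClass[m]] += modelWeights[m];
        }

        // the crisp prediction is the label with the largest accumulated weight
        int crisp = 0;
        for (int label = 1; label < numLabels; label++) {
            if (votes[label] > votes[crisp]) {
                crisp = label;
            }
        }
        System.out.println("predicted class: " + crisp); // prints 1 (0.8 + 0.3 > 0.5)
    }
}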
protected boolean checkCompatibility(Attribute attribute, Attribute compatible, String[] functions) {
    if (Tools.compatible(attribute, compatible)) {
        for (int f = 0; f < functions.length; f++) {
            if (attribute.getConstruction().indexOf(functions[f]) != -1) {
                return false;
            }
        }
        return true;
    } else {
        return false;
    }
}
// checking for example set and valid attributes
@Override
public void init(ExampleSet exampleSet) throws OperatorException {
    super.init(exampleSet);
    Tools.onlyNominalAttributes(exampleSet, "nominal similarities");
    this.useAttribute = new boolean[exampleSet.getAttributes().size()];
    int i = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
        if (attribute.isNominal()) {
            useAttribute[i] = true;
        }
        i++;
    }
}
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // needed for some measures
    Tools.checkAndCreateIds(exampleSet);

    DistanceMeasure measure = measureHelper.getInitializedMeasure(exampleSet);
    SimilarityMeasureObject measureObject = new SimilarityMeasureObject(measure, exampleSet);
    ObjectVisualizerService.addObjectVisualizer(measureObject, new ExampleVisualizer(exampleSet));

    similarityOutput.deliver(measureObject);
    exampleSetOutput.deliver(exampleSet);
}
@Override
public void init(ExampleSet exampleSet) throws OperatorException {
    super.init(exampleSet);
    Tools.onlyNumericalAttributes(exampleSet, "value based similarities");
    Attributes attributes = exampleSet.getAttributes();
    if (attributes.size() != 1) {
        throw new OperatorException(
                "The chosen Bregman divergence is not applicable for this data set: it requires exactly one attribute.");
    }
    for (Example example : exampleSet) {
        for (Attribute attribute : attributes) {
            if (example.getValue(attribute) <= 0) {
                throw new OperatorException(
                        "The chosen Bregman divergence is not applicable for this data set: it requires strictly positive attribute values.");
            }
        }
    }
}
protected void prepareWeights(ExampleSet exampleSet) {
    Attribute weightAttr = exampleSet.getAttributes().getWeight();
    if (weightAttr == null) {
        this.oldWeights = null;
        com.rapidminer.example.Tools.createWeightAttribute(exampleSet);
    } else {
        // back up the old weights and reset all weights to 1
        this.oldWeights = new double[exampleSet.size()];
        Iterator<Example> reader = exampleSet.iterator();
        for (int i = 0; reader.hasNext() && i < oldWeights.length; i++) {
            Example example = reader.next();
            if (example != null) {
                this.oldWeights[i] = example.getWeight();
                example.setWeight(1);
            }
        }
    }
}
public IOObject[] apply() throws OperatorException {
    ExampleSet eSet = getInput(ExampleSet.class);

    // only warning, removing is done by createSpecialAttribute(...)
    Attribute idAttribute = eSet.getAttributes().getId();
    if (idAttribute != null) {
        logWarning("Overwriting old id attribute!");
    }

    // create new id attribute
    boolean nominalIds = getParameterAsBoolean(PARAMETER_CREATE_NOMINAL_IDS);
    idAttribute = Tools.createSpecialAttribute(eSet, Attributes.ID_NAME,
            nominalIds ? Ontology.NOMINAL : Ontology.INTEGER);

    // set IDs
    int currentId = 1;
    Iterator<Example> r = eSet.iterator();
    while (r.hasNext()) {
        Example example = r.next();
        example.setValue(idAttribute,
                nominalIds ? idAttribute.getMapping().mapString("id_" + currentId) : currentId);
        currentId++;
        checkForStop();
    }

    // initialize example visualizer
    Operator visualizer = null;
    try {
        visualizer = OperatorService.createOperator(ExampleVisualizationOperator.class);
    } catch (OperatorCreationException e) {
        logNote("Cannot initialize example visualizer, skipping...");
    }
    if (visualizer != null) {
        visualizer.apply(new IOContainer(eSet));
    }

    return new IOObject[] { eSet };
}
@Override
public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    int k = getParameterAsInt(PARAMETER_K);
    int maxOptimizationSteps = getParameterAsInt(PARAMETER_MAX_OPTIMIZATION_STEPS);
    boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_WEIGHTS);
    Kernel kernel = Kernel.createKernel(this);

    // init operator progress
    getProgress().setTotal(maxOptimizationSteps);

    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
    if (exampleSet.size() < k) {
        throw new UserError(this, 142, k);
    }

    // extracting attribute names
    Attributes attributes = exampleSet.getAttributes();
    ArrayList<String> attributeNames = new ArrayList<String>(attributes.size());
    for (Attribute attribute : attributes) {
        attributeNames.add(attribute.getName());
    }
    Attribute weightAttribute = attributes.getWeight();

    RandomGenerator generator = RandomGenerator.getRandomGenerator(this);

    ClusterModel model = new ClusterModel(exampleSet, k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));

    // init centroids: assign every example to a random cluster
    int[] clusterAssignments = new int[exampleSet.size()];
    for (int i = 0; i < exampleSet.size(); i++) {
        clusterAssignments[i] = generator.nextIntInRange(0, k);
    }

    // run optimization steps
    boolean stable = false;
    for (int step = 0; step < maxOptimizationSteps && !stable; step++) {
        // calculating cluster kernel properties
        double[] clusterWeights = new double[k];
        double[] clusterKernelCorrection = new double[k];
        int i = 0;
        for (Example firstExample : exampleSet) {
            double firstExampleWeight = useExampleWeights ? firstExample.getValue(weightAttribute) : 1d;
            double[] firstExampleValues = getAsDoubleArray(firstExample, attributes);
            clusterWeights[clusterAssignments[i]] += firstExampleWeight;
            int j = 0;
            for (Example secondExample : exampleSet) {
                if (clusterAssignments[i] == clusterAssignments[j]) {
                    double secondExampleWeight = useExampleWeights ? secondExample.getValue(weightAttribute) : 1d;
                    clusterKernelCorrection[clusterAssignments[i]] += firstExampleWeight * secondExampleWeight
                            * kernel.calculateDistance(firstExampleValues, getAsDoubleArray(secondExample, attributes));
                }
                j++;
            }
            i++;
        }
        for (int z = 0; z < k; z++) {
            clusterKernelCorrection[z] /= clusterWeights[z] * clusterWeights[z];
        }

        // assign examples to new centroids
        int[] newClusterAssignments = new int[exampleSet.size()];
        i = 0;
        for (Example example : exampleSet) {
            double[] exampleValues = getAsDoubleArray(example, attributes);
            double exampleKernelValue = kernel.calculateDistance(exampleValues, exampleValues);
            double nearestDistance = Double.POSITIVE_INFINITY;
            int nearestIndex = 0;
            for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
                double distance = 0;
                // iterating over all examples in the cluster to get the kernel distance
                int j = 0;
                for (Example clusterExample : exampleSet) {
                    if (clusterAssignments[j] == clusterIndex) {
                        distance += (useExampleWeights ? clusterExample.getValue(weightAttribute) : 1d)
                                * kernel.calculateDistance(getAsDoubleArray(clusterExample, attributes), exampleValues);
                    }
                    j++;
                }
                distance *= -2d / clusterWeights[clusterIndex];
                distance += exampleKernelValue;
                distance += clusterKernelCorrection[clusterIndex];
                if (distance < nearestDistance) {
                    nearestDistance = distance;
                    nearestIndex = clusterIndex;
                }
            }
            newClusterAssignments[i] = nearestIndex;
            i++;
        }

        // check whether the assignment is stable
        stable = true;
        for (int j = 0; j < exampleSet.size() && stable; j++) {
            stable &= newClusterAssignments[j] == clusterAssignments[j];
        }
        clusterAssignments = newClusterAssignments;

        // trigger operator progress
        getProgress().step();
    }

    // setting last clustering into model
    model.setClusterAssignments(clusterAssignments, exampleSet);
    getProgress().complete();

    if (addsClusterAttribute()) {
        Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
        exampleSet.getExampleTable().addAttribute(cluster);
        exampleSet.getAttributes().setCluster(cluster);
        int i = 0;
        for (Example example : exampleSet) {
            example.setValue(cluster, "cluster_" + clusterAssignments[i]);
            i++;
        }
    }
    return model;
}
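// Editor's sketch: the three distance terms above implement the kernel trick for the squared
// distance to an implicit centroid, k(x,x) - (2/W_c) * sum_i w_i k(x_i,x)
// + (1/W_c^2) * sum_{i,j} w_i w_j k(x_i,x_j), where the last term is clusterKernelCorrection.
// A small unweighted sanity check with made-up data and a linear kernel, for which the
// centroid can also be computed explicitly:
public class KernelDistanceDemo {

    static double k(double[] a, double[] b) { // linear kernel
        double s = 0d;
        for (int i = 0; i < a.length; i++) {
            s += a[i] * b[i];
        }
        return s;
    }

    public static void main(String[] args) {
        double[][] cluster = { { 0, 0 }, { 2, 0 }, { 1, 3 } }; // members of one cluster
        double[] x = { 1, 0 };                                 // query point
        int n = cluster.length;

        // kernel-trick form: k(x,x) - (2/n) * sum_i k(x_i,x) + (1/n^2) * sum_{i,j} k(x_i,x_j)
        double cross = 0d, correction = 0d;
        for (double[] xi : cluster) {
            cross += k(xi, x);
            for (double[] xj : cluster) {
                correction += k(xi, xj);
            }
        }
        double kernelDistance = k(x, x) - 2d * cross / n + correction / (n * n);

        // explicit form, available here only because the kernel is linear: |x - mean|^2
        double direct = 0d;
        for (int d = 0; d < x.length; d++) {
            double sum = 0d;
            for (double[] xi : cluster) {
                sum += xi[d];
            }
            double meanD = sum / n;
            direct += (x[d] - meanD) * (x[d] - meanD);
        }
        System.out.println(kernelDistance + " == " + direct); // both 1.0
    }
}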
/**
 * Constructs a <code>Model</code> by repeatedly running a weak learner, reweighting the training
 * example set accordingly, and combining the hypotheses using the available weighted performance
 * values.
 */
public Model learn(ExampleSet exampleSet) throws OperatorException {
    this.runVector = new RunVector();
    BayBoostModel ensembleNewBatch = null;
    BayBoostModel ensembleExtBatch = null;
    // for models and their probability estimates
    final Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>();
    Vector<BayBoostBaseModelInfo> modelInfo2 = new Vector<BayBoostBaseModelInfo>();
    this.currentIteration = 0;
    int firstOpenBatch = 1;

    // prepare the stream control attribute
    final Attribute streamControlAttribute;
    {
        Attribute attr = exampleSet.getAttributes().get(STREAM_CONTROL_ATTRIB_NAME);
        if (attr == null) {
            streamControlAttribute = com.rapidminer.example.Tools.createSpecialAttribute(exampleSet,
                    STREAM_CONTROL_ATTRIB_NAME, Ontology.INTEGER);
        } else {
            streamControlAttribute = attr;
            logWarning("Attribute with the (reserved) name of the stream control attribute exists. "
                    + "It is probably an old version created by this operator. Trying to recycle it...");
            // Resetting the stream control attribute values by overwriting them with 0 avoids
            // (unlikely) problems in case the same ExampleSet is passed to this operator over
            // and over again:
            Iterator<Example> e = exampleSet.iterator();
            while (e.hasNext()) {
                e.next().setValue(streamControlAttribute, 0);
            }
        }
    }

    // and the weight attribute
    if (exampleSet.getAttributes().getWeight() == null) {
        this.prepareWeights(exampleSet);
    }

    boolean estimateFavoursExtBatch = true;
    // *** The main loop, one iteration per batch: ***
    Iterator<Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
        // increment batch number, collect batch and evaluate performance of current model on batch
        double[] classPriors = this.prepareBatch(++this.currentIteration, reader, streamControlAttribute);

        ConditionedExampleSet trainingSet = new ConditionedExampleSet(exampleSet,
                new BatchFilterCondition(streamControlAttribute, this.currentIteration));

        final EstimatedPerformance estPerf;

        // Step 1: apply the ensemble model to the current batch (prediction phase),
        // evaluate and store the result
        if (ensembleExtBatch != null) {
            // apply extended batch model first:
            trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
            this.performance = evaluatePredictions(trainingSet); // unweighted performance

            // then apply new batch model:
            trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
            double newBatchPerformance = evaluatePredictions(trainingSet);

            // heuristic: use the extended batch model for predicting unclassified instances
            if (estimateFavoursExtBatch) {
                estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
            } else {
                estPerf = new EstimatedPerformance("accuracy", newBatchPerformance, trainingSet.size(), false);
            }

            // continue with the better model:
            if (newBatchPerformance > this.performance) {
                this.performance = newBatchPerformance;
                firstOpenBatch = Math.max(1, this.currentIteration - 1);
            } else {
                modelInfo.clear();
                modelInfo.addAll(modelInfo2);
            }
        } else if (ensembleNewBatch != null) {
            trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
            this.performance = evaluatePredictions(trainingSet);
            firstOpenBatch = Math.max(1, this.currentIteration - 1);
            estPerf = new EstimatedPerformance("accuracy", this.performance, trainingSet.size(), false);
        } else {
            estPerf = null; // no model ==> no prediction performance
        }

        if (estPerf != null) {
            PerformanceVector perf = new PerformanceVector();
            perf.addAveragable(estPerf);
            this.runVector.addVector(perf);
        }

        // *** retraining phase ***
        // Step 2: First reconstruct the initial weighting, if necessary.
        if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS)) {
            this.rescalePriors(trainingSet, classPriors);
        }

        estimateFavoursExtBatch = true;

        // Step 3: Find better weights for existing models and continue training
        if (modelInfo.size() > 0) {
            modelInfo2 = new Vector<BayBoostBaseModelInfo>();
            for (BayBoostBaseModelInfo bbbmi : modelInfo) {
                // BayBoostBaseModelInfo objects cannot be changed, no deep copy required
                modelInfo2.add(bbbmi);
            }

            // separate hold out set
            final double holdOutRatio = this.getParameterAsDouble(PARAMETER_FRACTION_HOLD_OUT_SET);
            Vector<Example> holdOutExamples = new Vector<Example>();
            if (holdOutRatio > 0) {
                RandomGenerator random = RandomGenerator.getRandomGenerator(this);
                Iterator<Example> randBatchReader = trainingSet.iterator();
                while (randBatchReader.hasNext()) {
                    Example example = randBatchReader.next();
                    if (random.nextDoubleInRange(0, 1) <= holdOutRatio) {
                        example.setValue(streamControlAttribute, 0);
                        holdOutExamples.add(example);
                    }
                }
                // TODO: create new example set
                // trainingSet.updateCondition();
            }

            // model 1: train one more base classifier
            boolean trainingExamplesLeft = this.adjustBaseModelWeights(trainingSet, modelInfo);
            if (trainingExamplesLeft) {
                // "trainingExamplesLeft" needs to be checked to avoid exceptions.
                // Anyway, learning does not make sense, otherwise.
                this.trainAdditionalModel(trainingSet, modelInfo);
            }
            ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);

            // model 2: remove last classifier, extend batch, train on extended batch;
            // because of the ">=" condition it is sufficient to remember the opening batch
            ExampleSet extendedBatch = new ConditionedExampleSet(exampleSet,
                    new BatchFilterCondition(streamControlAttribute, firstOpenBatch));
            classPriors = this.prepareExtendedBatch(extendedBatch);
            if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS)) {
                this.rescalePriors(extendedBatch, classPriors);
            }
            modelInfo2.remove(modelInfo2.size() - 1);
            trainingExamplesLeft = this.adjustBaseModelWeights(extendedBatch, modelInfo2);

            // If no training examples are left: no need and chance to continue training.
            if (!trainingExamplesLeft) {
                ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
            } else {
                boolean success = this.trainAdditionalModel(extendedBatch, modelInfo2);
                if (success) {
                    ensembleExtBatch = new BayBoostModel(exampleSet, modelInfo2, classPriors);
                } else {
                    ensembleExtBatch = null;
                    estimateFavoursExtBatch = false;
                }
            }

            if (holdOutRatio > 0) {
                Iterator<Example> hoEit = holdOutExamples.iterator();
                while (hoEit.hasNext()) {
                    hoEit.next().setValue(streamControlAttribute, this.currentIteration);
                }
                // TODO: create new example set
                // trainingSet.updateCondition();

                if (ensembleExtBatch != null) {
                    trainingSet = (ConditionedExampleSet) ensembleNewBatch.apply(trainingSet);
                    hoEit = holdOutExamples.iterator();
                    int errors = 0;
                    while (hoEit.hasNext()) {
                        Example example = hoEit.next();
                        if (example.getPredictedLabel() != example.getLabel()) {
                            errors++;
                        }
                    }
                    double newBatchErr = ((double) errors) / holdOutExamples.size();

                    trainingSet = (ConditionedExampleSet) ensembleExtBatch.apply(trainingSet);
                    hoEit = holdOutExamples.iterator();
                    errors = 0;
                    while (hoEit.hasNext()) {
                        Example example = hoEit.next();
                        if (example.getPredictedLabel() != example.getLabel()) {
                            errors++;
                        }
                    }
                    double extBatchErr = ((double) errors) / holdOutExamples.size();

                    estimateFavoursExtBatch = (extBatchErr <= newBatchErr);
                    if (estimateFavoursExtBatch) {
                        ensembleExtBatch = this.retrainLastWeight(ensembleExtBatch, trainingSet, holdOutExamples);
                    } else {
                        ensembleNewBatch = this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
                    }
                } else {
                    ensembleNewBatch = this.retrainLastWeight(ensembleNewBatch, trainingSet, holdOutExamples);
                }
            }
        } else {
            this.trainAdditionalModel(trainingSet, modelInfo);
            ensembleNewBatch = new BayBoostModel(exampleSet, modelInfo, classPriors);
            ensembleExtBatch = null;
            estimateFavoursExtBatch = false;
        }
    }

    this.restoreOldWeights(exampleSet);
    return (ensembleExtBatch == null ? ensembleNewBatch : ensembleExtBatch);
}
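// Editor's sketch: the two hold-out passes above both reduce to counting prediction/label
// disagreements and keeping the model with the lower error rate; ties favour the
// extended-batch model via the "<=" comparison. A minimal demo with made-up labels:
public class HoldOutErrorDemo {

    static double errorRate(double[] predicted, double[] actual) {
        int errors = 0;
        for (int i = 0; i < predicted.length; i++) {
            if (predicted[i] != actual[i]) {
                errors++;
            }
        }
        return ((double) errors) / predicted.length;
    }

    public static void main(String[] args) {
        double[] labels = { 0, 1, 1, 0 };                                    // made-up hold-out labels
        double newBatchErr = errorRate(new double[] { 0, 1, 0, 0 }, labels); // 0.25
        double extBatchErr = errorRate(new double[] { 0, 1, 1, 1 }, labels); // 0.25
        boolean favoursExtBatch = extBatchErr <= newBatchErr;                // tie -> extended batch
        System.out.println("favours extended batch: " + favoursExtBatch);    // true
    }
}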
/** Sets the given data for the given index. */
@Override
protected void set(int index, double value, double defaultValue) {
    if (Tools.isDefault(defaultValue, value)) {
        data.remove(index);
    } else {
        data.put(index, value);
    }
}
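// Editor's sketch: a self-contained (hypothetical) class showing the map-backed sparse
// storage idea behind set(...) above. Only values that differ from the default are stored;
// the real Tools.isDefault also treats NaN defaults specially, which this demo omits.
import java.util.HashMap;
import java.util.Map;

public class SparseRow {
    private final Map<Integer, Double> data = new HashMap<>();
    private final double defaultValue;

    public SparseRow(double defaultValue) {
        this.defaultValue = defaultValue;
    }

    public void set(int index, double value) {
        if (value == defaultValue) {
            data.remove(index); // defaults are never stored explicitly
        } else {
            data.put(index, value);
        }
    }

    public double get(int index) {
        return data.getOrDefault(index, defaultValue);
    }
}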
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
    DistanceMeasure measure = measureHelper.getInitializedMeasure(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
    Tools.checkAndCreateIds(exampleSet);

    Attribute idAttribute = exampleSet.getAttributes().getId();
    boolean idAttributeIsNominal = idAttribute.isNominal();
    DistanceMatrix matrix = new DistanceMatrix(exampleSet.size());
    Map<Integer, HierarchicalClusterNode> clusterMap =
            new HashMap<Integer, HierarchicalClusterNode>(exampleSet.size());
    int[] clusterIds = new int[exampleSet.size()];

    // filling the distance matrix
    int nextClusterId = 0;
    for (Example example1 : exampleSet) {
        checkForStop();
        clusterIds[nextClusterId] = nextClusterId;
        int y = 0;
        for (Example example2 : exampleSet) {
            if (y > nextClusterId) {
                matrix.set(nextClusterId, y, measure.calculateDistance(example1, example2));
            }
            y++;
        }
        if (idAttributeIsNominal) {
            clusterMap.put(nextClusterId,
                    new HierarchicalClusterLeafNode(nextClusterId, example1.getValueAsString(idAttribute)));
        } else {
            clusterMap.put(nextClusterId,
                    new HierarchicalClusterLeafNode(nextClusterId, example1.getValue(idAttribute)));
        }
        nextClusterId++;
    }

    // creating linkage method
    AbstractLinkageMethod linkage = new SingleLinkageMethod(matrix, clusterIds);
    if (getParameterAsString(PARAMETER_MODE).equals(modes[1])) {
        linkage = new CompleteLinkageMethod(matrix, clusterIds);
    } else if (getParameterAsString(PARAMETER_MODE).equals(modes[2])) {
        linkage = new AverageLinkageMethod(matrix, clusterIds);
    }

    // now building the agglomerative tree bottom up
    while (clusterMap.size() > 1) {
        Agglomeration agglomeration = linkage.getNextAgglomeration(nextClusterId, clusterMap);
        HierarchicalClusterNode newNode =
                new HierarchicalClusterNode(nextClusterId, agglomeration.getDistance());
        newNode.addSubNode(clusterMap.get(agglomeration.getClusterId1()));
        newNode.addSubNode(clusterMap.get(agglomeration.getClusterId2()));
        clusterMap.remove(agglomeration.getClusterId1());
        clusterMap.remove(agglomeration.getClusterId2());
        clusterMap.put(nextClusterId, newNode);
        nextClusterId++;
    }

    // creating model
    HierarchicalClusterModel model =
            new DendogramHierarchicalClusterModel(clusterMap.entrySet().iterator().next().getValue());

    // registering visualizer
    ObjectVisualizerService.addObjectVisualizer(model,
            new ExampleVisualizer((ExampleSet) exampleSet.clone()));

    modelOutput.deliver(model);
    exampleSetOutput.deliver(exampleSet);
}
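// Editor's sketch: the internals of getNextAgglomeration(...) are not shown above; for single
// linkage, one such step amounts to scanning the distance matrix for the closest pair of
// clusters. A minimal demo of that scan with a made-up symmetric matrix (not RapidMiner API):
public class ClosestPairDemo {
    public static void main(String[] args) {
        // made-up distances between four single-example clusters
        double[][] d = {
                { 0.0, 2.0, 6.0, 10.0 },
                { 2.0, 0.0, 5.0, 9.0 },
                { 6.0, 5.0, 0.0, 4.0 },
                { 10.0, 9.0, 4.0, 0.0 }
        };
        int bestI = -1, bestJ = -1;
        double best = Double.POSITIVE_INFINITY;
        for (int i = 0; i < d.length; i++) {
            for (int j = i + 1; j < d.length; j++) { // upper triangle, as filled in doWork()
                if (d[i][j] < best) {
                    best = d[i][j];
                    bestI = i;
                    bestJ = j;
                }
            }
        }
        // clusters 0 and 1 merge first at distance 2.0; the loop in doWork() would now create
        // a HierarchicalClusterNode with these two as sub-nodes and repeat until one node is left.
        System.out.println("merge " + bestI + " and " + bestJ + " at distance " + best);
    }
}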
@Override
public void init(ExampleSet exampleSet) throws OperatorException {
    super.init(exampleSet);
    Tools.onlyNumericalAttributes(exampleSet, "value based similarities");
}