public SVDModel(ExampleSet exampleSet, double[] singularValues, Matrix vMatrix) {
    super(exampleSet);

    this.vMatrix = vMatrix;
    this.singularValues = singularValues;
    this.keepAttributes = false;
    this.attributeNames = new String[exampleSet.getAttributes().size()];
    int counter = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
        attributeNames[counter] = attribute.getName();
        counter++;
    }

    // compute cumulative values
    cumulativeSingularValueProportion = new double[singularValues.length];
    // insert cumulative sum of singular values
    singularValuesSum = 0.0d;
    for (int i = 0; i < singularValues.length; i++) {
        singularValuesSum += singularValues[i];
        cumulativeSingularValueProportion[i] = singularValuesSum;
    }
    // now reduce to proportion
    for (int i = 0; i < singularValues.length; i++) {
        cumulativeSingularValueProportion[i] /= singularValuesSum;
    }
}
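// A minimal standalone sketch (hypothetical data, independent of the model
// above) of the cumulative-proportion computation: each running sum is
// divided by the final total, so the last entry is always 1.0.
class CumulativeProportionSketch {
    public static void main(String[] args) {
        double[] singularValues = {4.0, 2.0, 1.0, 0.5}; // made-up example values
        double[] cumulativeProportion = new double[singularValues.length];
        double sum = 0.0d;
        for (int i = 0; i < singularValues.length; i++) {
            sum += singularValues[i];
            cumulativeProportion[i] = sum; // running sum
        }
        for (int i = 0; i < cumulativeProportion.length; i++) {
            cumulativeProportion[i] /= sum; // 0.533..., 0.8, 0.933..., 1.0
        }
        System.out.println(java.util.Arrays.toString(cumulativeProportion));
    }
}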
public PCAModel(ExampleSet eSet, double[] eigenvalues, double[][] eigenvectors) {
    super(eSet);

    this.keepAttributes = false;
    this.attributeNames = new String[eSet.getAttributes().size()];
    this.means = new double[eSet.getAttributes().size()];
    int counter = 0;
    eSet.recalculateAllAttributeStatistics(); // ensures that the statistics were created
    for (Attribute attribute : eSet.getAttributes()) {
        attributeNames[counter] = attribute.getName();
        means[counter] = eSet.getStatistics(attribute, Statistics.AVERAGE);
        counter++;
    }

    this.eigenVectors = new ArrayList<Eigenvector>(eigenvalues.length);
    for (int i = 0; i < eigenvalues.length; i++) {
        double[] currentEigenVector = new double[eSet.getAttributes().size()];
        for (int j = 0; j < currentEigenVector.length; j++) {
            currentEigenVector[j] = eigenvectors[j][i];
        }
        this.eigenVectors.add(new Eigenvector(currentEigenVector, eigenvalues[i]));
    }

    // order the eigenvectors by the eigenvalues
    Collections.sort(this.eigenVectors);

    calculateCumulativeVariance();
}
/**
 * Creates attribute meta data that represents the attribute that will be generated for the
 * provided arguments.
 *
 * @return the {@link AttributeMetaData} for the provided arguments
 */
public static AttributeMetaData generateAttributeMetaData(
        ExampleSet exampleSet, String name, ExpressionType expressionType) {

    AttributeMetaData newAttribute;
    Attribute existingAtt = exampleSet.getAttributes().get(name);

    int ontology = expressionType.getAttributeType();
    if (ontology == Ontology.BINOMINAL) {
        newAttribute = new AttributeMetaData(name, Ontology.BINOMINAL);
        HashSet<String> values = new HashSet<>();
        values.add("false");
        values.add("true");
        newAttribute.setValueSet(values, SetRelation.EQUAL);
    } else {
        newAttribute = new AttributeMetaData(name, ontology);
    }

    // restore role if attribute existed already
    if (existingAtt != null) {
        newAttribute.setRole(exampleSet.getAttributes().getRole(existingAtt).getSpecialName());
    }
    return newAttribute;
}
private static Map<Integer, MeanVariance> createMeanVariances(
        com.rapidminer.example.ExampleSet exampleSet) {

    double[] sum = new double[exampleSet.getAttributes().size()];
    double[] squaredSum = new double[sum.length];

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
        com.rapidminer.example.Example example = reader.next();
        int a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            double value = example.getValue(attribute);
            sum[a] += value;
            squaredSum[a] += value * value;
            a++;
        }
    }

    Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
    for (int a = 0; a < sum.length; a++) {
        sum[a] /= exampleSet.size();
        squaredSum[a] /= exampleSet.size();
        // variance = E[X^2] - E[X]^2
        meanVariances.put(a, new MeanVariance(sum[a], squaredSum[a] - (sum[a] * sum[a])));
    }
    return meanVariances;
}
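// The map above pairs each attribute index with its mean E[X] and the
// one-pass variance E[X^2] - E[X]^2. A standalone sketch of that identity on
// made-up data; note the shortcut can lose precision when the mean is large
// relative to the spread (Welford's algorithm is the stable alternative).
class MeanVarianceSketch {
    public static void main(String[] args) {
        double[] values = {2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0};
        double sum = 0.0d;
        double squaredSum = 0.0d;
        for (double v : values) {
            sum += v;
            squaredSum += v * v;
        }
        double mean = sum / values.length;                           // 5.0
        double variance = squaredSum / values.length - mean * mean;  // 4.0
        System.out.println("mean=" + mean + ", variance=" + variance);
    }
}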
@Override
public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    boolean round = getParameterAsBoolean(PARAMETER_ROUND);

    List<Attribute> newAttributes = new LinkedList<Attribute>();
    Iterator<Attribute> a = exampleSet.getAttributes().iterator();
    while (a.hasNext()) {
        Attribute attribute = a.next();
        if ((Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL))
                && (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.INTEGER))) {
            Attribute newAttribute = AttributeFactory.createAttribute(attribute.getName(), Ontology.INTEGER);
            newAttributes.add(newAttribute);
            exampleSet.getExampleTable().addAttribute(newAttribute);

            for (Example example : exampleSet) {
                double originalValue = example.getValue(attribute);
                if (Double.isNaN(originalValue)) {
                    example.setValue(newAttribute, Double.NaN);
                } else {
                    long newValue = round ? Math.round(originalValue) : (long) originalValue;
                    example.setValue(newAttribute, newValue);
                }
            }
            a.remove();
        }
    }

    for (Attribute attribute : newAttributes) {
        exampleSet.getAttributes().addRegular(attribute);
    }
    return exampleSet;
}
/** Trains a model using an ExampleSet from the input. Uses the method learn(ExampleSet). */
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // some checks
    if (exampleSet.getAttributes().getLabel() == null) {
        throw new UserError(this, 105, new Object[0]);
    }
    if (exampleSet.getAttributes().size() == 0) {
        throw new UserError(this, 106, new Object[0]);
    }

    // check capabilities and produce errors if they are not fulfilled
    CapabilityCheck check = new CapabilityCheck(
            this,
            Tools.booleanValue(
                    ParameterService.getParameterValue(
                            CapabilityProvider.PROPERTY_RAPIDMINER_GENERAL_CAPABILITIES_WARN),
                    true));
    check.checkLearnerCapabilities(this, exampleSet);

    Model model = learn(exampleSet);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(exampleSet);
}
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String firstName = getParameterAsString(PARAMETER_FIRST_ATTRIBUTE);
    String secondName = getParameterAsString(PARAMETER_SECOND_ATTRIBUTE);

    AttributeRole firstRole = exampleSet.getAttributes().getRole(firstName);
    AttributeRole secondRole = exampleSet.getAttributes().getRole(secondName);
    if (firstRole == null) {
        throw new AttributeNotFoundError(this, PARAMETER_FIRST_ATTRIBUTE, firstName);
    }
    if (secondRole == null) {
        throw new AttributeNotFoundError(this, PARAMETER_SECOND_ATTRIBUTE, secondName);
    }

    // swap the special roles of the two attributes
    String firstRoleName = firstRole.getSpecialName();
    String secondRoleName = secondRole.getSpecialName();
    firstRole.changeToRegular();
    secondRole.changeToRegular();
    firstRole.setSpecial(secondRoleName);
    secondRole.setSpecial(firstRoleName);

    return exampleSet;
}
@Override
public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();
    int numberOfAttributes = exampleSet.getAttributes().size();
    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[numberOfAttributes], 0);

    for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) {
        double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate);
        Attributes attributes = exampleSet.getAttributes();
        for (Example example : exampleSet) {
            double prediction = model.predict(example);
            if (prediction != example.getLabel()) {
                double direction = (example.getLabel() == classValueNeg) ? -1 : 1;
                // adapting intercept
                model.setIntercept(model.getIntercept() + learnRate * direction);
                // adapting coefficients
                double[] coefficients = model.getCoefficients();
                int i = 0;
                for (Attribute attribute : attributes) {
                    coefficients[i] += learnRate * direction * example.getValue(attribute);
                    i++;
                }
            }
        }
    }
    return model;
}
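// The learner above is a perceptron: every misclassified example shifts the
// intercept and each coefficient by learnRate * direction * value. A
// standalone sketch of that update rule (class name and data are
// illustrative, not part of the operator above):
class PerceptronUpdateSketch {
    double[] w;
    double b;

    PerceptronUpdateSketch(int dimension) {
        this.w = new double[dimension];
    }

    // direction is +1 for a misclassified positive example, -1 for a negative one
    void update(double[] x, double direction, double learnRate) {
        b += learnRate * direction;
        for (int i = 0; i < w.length; i++) {
            w[i] += learnRate * direction * x[i];
        }
    }

    public static void main(String[] args) {
        PerceptronUpdateSketch p = new PerceptronUpdateSketch(2);
        p.update(new double[] {1.0, 2.0}, 1.0, 0.1);
        System.out.println(p.w[0] + " " + p.w[1] + " " + p.b); // 0.1 0.2 0.1
    }
}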
private BasicNetwork getNetwork(ExampleSet exampleSet) throws OperatorException {
    BasicNetwork network = new BasicNetwork();

    // input layer
    network.addLayer(new FeedforwardLayer(exampleSet.getAttributes().size()));

    // hidden layers
    log("No hidden layers defined. Using default hidden layers.");
    int layerSize = getParameterAsInt(PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE);
    if (layerSize <= 0) {
        layerSize = getDefaultLayerSize(exampleSet);
    }
    for (int p = 0; p < getParameterAsInt(PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS); p++) {
        network.addLayer(new FeedforwardLayer(layerSize));
    }

    // output layer
    if (exampleSet.getAttributes().getLabel().isNominal()) {
        network.addLayer(new FeedforwardLayer(new ActivationSigmoid(), 1));
    } else {
        network.addLayer(new FeedforwardLayer(new ActivationLinear(), 1));
    }

    network.reset(
            RandomGenerator.getRandomGenerator(
                    getParameterAsBoolean(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED),
                    getParameterAsInt(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED)));

    return network;
}
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
    Long offset = getParameterAsLong(PARMETER_TIME_OFFSET);

    Attribute numericalAttribute = exampleSet.getAttributes().get(attributeName);
    if (numericalAttribute == null) {
        throw new UserError(this, 111, attributeName);
    }

    Attribute newAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    for (Example example : exampleSet) {
        double value = example.getValue(numericalAttribute);
        if (Double.isNaN(value)) {
            example.setValue(newAttribute, value);
        } else {
            value += offset;
            example.setValue(newAttribute, value);
        }
    }

    if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
        AttributeRole oldRole = exampleSet.getAttributes().getRole(numericalAttribute);
        exampleSet.getAttributes().remove(numericalAttribute);
        newAttribute.setName(attributeName);
        exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName());
    } else {
        newAttribute.setName(attributeName + "_AS_DATE");
    }
    return exampleSet;
}
/**
 * Creates a fresh example set of the given size from the RapidMiner example reader. The alpha
 * values and b are zero, the label will be set if it is known.
 */
public SVMExamples(
        com.rapidminer.example.ExampleSet exampleSet,
        Attribute labelAttribute,
        Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    Attribute idAttribute = exampleSet.getAttributes().getId();
    int exampleCounter = 0;
    while (reader.hasNext()) {
        com.rapidminer.example.Example current = reader.next();
        Map<Integer, Double> attributeMap = new LinkedHashMap<Integer, Double>();
        int a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            double value = current.getValue(attribute);
            if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), value)) {
                attributeMap.put(a, value);
            }
            if ((a + 1) > dim) {
                dim = (a + 1);
            }
            a++;
        }
        atts[exampleCounter] = new double[attributeMap.size()];
        index[exampleCounter] = new int[attributeMap.size()];
        Iterator<Map.Entry<Integer, Double>> i = attributeMap.entrySet().iterator();
        int attributeCounter = 0;
        while (i.hasNext()) {
            Map.Entry<Integer, Double> e = i.next();
            Integer indexValue = e.getKey();
            Double attributeValue = e.getValue();
            index[exampleCounter][attributeCounter] = indexValue.intValue();
            double value = attributeValue.doubleValue();
            MeanVariance meanVariance = meanVarianceMap.get(indexValue);
            if (meanVariance != null) {
                if (meanVariance.getVariance() == 0.0d) {
                    value = 0.0d;
                } else {
                    value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
                }
            }
            atts[exampleCounter][attributeCounter] = value;
            attributeCounter++;
        }
        if (labelAttribute != null) {
            double label = current.getValue(labelAttribute);
            if (labelAttribute.isNominal()) {
                ys[exampleCounter] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
            } else {
                ys[exampleCounter] = label;
            }
        }
        if (idAttribute != null) {
            ids[exampleCounter] = current.getValueAsString(idAttribute);
        }
        exampleCounter++;
    }
}
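// The constructor above z-standardizes every stored value with
// (x - mean) / sqrt(variance) and maps constant attributes (variance 0) to
// zero. A standalone sketch of that transform on invented numbers:
class ZScoreSketch {
    static double standardize(double value, double mean, double variance) {
        if (variance == 0.0d) {
            return 0.0d; // a constant attribute carries no information
        }
        return (value - mean) / Math.sqrt(variance);
    }

    public static void main(String[] args) {
        System.out.println(standardize(7.0, 5.0, 4.0)); // 1.0
        System.out.println(standardize(7.0, 7.0, 0.0)); // 0.0
    }
}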
private List<AggregationFunction> createAggreationFunctions(ExampleSet exampleSet)
        throws OperatorException {
    // load global switches
    boolean ignoreMissings = getParameterAsBoolean(PARAMETER_IGNORE_MISSINGS);
    boolean countOnlyDistinct = getParameterAsBoolean(PARAMETER_ONLY_DISTINCT);

    // creating data structures for building aggregates
    List<AggregationFunction> aggregationFunctions = new LinkedList<AggregationFunction>();

    // building functions for all explicitly defined aggregation attributes
    Set<Attribute> explicitlyAggregatedAttributes = new HashSet<Attribute>();
    List<String[]> aggregationFunctionPairs = getParameterList(PARAMETER_AGGREGATION_ATTRIBUTES);
    for (String[] aggregationFunctionPair : aggregationFunctionPairs) {
        Attribute attribute = exampleSet.getAttributes().get(aggregationFunctionPair[0]);
        if (attribute == null) {
            throw new UserError(
                    this, "aggregation.aggregation_attribute_not_present", aggregationFunctionPair[0]);
        }
        AggregationFunction function = AggregationFunction.createAggregationFunction(
                aggregationFunctionPair[1], attribute, ignoreMissings, countOnlyDistinct);
        if (!function.isCompatible()) {
            throw new UserError(
                    this,
                    "aggregation.incompatible_attribute_type",
                    attribute.getName(),
                    aggregationFunctionPair[1]);
        }

        // adding objects for this attribute to structure
        explicitlyAggregatedAttributes.add(attribute);
        aggregationFunctions.add(function);
    }

    // building the default aggregations
    if (getParameterAsBoolean(PARAMETER_USE_DEFAULT_AGGREGATION)) {
        String defaultAggregationFunctionName =
                getParameterAsString(PARAMETER_DEFAULT_AGGREGATION_FUNCTION);
        Iterator<Attribute> iterator =
                attributeSelector.getAttributeSubset(exampleSet, false).iterator();
        if (getCompatibilityLevel().isAtMost(VERSION_5_2_8)) {
            iterator = exampleSet.getAttributes().iterator();
        }
        while (iterator.hasNext()) {
            Attribute attribute = iterator.next();
            if (!explicitlyAggregatedAttributes.contains(attribute)) {
                AggregationFunction function = AggregationFunction.createAggregationFunction(
                        defaultAggregationFunctionName, attribute, ignoreMissings, countOnlyDistinct);
                if (function.isCompatible()) {
                    aggregationFunctions.add(function);
                }
            }
        }
    }
    return aggregationFunctions;
}
/**
 * Extracts an example set containing just the two specified attributes and no missing values.
 *
 * @param eSet the source example set
 * @param a the first attribute to extract
 * @param b the second attribute to extract
 * @return the reduced example set
 */
private static ExampleSet extract(ExampleSet eSet, Attribute a, Attribute b) {
    // create a new example set containing just attributes a and b
    ExampleSet e = (ExampleSet) eSet.clone();
    e.getAttributes().clearRegular();
    e.getAttributes().clearSpecial();
    e.getAttributes().addRegular(a);
    e.getAttributes().addRegular(b);
    return new ConditionedExampleSet(e, new NoMissingAttributesCondition(e, null));
}
/**
 * Helper method replacing <code>Model.createPredictedLabel(ExampleSet)</code> in order to lower
 * memory consumption.
 */
private static void createOrReplacePredictedLabelFor(ExampleSet exampleSet, Model model) {
    Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel();
    if (predictedLabel != null) {
        // remove old predicted label
        exampleSet.getAttributes().remove(predictedLabel);
        exampleSet.getExampleTable().removeAttribute(predictedLabel);
    }
    // model.createPredictedLabel(exampleSet); // no longer necessary since
    // label creation is done by model.apply(...).
}
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // recall: difference = minuend - subtrahend,
    // but the subtrahend is last on the ioobjects stack, so pop it first
    ExampleSet subtrahendSet = subtrahendInput.getData(ExampleSet.class);
    ExampleSet minuendSet = exampleSet;

    subtrahendSet.remapIds();
    minuendSet.remapIds();

    Attribute minuendId = minuendSet.getAttributes().getId();
    Attribute subtrahendId = subtrahendSet.getAttributes().getId();

    // sanity checks
    if ((minuendId == null) || (subtrahendId == null)) {
        throw new UserError(this, 129);
    }
    if (minuendId.getValueType() != subtrahendId.getValueType()) {
        throw new UserError(
                this,
                120,
                new Object[] {
                    subtrahendId.getName(),
                    Ontology.VALUE_TYPE_NAMES[subtrahendId.getValueType()],
                    Ontology.VALUE_TYPE_NAMES[minuendId.getValueType()]
                });
    }

    // collect the indices of all minuend examples whose id does not occur in the subtrahend
    List<Integer> indices = new LinkedList<>();
    {
        int i = 0;
        for (Example example : minuendSet) {
            double id = example.getValue(minuendId);
            Example subtrahendExample = null;
            if (minuendId.isNominal()) {
                subtrahendExample = subtrahendSet.getExampleFromId(
                        subtrahendId.getMapping().getIndex(minuendId.getMapping().mapIndex((int) id)));
            } else {
                subtrahendExample = subtrahendSet.getExampleFromId(id);
            }
            if (subtrahendExample == null) {
                indices.add(i);
            }
            i++;
        }
    }

    int[] indexArray = new int[indices.size()];
    for (int i = 0; i < indices.size(); i++) {
        indexArray[i] = indices.get(i);
    }
    ExampleSet minusSet = new MappedExampleSet(minuendSet, indexArray);
    return minusSet;
}
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();

    IEntityMapping user_mapping = new EntityMapping();
    IEntityMapping item_mapping = new EntityMapping();
    IRatings training_data = new Ratings();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getLabel() == null) {
        throw new UserError(this, 105);
    }

    Attributes Att = exampleSet.getAttributes();
    AttributeRole ur = Att.getRole("user identification");
    Attribute u = ur.getAttribute();
    AttributeRole ir = Att.getRole("item identification");
    Attribute i = ir.getAttribute();
    Attribute ui = Att.getLabel();

    for (Example example : exampleSet) {
        double j = example.getValue(u);
        int uid = user_mapping.ToInternalID((int) j);
        j = example.getValue(i);
        int iid = item_mapping.ToInternalID((int) j);
        double r = example.getValue(ui);
        training_data.Add(uid, iid, r);
    }

    _slopeOne recommendAlg = new _slopeOne();
    recommendAlg.user_mapping = user_mapping;
    recommendAlg.item_mapping = item_mapping;
    recommendAlg.SetMinRating(getParameterAsInt("Min Rating"));
    recommendAlg.SetMaxRating(recommendAlg.GetMinRating() + getParameterAsInt("Range"));
    recommendAlg.SetRatings(training_data);
    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommendAlg);
}
// checking for example set and valid attributes
@Override
public void init(ExampleSet exampleSet) throws OperatorException {
    super.init(exampleSet);
    Tools.onlyNominalAttributes(exampleSet, "nominal similarities");
    this.useAttribute = new boolean[exampleSet.getAttributes().size()];
    int i = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
        if (attribute.isNominal()) {
            useAttribute[i] = true;
        }
        i++;
    }
}
private void restoreOldWeights(ExampleSet exampleSet) {
    if (this.oldWeights != null) {
        // need to reset weights
        Iterator<Example> reader = exampleSet.iterator();
        int i = 0;
        while (reader.hasNext() && i < this.oldWeights.length) {
            reader.next().setWeight(this.oldWeights[i++]);
        }
    } else {
        // need to delete the weights attribute
        Attribute weight = exampleSet.getAttributes().getWeight();
        exampleSet.getAttributes().remove(weight);
        exampleSet.getExampleTable().removeAttribute(weight);
    }
}
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // only use numeric attributes
    Tools.onlyNumericalAttributes(exampleSet, "KernelPCA");
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this);

    Attributes attributes = exampleSet.getAttributes();
    int numberOfExamples = exampleSet.size();

    // calculating means for later zero centering
    exampleSet.recalculateAllAttributeStatistics();
    double[] means = new double[exampleSet.getAttributes().size()];
    int i = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
        means[i] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
        i++;
    }

    // kernel
    Kernel kernel = Kernel.createKernel(this);

    // copying zero centered exampleValues
    ArrayList<double[]> exampleValues = new ArrayList<double[]>(numberOfExamples);
    i = 0;
    for (Example columnExample : exampleSet) {
        double[] columnValues = getAttributeValues(columnExample, attributes, means);
        exampleValues.add(columnValues);
        i++;
    }

    // filling kernel matrix
    Matrix kernelMatrix = new Matrix(numberOfExamples, numberOfExamples);
    for (i = 0; i < numberOfExamples; i++) {
        for (int j = 0; j < numberOfExamples; j++) {
            kernelMatrix.set(
                    i, j, kernel.calculateDistance(exampleValues.get(i), exampleValues.get(j)));
        }
    }

    // calculating eigenvectors
    EigenvalueDecomposition eig = kernelMatrix.eig();
    Model model = new KernelPCAModel(exampleSet, means, eig.getV(), exampleValues, kernel);

    if (exampleSetOutput.isConnected()) {
        exampleSetOutput.deliver(model.apply(exampleSet));
    }
    originalOutput.deliver(exampleSet);
    modelOutput.deliver(model);
}
private NeuralDataSet getTraining(ExampleSet exampleSet) {
    double[][] data = new double[exampleSet.size()][exampleSet.getAttributes().size()];
    double[][] labels = new double[exampleSet.size()][1];
    int index = 0;
    Attribute label = exampleSet.getAttributes().getLabel();

    this.attributeMin = new double[exampleSet.getAttributes().size()];
    this.attributeMax = new double[attributeMin.length];
    exampleSet.recalculateAllAttributeStatistics();
    int a = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
        this.attributeMin[a] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
        this.attributeMax[a] = exampleSet.getStatistics(attribute, Statistics.MAXIMUM);
        a++;
    }
    this.labelMin = exampleSet.getStatistics(label, Statistics.MINIMUM);
    this.labelMax = exampleSet.getStatistics(label, Statistics.MAXIMUM);

    for (Example example : exampleSet) {
        // attributes
        a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (attributeMin[a] != attributeMax[a]) {
                data[index][a] =
                        (example.getValue(attribute) - attributeMin[a]) / (attributeMax[a] - attributeMin[a]);
            } else {
                data[index][a] = example.getValue(attribute) - attributeMin[a];
            }
            a++;
        }

        // label
        if (label.isNominal()) {
            labels[index][0] = example.getValue(label);
        } else {
            if (labelMax != labelMin) {
                labels[index][0] = (example.getValue(label) - labelMin) / (labelMax - labelMin);
            } else {
                labels[index][0] = example.getValue(label) - labelMin;
            }
        }
        index++;
    }
    return new BasicNeuralDataSet(data, labels);
}
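// getTraining rescales every attribute and a numerical label into [0, 1]
// with (x - min) / (max - min), falling back to a plain shift when
// min == max to avoid dividing by zero. A standalone sketch of that rule:
class MinMaxScaleSketch {
    static double scale(double value, double min, double max) {
        return (min != max) ? (value - min) / (max - min) : value - min;
    }

    public static void main(String[] args) {
        System.out.println(scale(5.0, 0.0, 10.0)); // 0.5
        System.out.println(scale(7.0, 7.0, 7.0));  // 0.0 for a constant attribute
    }
}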
@Override
public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // generating assignment
    RandomGenerator random = RandomGenerator.getRandomGenerator(this);
    int[] clusterAssignments = new int[exampleSet.size()];
    int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS);
    for (int i = 0; i < exampleSet.size(); i++) {
        clusterAssignments[i] = random.nextInt(k);
    }

    ClusterModel model = new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    model.setClusterAssignments(clusterAssignments, exampleSet);

    // generating cluster attribute
    if (addsClusterAttribute()) {
        Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
        exampleSet.getExampleTable().addAttribute(cluster);
        exampleSet.getAttributes().setCluster(cluster);
        int i = 0;
        for (Example example : exampleSet) {
            example.setValue(cluster, "cluster_" + clusterAssignments[i]);
            i++;
        }
    }
    return model;
}
/** Creates the partition builder for the given sampling type. */
private static PartitionBuilder createPartitionBuilder(
        ExampleSet exampleSet, int samplingType, int seed) {
    PartitionBuilder builder = null;
    switch (samplingType) {
        case LINEAR_SAMPLING:
            builder = new SimplePartitionBuilder();
            break;
        case SHUFFLED_SAMPLING:
            builder = new ShuffledPartitionBuilder(true, seed);
            break;
        case STRATIFIED_SAMPLING:
        default:
            Attribute label = exampleSet.getAttributes().getLabel();
            if ((label != null) && (label.isNominal())) {
                builder = new StratifiedPartitionBuilder(exampleSet, true, seed);
            } else {
                exampleSet
                        .getLog()
                        .logNote(
                                "Example set has no nominal label: using shuffled partition instead of stratified partition!");
                builder = new ShuffledPartitionBuilder(true, seed);
            }
            break;
    }
    return builder;
}
@Override
public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException {
    boolean sortMappings = getParameterAsBoolean(PARAMETER_SORT_MAPPING_ALPHABETICALLY);

    Map<String, MappingTranslation> translations = new HashMap<String, MappingTranslation>();
    exampleSet.recalculateAllAttributeStatistics();
    for (Attribute attribute : exampleSet.getAttributes()) {
        if (attribute.isNominal()) {
            // only nominal attributes have a mapping, so check the type before
            // cloning the mapping (the original cloned it unconditionally)
            MappingTranslation translation =
                    new MappingTranslation((NominalMapping) attribute.getMapping().clone());
            for (String value : attribute.getMapping().getValues()) {
                double count = exampleSet.getStatistics(attribute, Statistics.COUNT, value);
                if (count > 0) {
                    translation.newMapping.mapString(value);
                }
            }
            if (translation.newMapping.size() < attribute.getMapping().size()) {
                if (sortMappings) {
                    translation.newMapping.sortMappings();
                }
                translations.put(attribute.getName(), translation);
            }
        }
    }
    return new RemoveUnusedNominalValuesModel(exampleSet, translations);
}
@Override
public AttributeWeights calculateWeights(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();
    Attribute labelAttribute = attributes.getLabel();
    boolean useSquaredCorrelation = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION);

    AttributeWeights weights = new AttributeWeights(exampleSet);
    getProgress().setTotal(attributes.size());
    int progressCounter = 0;
    int exampleSetSize = exampleSet.size();
    int exampleCounter = 0;
    for (Attribute attribute : attributes) {
        double correlation =
                MathFunctions.correlation(exampleSet, labelAttribute, attribute, useSquaredCorrelation);
        weights.setWeight(attribute.getName(), Math.abs(correlation));
        progressCounter++;
        exampleCounter += exampleSetSize;
        if (exampleCounter > PROGRESS_UPDATE_STEPS) {
            exampleCounter = 0;
            getProgress().setCompleted(progressCounter);
        }
    }
    return weights;
}
@Override
public void doWork() throws OperatorException {
    CentroidClusterModel model = modelInput.getData(CentroidClusterModel.class);

    Attributes trainAttributes = model.getTrainingHeader().getAttributes();
    String[] attributeNames = model.getAttributeNames();
    Attribute[] attributes = new Attribute[attributeNames.length + 1];
    for (int i = 0; i < attributeNames.length; i++) {
        Attribute originalAttribute = trainAttributes.get(attributeNames[i]);
        attributes[i] =
                AttributeFactory.createAttribute(attributeNames[i], originalAttribute.getValueType());
        if (originalAttribute.isNominal()) {
            attributes[i].setMapping((NominalMapping) originalAttribute.getMapping().clone());
        }
    }
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    attributes[attributes.length - 1] = clusterAttribute;

    MemoryExampleTable table = new MemoryExampleTable(attributes);
    for (int i = 0; i < model.getNumberOfClusters(); i++) {
        double[] data = new double[attributeNames.length + 1];
        System.arraycopy(model.getCentroidCoordinates(i), 0, data, 0, attributeNames.length);
        data[attributeNames.length] = clusterAttribute.getMapping().mapString("cluster_" + i);
        table.addDataRow(new DoubleArrayDataRow(data));
    }

    ExampleSet resultSet = table.createExampleSet();
    resultSet.getAttributes().setSpecialAttribute(clusterAttribute, Attributes.CLUSTER_NAME);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(resultSet);
}
public SOMModelPlotter(PlotterConfigurationModel settings, ExampleSet exampleSet, Model model) {
    super(settings);
    this.model = model;
    this.exampleSet = exampleSet;
    this.colorizer =
            new SOMClassColorizer(exampleSet.getAttributes().getLabel().getMapping().size());
}
@Override
public ExampleSet read() throws OperatorException {
    FileInputStream inStream = null;
    try {
        inStream = new FileInputStream(getParameterAsFile(PARAMETER_FASTA_FILE_NAME));
    } catch (FileNotFoundException e) {
        // TODO: "Fill"
    }

    // note: inStream may still be null here if the file was not found
    FastaReader<DNASequence, NucleotideCompound> fastaReader =
            new FastaReader<DNASequence, NucleotideCompound>(
                    inStream,
                    new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
                    new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
    LinkedHashMap<String, DNASequence> b = null;
    try {
        b = fastaReader.process();
    } catch (Exception e) {
        // TODO: "Fill"
    }

    String[][] data = new String[0][];
    if (b != null) {
        data = new String[b.size()][2];
        int i = 0;
        for (Map.Entry<String, DNASequence> entry : b.entrySet()) {
            data[i][0] = entry.getValue().getOriginalHeader();
            data[i][1] = entry.getValue().getSequenceAsString();
            i++;
        }
    }

    ExampleSet outSet = ExampleSetFactory.createExampleSet(data);
    outSet.getAttributes().get("att1").setName("DNA name");
    outSet.getAttributes().get("att2").setName("Chain");
    return outSet;
}
/** Creates a new evolutionary SVM optimization. */
public ClassificationEvoOptimization(
        ExampleSet exampleSet, // training data
        Kernel kernel,
        double c, // SVM parameters
        int initType, // start population creation type parameter
        int maxIterations,
        int generationsWithoutImprovement,
        int popSize, // GA parameters
        int selectionType,
        double tournamentFraction,
        boolean keepBest, // selection parameters
        int mutationType, // type of mutation
        double crossoverProb,
        boolean showConvergencePlot,
        boolean showPopulationPlot,
        ExampleSet holdOutSet,
        RandomGenerator random,
        LoggingHandler logging,
        Operator executingOperator) {
    super(
            EvoSVM.createBoundArray(0.0d, exampleSet.size()),
            EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()),
            popSize,
            exampleSet.size(),
            initType,
            maxIterations,
            generationsWithoutImprovement,
            selectionType,
            tournamentFraction,
            keepBest,
            mutationType,
            Double.NaN,
            crossoverProb,
            showConvergencePlot,
            showPopulationPlot,
            random,
            logging,
            executingOperator);

    this.exampleSet = exampleSet;
    this.holdOutSet = holdOutSet;
    this.populationSize = popSize;
    this.kernel = kernel;
    this.c = getMax(0);

    // label values: +1 for the positive class, -1 otherwise
    this.ys = new double[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int index = 0;
    Attribute label = exampleSet.getAttributes().getLabel();
    while (reader.hasNext()) {
        Example example = reader.next();
        ys[index++] = example.getLabel() == label.getMapping().getPositiveIndex() ? 1.0d : -1.0d;
    }

    // optimization function
    this.optimizationFunction =
            new ClassificationOptimizationFunction(selectionType == NON_DOMINATED_SORTING_SELECTION);
}
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();

    IPosOnlyFeedback training_data = new PosOnlyFeedback();
    IEntityMapping user_mapping = new EntityMapping();
    IEntityMapping item_mapping = new EntityMapping();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
        throw new UserError(this, 105);
    }

    Attributes Att = exampleSet.getAttributes();
    AttributeRole ur = Att.getRole("user identification");
    Attribute u = ur.getAttribute();
    AttributeRole ir = Att.getRole("item identification");
    Attribute i = ir.getAttribute();

    for (Example example : exampleSet) {
        double j = example.getValue(u);
        int uid = (int) j;
        j = example.getValue(i);
        int iid = (int) j;
        training_data.Add(user_mapping.ToInternalID(uid), item_mapping.ToInternalID(iid));
        checkForStop();
    }

    System.out.println(training_data.GetMaxItemID() + " " + training_data.GetMaxUserID());

    Random recommendAlg = new Random();
    recommendAlg.SetFeedback(training_data);
    recommendAlg.user_mapping = user_mapping;
    recommendAlg.item_mapping = item_mapping;
    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommendAlg);
}
/**
 * Iterates over all models and returns the class with maximum likelihood.
 *
 * @param origExampleSet the set of examples to be classified
 */
@Override
public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel)
        throws OperatorException {
    final String attributePrefix = "AdaBoostModelPrediction";
    final int numLabels = predictedLabel.getMapping().size();
    final Attribute[] specialAttributes = new Attribute[numLabels];
    OperatorProgress progress = null;
    if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) {
        progress = getOperator().getProgress();
        progress.setTotal(100);
    }
    for (int i = 0; i < numLabels; i++) {
        specialAttributes[i] = com.rapidminer.example.Tools.createSpecialAttribute(
                origExampleSet, attributePrefix + i, Ontology.NUMERICAL);
        if (progress != null) {
            progress.setCompleted((int) (25.0 * (i + 1) / numLabels));
        }
    }

    Iterator<Example> reader = origExampleSet.iterator();
    int progressCounter = 0;
    while (reader.hasNext()) {
        Example example = reader.next();
        for (int i = 0; i < specialAttributes.length; i++) {
            example.setValue(specialAttributes[i], 0);
        }
        if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) {
            progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25);
        }
    }

    reader = origExampleSet.iterator();
    for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) {
        Model model = this.getModel(modelNr);
        ExampleSet exampleSet = (ExampleSet) origExampleSet.clone();
        exampleSet = model.apply(exampleSet);
        this.updateEstimates(exampleSet, modelNr, specialAttributes);
        PredictionModel.removePredictedLabel(exampleSet);
        if (progress != null) {
            progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50);
        }
    }

    // turn prediction weights into confidences and a crisp prediction
    this.evaluateSpecialAttributes(origExampleSet, specialAttributes);

    // clean up attributes
    for (int i = 0; i < numLabels; i++) {
        origExampleSet.getAttributes().remove(specialAttributes[i]);
        origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
        if (progress != null) {
            progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75);
        }
    }
    return origExampleSet;
}
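// performPrediction accumulates one weight column per class and then lets
// evaluateSpecialAttributes turn those totals into confidences and a crisp
// prediction. The exact combination rule lives in that method; the sketch
// below only shows the generic normalize-and-argmax pattern on invented
// per-class vote totals:
class WeightedVoteSketch {
    public static void main(String[] args) {
        double[] classWeights = {2.5, 1.0, 0.5}; // hypothetical accumulated votes
        double total = 0.0d;
        int best = 0;
        for (int i = 0; i < classWeights.length; i++) {
            total += classWeights[i];
            if (classWeights[i] > classWeights[best]) {
                best = i;
            }
        }
        for (int i = 0; i < classWeights.length; i++) {
            System.out.printf("confidence(class %d) = %.3f%n", i, classWeights[i] / total);
        }
        System.out.println("predicted class: " + best);
    }
}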