/**
 * Creates the model from the given singular values and matrix.
 *
 * <p>Besides storing the arguments, the constructor records the names of the attributes of the
 * example set (in iteration order) and precomputes, for every singular value, the cumulative sum
 * up to and including that value divided by the sum of all singular values.
 *
 * @param exampleSet the example set whose attribute names are recorded
 * @param singularValues the singular values
 * @param vMatrix the matrix stored alongside the singular values
 */
public SVDModel(ExampleSet exampleSet, double[] singularValues, Matrix vMatrix) {
    super(exampleSet);
    this.singularValues = singularValues;
    this.vMatrix = vMatrix;
    this.keepAttributes = false;

    // remember the attribute names in iteration order
    this.attributeNames = new String[exampleSet.getAttributes().size()];
    int nameIndex = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
        this.attributeNames[nameIndex++] = attribute.getName();
    }

    // first pass: running sum of the singular values
    final int numberOfValues = singularValues.length;
    this.cumulativeSingularValueProportion = new double[numberOfValues];
    this.singularValuesSum = 0.0d;
    int position = 0;
    for (double singularValue : singularValues) {
        this.singularValuesSum += singularValue;
        this.cumulativeSingularValueProportion[position++] = this.singularValuesSum;
    }

    // second pass: turn the running sums into proportions of the total
    for (position = 0; position < numberOfValues; position++) {
        this.cumulativeSingularValueProportion[position] /= this.singularValuesSum;
    }
}
/**
 * Assigns every attribute the absolute value of its correlation with the label, as computed by
 * {@code MathFunctions.correlation}.
 *
 * @param exampleSet the example set to compute the weights for
 * @return the weights, keyed by attribute name
 * @throws OperatorException declared by the overridden method
 */
@Override
public AttributeWeights calculateWeights(ExampleSet exampleSet) throws OperatorException {
    final Attributes allAttributes = exampleSet.getAttributes();
    final Attribute label = allAttributes.getLabel();
    final boolean squared = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION);
    final AttributeWeights result = new AttributeWeights(exampleSet);

    getProgress().setTotal(allAttributes.size());
    final int examplesPerAttribute = exampleSet.size();
    int finishedAttributes = 0;
    int examplesSinceUpdate = 0;

    for (Attribute current : allAttributes) {
        double correlation = MathFunctions.correlation(exampleSet, label, current, squared);
        result.setWeight(current.getName(), Math.abs(correlation));

        finishedAttributes++;
        examplesSinceUpdate += examplesPerAttribute;
        // report progress only after more than PROGRESS_UPDATE_STEPS examples were handled
        if (examplesSinceUpdate > PROGRESS_UPDATE_STEPS) {
            examplesSinceUpdate = 0;
            getProgress().setCompleted(finishedAttributes);
        }
    }
    return result;
}
/** * Creates attribute meta data that represents the attribute that will be generated for the * provided arguments. * * @return the {@link AttributeMetaData} for the provided arguments */ public static AttributeMetaData generateAttributeMetaData( ExampleSet exampleSet, String name, ExpressionType expressionType) { AttributeMetaData newAttribute = null; Attribute existingAtt = exampleSet.getAttributes().get(name); int ontology = expressionType.getAttributeType(); if (ontology == Ontology.BINOMINAL) { newAttribute = new AttributeMetaData(name, Ontology.BINOMINAL); HashSet<String> values = new HashSet<>(); values.add("false"); values.add("true"); newAttribute.setValueSet(values, SetRelation.EQUAL); } else { newAttribute = new AttributeMetaData(name, ontology); } // restore role if attribute existed already if (existingAtt != null) { newAttribute.setRole(exampleSet.getAttributes().getRole(existingAtt).getSpecialName()); } return newAttribute; }
/**
 * Replaces every numerical, non-integer attribute by a new integer attribute of the same name.
 *
 * <p>Values are either rounded or cast to {@code long}, depending on the round parameter; NaN
 * values stay NaN. The new attributes are added as regular attributes once the iteration over
 * the old ones has finished.
 *
 * @param exampleSet the example set to convert
 * @return the same example set instance
 * @throws OperatorException declared by the overridden method
 */
@Override
public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    final boolean round = getParameterAsBoolean(PARAMETER_ROUND);
    List<Attribute> integerAttributes = new LinkedList<Attribute>();

    for (Iterator<Attribute> it = exampleSet.getAttributes().iterator(); it.hasNext(); ) {
        Attribute source = it.next();
        boolean numerical =
                Ontology.ATTRIBUTE_VALUE_TYPE.isA(source.getValueType(), Ontology.NUMERICAL);
        if (!numerical
                || Ontology.ATTRIBUTE_VALUE_TYPE.isA(source.getValueType(), Ontology.INTEGER)) {
            continue;
        }

        Attribute target = AttributeFactory.createAttribute(source.getName(), Ontology.INTEGER);
        integerAttributes.add(target);
        exampleSet.getExampleTable().addAttribute(target);

        for (Example example : exampleSet) {
            double original = example.getValue(source);
            if (Double.isNaN(original)) {
                example.setValue(target, Double.NaN);
                continue;
            }
            long converted;
            if (round) {
                converted = Math.round(original);
            } else {
                converted = (long) original;
            }
            example.setValue(target, converted);
        }

        // remove the old attribute through the iterator, since we are iterating over the set
        it.remove();
    }

    for (Attribute created : integerAttributes) {
        exampleSet.getAttributes().addRegular(created);
    }
    return exampleSet;
}
/**
 * Computes, for every attribute of the example set, the mean and the variance (mean of squares
 * minus squared mean) of its values.
 *
 * @param exampleSet the example set to analyze
 * @return a map from the attribute's position in iteration order to its mean and variance
 */
private static Map<Integer, MeanVariance> createMeanVariances(
        com.rapidminer.example.ExampleSet exampleSet) {
    double[] means = new double[exampleSet.getAttributes().size()];
    double[] squaredMeans = new double[means.length];

    // accumulate sums and sums of squares per attribute
    for (com.rapidminer.example.Example example : exampleSet) {
        int column = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            double value = example.getValue(attribute);
            means[column] += value;
            squaredMeans[column] += value * value;
            column++;
        }
    }

    // normalize by the number of examples and derive the variance
    Map<Integer, MeanVariance> result = new HashMap<Integer, MeanVariance>();
    for (int column = 0; column < means.length; column++) {
        means[column] /= exampleSet.size();
        squaredMeans[column] /= exampleSet.size();
        double variance = squaredMeans[column] - (means[column] * means[column]);
        result.put(column, new MeanVariance(means[column], variance));
    }
    return result;
}
/**
 * Exchanges the roles of the two attributes named by the first and second attribute parameters.
 *
 * @param exampleSet the example set containing both attributes
 * @return the same example set instance
 * @throws OperatorException if one of the two attributes cannot be found
 */
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    final String nameA = getParameterAsString(PARAMETER_FIRST_ATTRIBUTE);
    final String nameB = getParameterAsString(PARAMETER_SECOND_ATTRIBUTE);

    final AttributeRole roleA = exampleSet.getAttributes().getRole(nameA);
    final AttributeRole roleB = exampleSet.getAttributes().getRole(nameB);
    if (roleA == null) {
        throw new AttributeNotFoundError(this, PARAMETER_FIRST_ATTRIBUTE, nameA);
    }
    if (roleB == null) {
        throw new AttributeNotFoundError(this, PARAMETER_SECOND_ATTRIBUTE, nameB);
    }

    // remember both role names before resetting the roles
    final String specialA = roleA.getSpecialName();
    final String specialB = roleB.getSpecialName();

    roleA.changeToRegular();
    roleB.changeToRegular();

    roleA.setSpecial(specialB);
    roleB.setSpecial(specialA);
    return exampleSet;
}
/** Trains a model using an ExampleSet from the input. Uses the method learn(ExampleSet). */ @Override public void doWork() throws OperatorException { ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class); // some checks if (exampleSet.getAttributes().getLabel() == null) { throw new UserError(this, 105, new Object[0]); } if (exampleSet.getAttributes().size() == 0) { throw new UserError(this, 106, new Object[0]); } // check capabilities and produce errors if they are not fulfilled CapabilityCheck check = new CapabilityCheck( this, Tools.booleanValue( ParameterService.getParameterValue( CapabilityProvider.PROPERTY_RAPIDMINER_GENERAL_CAPABILITIES_WARN), true)); check.checkLearnerCapabilities(this, exampleSet); Model model = learn(exampleSet); modelOutput.deliver(model); exampleSetOutput.deliver(exampleSet); }
/** * Gets the input data and macro name and iterates over the example set while updating the current * iteration in the given macro. */ @Override public void doWork() throws OperatorException { outExtender.reset(); ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class); String iterationMacroName = getParameterAsString(PARAMETER_ITERATION_MACRO); boolean innerSinkIsConnected = exampleSetInnerSink.isConnected(); for (iteration = 1; iteration <= exampleSet.size(); iteration++) { getProcess().getMacroHandler().addMacro(iterationMacroName, String.valueOf(iteration)); // passing in clone or if connected the result from last iteration exampleSetInnerSource.deliver( innerSinkIsConnected ? exampleSet : (ExampleSet) exampleSet.clone()); getSubprocess(0).execute(); inApplyLoop(); if (innerSinkIsConnected) { exampleSet = exampleSetInnerSink.getData(ExampleSet.class); } outExtender.collect(); } getProcess().getMacroHandler().removeMacro(iterationMacroName); exampleSetOutput.deliver(exampleSet); }
@Override public final void doWork() throws OperatorException { ExampleSet inputExampleSet = exampleSetInput.getData(ExampleSet.class); ExampleSet applySet = null; // check for needed copy of original exampleset if (originalOutput.isConnected() && writesIntoExistingData()) { int type = DataRowFactory.TYPE_DOUBLE_ARRAY; if (inputExampleSet.getExampleTable() instanceof MemoryExampleTable) { DataRowReader dataRowReader = inputExampleSet.getExampleTable().getDataRowReader(); if (dataRowReader.hasNext()) { type = dataRowReader.next().getType(); } } // check if type is supported to be copied if (type >= 0) { applySet = MaterializeDataInMemory.materializeExampleSet(inputExampleSet, type); } } if (applySet == null) applySet = (ExampleSet) inputExampleSet.clone(); // we apply on the materialized data, because writing can't take place in views anyway. ExampleSet result = apply(applySet); originalOutput.deliver(inputExampleSet); exampleSetOutput.deliver(result); }
@Override public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException { // checking and creating ids if necessary Tools.checkAndCreateIds(exampleSet); // generating assignment RandomGenerator random = RandomGenerator.getRandomGenerator(this); int clusterAssignments[] = new int[exampleSet.size()]; int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS); for (int i = 0; i < exampleSet.size(); i++) { clusterAssignments[i] = random.nextInt(k); } ClusterModel model = new ClusterModel( exampleSet, k, getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL), getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED)); model.setClusterAssignments(clusterAssignments, exampleSet); // generating cluster attribute if (addsClusterAttribute()) { Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL); exampleSet.getExampleTable().addAttribute(cluster); exampleSet.getAttributes().setCluster(cluster); int i = 0; for (Example example : exampleSet) { example.setValue(cluster, "cluster_" + clusterAssignments[i]); i++; } } return model; }
@Override public Model learn(ExampleSet exampleSet) throws OperatorException { Kernel kernel = getKernel(); kernel.init(exampleSet); double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE); NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping(); String classNeg = labelMapping.getNegativeString(); String classPos = labelMapping.getPositiveString(); double classValueNeg = labelMapping.getNegativeIndex(); int numberOfAttributes = exampleSet.getAttributes().size(); HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel); model.init(new double[numberOfAttributes], 0); for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) { double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate); Attributes attributes = exampleSet.getAttributes(); for (Example example : exampleSet) { double prediction = model.predict(example); if (prediction != example.getLabel()) { double direction = (example.getLabel() == classValueNeg) ? -1 : 1; // adapting intercept model.setIntercept(model.getIntercept() + learnRate * direction); // adapting coefficients double coefficients[] = model.getCoefficients(); int i = 0; for (Attribute attribute : attributes) { coefficients[i] += learnRate * direction * example.getValue(attribute); i++; } } } } return model; }
/**
 * Creates a model that removes nominal values which do not occur in the given example set.
 *
 * <p>For every nominal attribute, the values with a count greater than zero are mapped into the
 * new mapping of a {@code MappingTranslation}. The translation is registered only if the new
 * mapping is smaller than the attribute's current mapping.
 *
 * @param exampleSet the example set whose attribute statistics are recalculated and inspected
 * @return the model holding one translation per attribute that has unused values
 * @throws OperatorException declared by the overridden method
 */
@Override
public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet)
        throws OperatorException {
    boolean sortMappings = getParameterAsBoolean(PARAMETER_SORT_MAPPING_ALPHABETICALLY);
    Map<String, MappingTranslation> translations = new HashMap<String, MappingTranslation>();

    exampleSet.recalculateAllAttributeStatistics();
    for (Attribute attribute : exampleSet.getAttributes()) {
        // Only nominal attributes are handled. The mapping is requested and cloned after this
        // check, so getMapping() is never called on attributes without a nominal mapping.
        if (!attribute.isNominal()) {
            continue;
        }

        MappingTranslation translation =
                new MappingTranslation((NominalMapping) attribute.getMapping().clone());
        for (String value : attribute.getMapping().getValues()) {
            double count = exampleSet.getStatistics(attribute, Statistics.COUNT, value);
            if (count > 0) {
                translation.newMapping.mapString(value);
            }
        }

        // register the translation only if at least one value is unused
        if (translation.newMapping.size() < attribute.getMapping().size()) {
            if (sortMappings) {
                translation.newMapping.sortMappings();
            }
            translations.put(attribute.getName(), translation);
        }
    }
    return new RemoveUnusedNominalValuesModel(exampleSet, translations);
}
/**
 * Adds a date-time attribute whose values are the values of the selected numerical attribute
 * plus the configured time offset.
 *
 * <p>NaN values are copied unchanged. If the old attribute is not kept, it is removed and the new
 * attribute takes over its name; otherwise the new attribute is named
 * {@code <attributeName>_AS_DATE}.
 *
 * @param exampleSet the example set to extend
 * @return the same example set instance
 * @throws OperatorException with user error 111 if the selected attribute does not exist
 */
@Override
public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
    // primitive long instead of a boxed Long, which would be unboxed for every example
    long offset = getParameterAsInt(PARMETER_TIME_OFFSET);

    Attribute numericalAttribute = exampleSet.getAttributes().get(attributeName);
    if (numericalAttribute == null) {
        throw new UserError(this, 111, attributeName);
    }

    Attribute newAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    for (Example example : exampleSet) {
        double value = example.getValue(numericalAttribute);
        if (!Double.isNaN(value)) {
            // NaN values are left untouched
            value += offset;
        }
        example.setValue(newAttribute, value);
    }

    if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
        // the old attribute has to be removed before its name can be reused
        exampleSet.getAttributes().remove(numericalAttribute);
        newAttribute.setName(attributeName);
    } else {
        newAttribute.setName(attributeName + "_AS_DATE");
    }
    return exampleSet;
}
private BasicNetwork getNetwork(ExampleSet exampleSet) throws OperatorException { BasicNetwork network = new BasicNetwork(); // input layer network.addLayer(new FeedforwardLayer(exampleSet.getAttributes().size())); // hidden layers log("No hidden layers defined. Using default hidden layers."); int layerSize = getParameterAsInt(PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE); if (layerSize <= 0) layerSize = getDefaultLayerSize(exampleSet); for (int p = 0; p < getParameterAsInt(PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS); p++) { network.addLayer(new FeedforwardLayer(layerSize)); } // output layer if (exampleSet.getAttributes().getLabel().isNominal()) { network.addLayer(new FeedforwardLayer(new ActivationSigmoid(), 1)); } else { network.addLayer(new FeedforwardLayer(new ActivationLinear(), 1)); } network.reset( RandomGenerator.getRandomGenerator( getParameterAsBoolean(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED), getParameterAsInt(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED))); return network; }
/**
 * Creates the partition builder for the given sampling type.
 *
 * <p>Stratified sampling (also used for unknown sampling types) requires a nominal label; without
 * one, a note is logged and a shuffled partition builder is returned instead.
 *
 * @param exampleSet the example set to partition
 * @param samplingType one of the sampling type constants
 * @param seed the seed handed to the shuffled and stratified builders
 * @return the partition builder
 */
private static PartitionBuilder createPartitionBuilder(
        ExampleSet exampleSet, int samplingType, int seed) {
    switch (samplingType) {
        case LINEAR_SAMPLING:
            return new SimplePartitionBuilder();
        case SHUFFLED_SAMPLING:
            return new ShuffledPartitionBuilder(true, seed);
        case STRATIFIED_SAMPLING:
        default:
            // handled below
            break;
    }

    Attribute label = exampleSet.getAttributes().getLabel();
    boolean nominalLabel = (label != null) && (label.isNominal());
    if (nominalLabel) {
        return new StratifiedPartitionBuilder(exampleSet, true, seed);
    }

    exampleSet
            .getLog()
            .logNote(
                    "Example set has no nominal label: using shuffled partition instead of stratified partition!");
    return new ShuffledPartitionBuilder(true, seed);
}
/** @see com.rapidminer.operator.OperatorChain#doWork() */ @Override public void doWork() throws OperatorException { List<Operator> nested = this.getImmediateChildren(); log.info("This StreamProcess has {} nested operators", nested.size()); for (Operator op : nested) { log.info(" op: {}", op); if (op instanceof DataStreamOperator) { log.info("Resetting stream-operator {}", op); ((DataStreamOperator) op).reset(); } } log.info("Starting some work in doWork()"); ExampleSet exampleSet = input.getData(ExampleSet.class); log.info("input is an example set with {} examples", exampleSet.size()); int i = 0; Iterator<Example> it = exampleSet.iterator(); while (it.hasNext()) { Example example = it.next(); log.info("Processing example {}", i); DataObject datum = StreamUtils.wrap(example); log.info("Wrapped data-object is: {}", datum); dataStream.deliver(datum); getSubprocess(0).execute(); inApplyLoop(); i++; } // super.doWork(); log.info("doWork() is finished."); }
/**
 * Turns the centroids of the incoming centroid cluster model into an example set: one row per
 * cluster, holding the centroid coordinates and a nominal {@code "cluster"} attribute with the
 * value {@code "cluster_<index>"}. The model itself is passed through unchanged.
 *
 * @throws OperatorException if the model cannot be read from the input port
 */
@Override
public void doWork() throws OperatorException {
    CentroidClusterModel model = modelInput.getData(CentroidClusterModel.class);
    Attributes trainingAttributes = model.getTrainingHeader().getAttributes();
    String[] names = model.getAttributeNames();
    final int dimensions = names.length;

    // one column per model attribute plus the trailing cluster column
    Attribute[] columns = new Attribute[dimensions + 1];
    for (int column = 0; column < dimensions; column++) {
        Attribute template = trainingAttributes.get(names[column]);
        columns[column] = AttributeFactory.createAttribute(names[column], template.getValueType());
        if (template.isNominal()) {
            columns[column].setMapping((NominalMapping) template.getMapping().clone());
        }
    }
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    columns[dimensions] = clusterAttribute;

    MemoryExampleTable table = new MemoryExampleTable(columns);
    for (int cluster = 0; cluster < model.getNumberOfClusters(); cluster++) {
        double[] row = new double[dimensions + 1];
        System.arraycopy(model.getCentroidCoordinates(cluster), 0, row, 0, dimensions);
        row[dimensions] = clusterAttribute.getMapping().mapString("cluster_" + cluster);
        table.addDataRow(new DoubleArrayDataRow(row));
    }

    ExampleSet centroids = table.createExampleSet();
    centroids.getAttributes().setSpecialAttribute(clusterAttribute, Attributes.CLUSTER_NAME);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(centroids);
}
@Override public ExampleSet read() throws OperatorException { FileInputStream inStream = null; try { inStream = new FileInputStream(getParameterAsFile(PARAMETER_FASTA_FILE_NAME)); } catch (FileNotFoundException e) { // TODO: "Fill" } FastaReader<DNASequence, NucleotideCompound> fastaReader = new FastaReader<DNASequence, NucleotideCompound>( inStream, new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(), new DNASequenceCreator(DNACompoundSet.getDNACompoundSet())); LinkedHashMap<String, DNASequence> b = null; try { b = fastaReader.process(); } catch (Exception e) { // TODO: "Fill" } String data[][] = new String[0][]; if (b != null) { data = new String[b.size()][2]; int i = 0; for (Map.Entry<String, DNASequence> entry : b.entrySet()) { data[i][0] = entry.getValue().getOriginalHeader(); data[i][1] = entry.getValue().getSequenceAsString(); i++; } } ExampleSet outSet = ExampleSetFactory.createExampleSet(data); outSet.getAttributes().get("att1").setName("DNA name"); outSet.getAttributes().get("att2").setName("Chain"); return outSet; }
private List<AggregationFunction> createAggreationFunctions(ExampleSet exampleSet) throws OperatorException { // load global switches boolean ignoreMissings = getParameterAsBoolean(PARAMETER_IGNORE_MISSINGS); boolean countOnlyDistinct = getParameterAsBoolean(PARAMETER_ONLY_DISTINCT); // creating data structures for building aggregates List<AggregationFunction> aggregationFunctions = new LinkedList<AggregationFunction>(); // building functions for all explicitly defined aggregation attributes Set<Attribute> explicitlyAggregatedAttributes = new HashSet<Attribute>(); List<String[]> aggregationFunctionPairs = getParameterList(PARAMETER_AGGREGATION_ATTRIBUTES); for (String[] aggregationFunctionPair : aggregationFunctionPairs) { Attribute attribute = exampleSet.getAttributes().get(aggregationFunctionPair[0]); if (attribute == null) { throw new UserError( this, "aggregation.aggregation_attribute_not_present", aggregationFunctionPair[0]); } AggregationFunction function = AggregationFunction.createAggregationFunction( aggregationFunctionPair[1], attribute, ignoreMissings, countOnlyDistinct); if (!function.isCompatible()) { throw new UserError( this, "aggregation.incompatible_attribute_type", attribute.getName(), aggregationFunctionPair[1]); } // adding objects for this attribute to structure explicitlyAggregatedAttributes.add(attribute); aggregationFunctions.add(function); } // building the default aggregations if (getParameterAsBoolean(PARAMETER_USE_DEFAULT_AGGREGATION)) { String defaultAggregationFunctionName = getParameterAsString(PARAMETER_DEFAULT_AGGREGATION_FUNCTION); Iterator<Attribute> iterator = attributeSelector.getAttributeSubset(exampleSet, false).iterator(); if (getCompatibilityLevel().isAtMost(VERSION_5_2_8)) { iterator = exampleSet.getAttributes().iterator(); } while (iterator.hasNext()) { Attribute attribute = iterator.next(); if (!explicitlyAggregatedAttributes.contains(attribute)) { AggregationFunction function = 
AggregationFunction.createAggregationFunction( defaultAggregationFunctionName, attribute, ignoreMissings, countOnlyDistinct); if (function.isCompatible()) { aggregationFunctions.add(function); } } } } return aggregationFunctions; }
/**
 * Creates the SVM examples from a RapidMiner example set.
 *
 * <p>Only values that differ from the attribute's default are stored (sparse representation).
 * Values with an entry in the mean/variance map are standardized, or set to zero if the variance
 * is zero. Labels and ids are copied if the respective attributes are available; nominal labels
 * become {@code 1} for the positive index and {@code -1} otherwise.
 *
 * @param exampleSet the example set to copy
 * @param labelAttribute the label attribute, or {@code null} if the label is unknown
 * @param meanVariances mean and variance per attribute position, used for standardization
 */
public SVMExamples(
        com.rapidminer.example.ExampleSet exampleSet,
        Attribute labelAttribute,
        Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Attribute idAttribute = exampleSet.getAttributes().getId();
    int row = 0;
    for (com.rapidminer.example.Example current : exampleSet) {
        // collect the non-default values of this example, keyed by attribute position
        Map<Integer, Double> sparseValues = new LinkedHashMap<Integer, Double>();
        int column = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            double raw = current.getValue(attribute);
            if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), raw)) {
                sparseValues.put(column, raw);
            }
            column++;
            // keep track of the largest number of attributes seen so far
            if (column > dim) {
                dim = column;
            }
        }

        atts[row] = new double[sparseValues.size()];
        index[row] = new int[sparseValues.size()];

        int slot = 0;
        for (Map.Entry<Integer, Double> entry : sparseValues.entrySet()) {
            Integer columnIndex = entry.getKey();
            index[row][slot] = columnIndex.intValue();

            double value = entry.getValue().doubleValue();
            MeanVariance meanVariance = meanVarianceMap.get(columnIndex);
            if (meanVariance != null) {
                if (meanVariance.getVariance() != 0.0d) {
                    value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
                } else {
                    value = 0.0d;
                }
            }
            atts[row][slot] = value;
            slot++;
        }

        if (labelAttribute != null) {
            double label = current.getValue(labelAttribute);
            if (!labelAttribute.isNominal()) {
                ys[row] = label;
            } else {
                boolean positive = label == labelAttribute.getMapping().getPositiveIndex();
                ys[row] = (positive ? 1 : -1);
            }
        }
        if (idAttribute != null) {
            ids[row] = current.getValueAsString(idAttribute);
        }
        row++;
    }
}
public Model learn(ExampleSet exampleSet) throws OperatorException { double value = 0.0; double[] confidences = null; int method = getParameterAsInt(PARAMETER_METHOD); Attribute label = exampleSet.getAttributes().getLabel(); if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) { logWarning( "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!"); method = MODE; } else if ((!label.isNominal()) && (method == MODE)) { logWarning( "Cannot use method '" + METHODS[method] + "' for numerical labels: changing to 'average'!"); method = AVERAGE; } switch (method) { case MEDIAN: double[] labels = new double[exampleSet.size()]; Iterator<Example> r = exampleSet.iterator(); int counter = 0; while (r.hasNext()) { Example example = r.next(); labels[counter++] = example.getValue(example.getAttributes().getLabel()); } java.util.Arrays.sort(labels); value = labels[exampleSet.size() / 2]; break; case AVERAGE: exampleSet.recalculateAttributeStatistics(label); value = exampleSet.getStatistics(label, Statistics.AVERAGE); break; case MODE: exampleSet.recalculateAttributeStatistics(label); value = exampleSet.getStatistics(label, Statistics.MODE); confidences = new double[label.getMapping().size()]; for (int i = 0; i < confidences.length; i++) { confidences[i] = exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i)) / exampleSet.size(); } break; case CONSTANT: value = getParameterAsDouble(PARAMETER_CONSTANT); break; case ATTRIBUTE: return new AttributeDefaultModel( exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME)); default: // cannot happen throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!"); } log( "Default value is '" + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "") + "'."); return new DefaultModel(exampleSet, value, confidences); }
/** * Helper method replacing <code>Model.createPredictedLabel(ExampleSet)</code> in order to lower * memory consumption. */ private static void createOrReplacePredictedLabelFor(ExampleSet exampleSet, Model model) { Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel(); if (predictedLabel != null) { // remove old predicted label exampleSet.getAttributes().remove(predictedLabel); exampleSet.getExampleTable().removeAttribute(predictedLabel); } // model.createPredictedLabel(exampleSet); // not longer necessary since // label creation is done by model.apply(...). }
/* * Extracts an example set containing just the two specified * attributes and no missing values. * * @param eSet the source example set * @param a the first attribute to extract * @param b the second attribute to extract * @return the reduced example set */ private static ExampleSet extract(ExampleSet eSet, Attribute a, Attribute b) { // create a new example set containing just attributes a and b ExampleSet e = (ExampleSet) eSet.clone(); e.getAttributes().clearRegular(); e.getAttributes().clearSpecial(); e.getAttributes().addRegular(a); e.getAttributes().addRegular(b); return new ConditionedExampleSet(e, new NoMissingAttributesCondition(e, null)); }
/**
 * Trains a slope one recommender from the input example set.
 *
 * <p>The example set needs the special attributes {@code "user identification"} and
 * {@code "item identification"} as well as a label holding the rating. User and item values are
 * cast to {@code int} and translated to internal ids before the ratings are added.
 *
 * @throws OperatorException with user error 105 if one of the required attributes is missing
 */
@Override
public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData();

    IEntityMapping userMapping = new EntityMapping();
    IEntityMapping itemMapping = new EntityMapping();
    IRatings ratings = new Ratings();

    // all three roles are required
    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
        throw new UserError(this, 105);
    }
    if (exampleSet.getAttributes().getLabel() == null) {
        throw new UserError(this, 105);
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute userAttribute = attributes.getRole("user identification").getAttribute();
    Attribute itemAttribute = attributes.getRole("item identification").getAttribute();
    Attribute ratingAttribute = attributes.getLabel();

    for (Example example : exampleSet) {
        double rawUser = example.getValue(userAttribute);
        int userId = userMapping.ToInternalID((int) rawUser);

        double rawItem = example.getValue(itemAttribute);
        int itemId = itemMapping.ToInternalID((int) rawItem);

        double rating = example.getValue(ratingAttribute);
        ratings.Add(userId, itemId, rating);
    }

    _slopeOne recommender = new _slopeOne();
    recommender.user_mapping = userMapping;
    recommender.item_mapping = itemMapping;
    recommender.SetMinRating(getParameterAsInt("Min Rating"));
    recommender.SetMaxRating(recommender.GetMinRating() + getParameterAsInt("Range"));
    recommender.SetRatings(ratings);
    recommender.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommender);
}
@Override public Model learn(ExampleSet exampleSet) throws OperatorException { DistanceMeasure measure = DistanceMeasures.createMeasure(this); measure.init(exampleSet); GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure); // check if weights should be used boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS); // check if robust estimate should be performed: Then calculate weights and use it anyway if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) { useWeights = true; LocalPolynomialExampleWeightingOperator weightingOperator; try { weightingOperator = OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class); exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this); } catch (OperatorCreationException e) { throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage()); } } Attributes attributes = exampleSet.getAttributes(); Attribute label = attributes.getLabel(); Attribute weightAttribute = attributes.getWeight(); for (Example example : exampleSet) { double[] values = new double[attributes.size()]; double labelValue = example.getValue(label); double weight = 1d; if (weightAttribute != null && useWeights) { weight = example.getValue(weightAttribute); } // filter out examples without influence if (weight > 0d) { // copying example values int i = 0; for (Attribute attribute : attributes) { values[i] = example.getValue(attribute); i++; } // inserting into geometric data collection data.add(values, new RegressionData(values, labelValue, weight)); } } return new LocalPolynomialRegressionModel( exampleSet, data, Neighborhoods.createNeighborhood(this), SmoothingKernels.createKernel(this), getParameterAsInt(PARAMETER_DEGREE), getParameterAsDouble(PARAMETER_RIDGE)); }
// checking for example set and valid attributes @Override public void init(ExampleSet exampleSet) throws OperatorException { super.init(exampleSet); Tools.onlyNominalAttributes(exampleSet, "nominal similarities"); this.useAttribute = new boolean[exampleSet.getAttributes().size()]; int i = 0; for (Attribute attribute : exampleSet.getAttributes()) { if (attribute.isNominal()) { useAttribute[i] = true; } i++; } }
private void restoreOldWeights(ExampleSet exampleSet) { if (this.oldWeights != null) { // need to reset weights Iterator<Example> reader = exampleSet.iterator(); int i = 0; while (reader.hasNext() && i < this.oldWeights.length) { reader.next().setWeight(this.oldWeights[i++]); } } else { // need to delete the weights attribute Attribute weight = exampleSet.getAttributes().getWeight(); exampleSet.getAttributes().remove(weight); exampleSet.getExampleTable().removeAttribute(weight); } }
/**
 * Reports whether another example is available.
 *
 * <p>If the next-invoked flag is set, the flag is cleared, the index is advanced and, when still
 * in range, the example at the new index becomes the current example.
 *
 * @return {@code true} if the current index is smaller than the size of the parent
 */
public boolean hasNext() {
    if (!this.nextInvoked) {
        return (this.currentIndex < parent.size());
    }

    // advance to the following example
    this.nextInvoked = false;
    this.currentIndex++;
    boolean available = this.currentIndex < parent.size();
    if (available) {
        this.currentExample = this.parent.getExample(this.currentIndex);
    }
    return available;
}
/**
 * Computes one representative value per attribute after recalculating all attribute statistics:
 * the minimum for date-time attributes, the mode for nominal attributes and the average for all
 * other attributes.
 *
 * @param exampleSet the example set to summarize
 * @return the values in attribute iteration order
 */
private double[] getMeanVector(ExampleSet exampleSet) {
    exampleSet.recalculateAllAttributeStatistics();
    Attributes attributes = exampleSet.getAttributes();
    double[] result = new double[attributes.size()];

    int position = 0;
    for (Attribute attribute : attributes) {
        double representative;
        if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
            representative = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
        } else if (attribute.isNominal()) {
            representative = exampleSet.getStatistics(attribute, Statistics.MODE);
        } else {
            representative = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
        }
        result[position++] = representative;
    }
    return result;
}
/**
 * Splits the example set into two partitions by comparing the given attribute with a threshold:
 * examples with a value less than or equal to the threshold go to partition 0, all others to
 * partition 1.
 *
 * @param exampleSet the example set to split
 * @param attribute the attribute whose values are compared
 * @param value the threshold
 * @return the splitted example set with two partitions
 */
public static SplittedExampleSet splitByAttribute(
        ExampleSet exampleSet, Attribute attribute, double value) {
    int[] partitionOf = new int[exampleSet.size()];

    int position = 0;
    for (Example example : exampleSet) {
        partitionOf[position++] = (example.getValue(attribute) <= value) ? 0 : 1;
    }

    return new SplittedExampleSet(exampleSet, new Partition(partitionOf, 2));
}