Exemplo n.º 1
0
  public SVDModel(ExampleSet exampleSet, double[] singularValues, Matrix vMatrix) {
    super(exampleSet);

    this.vMatrix = vMatrix;
    this.singularValues = singularValues;

    this.keepAttributes = false;
    this.attributeNames = new String[exampleSet.getAttributes().size()];
    int counter = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      attributeNames[counter] = attribute.getName();
      counter++;
    }

    // compute cumulative values
    cumulativeSingularValueProportion = new double[singularValues.length];
    // insert cumulative sum of singular values
    singularValuesSum = 0.0d;
    for (int i = 0; i < singularValues.length; i++) {
      singularValuesSum += singularValues[i];
      cumulativeSingularValueProportion[i] = singularValuesSum;
    }

    // now reduce to proportion
    for (int i = 0; i < singularValues.length; i++) {
      cumulativeSingularValueProportion[i] /= singularValuesSum;
    }
  }
Exemplo n.º 2
0
  public PCAModel(ExampleSet eSet, double[] eigenvalues, double[][] eigenvectors) {
    super(eSet);

    this.keepAttributes = false;
    this.attributeNames = new String[eSet.getAttributes().size()];
    this.means = new double[eSet.getAttributes().size()];
    int counter = 0;
    eSet.recalculateAllAttributeStatistics(); // ensures that the statistics were created
    for (Attribute attribute : eSet.getAttributes()) {
      attributeNames[counter] = attribute.getName();
      means[counter] = eSet.getStatistics(attribute, Statistics.AVERAGE);
      counter++;
    }
    this.eigenVectors = new ArrayList<Eigenvector>(eigenvalues.length);
    for (int i = 0; i < eigenvalues.length; i++) {
      double[] currentEigenVector = new double[eSet.getAttributes().size()];
      for (int j = 0; j < currentEigenVector.length; j++) {
        currentEigenVector[j] = eigenvectors[j][i];
      }
      this.eigenVectors.add(new Eigenvector(currentEigenVector, eigenvalues[i]));
    }

    // order the eigenvectors by the eigenvalues
    Collections.sort(this.eigenVectors);

    calculateCumulativeVariance();
  }
  /**
   * Creates attribute meta data that represents the attribute that will be generated for the
   * provided arguments.
   *
   * @return the {@link AttributeMetaData} for the provided arguments
   */
  public static AttributeMetaData generateAttributeMetaData(
      ExampleSet exampleSet, String name, ExpressionType expressionType) {

    AttributeMetaData newAttribute = null;
    Attribute existingAtt = exampleSet.getAttributes().get(name);

    int ontology = expressionType.getAttributeType();

    if (ontology == Ontology.BINOMINAL) {
      newAttribute = new AttributeMetaData(name, Ontology.BINOMINAL);
      HashSet<String> values = new HashSet<>();
      values.add("false");
      values.add("true");
      newAttribute.setValueSet(values, SetRelation.EQUAL);
    } else {
      newAttribute = new AttributeMetaData(name, ontology);
    }

    // restore role if attribute existed already
    if (existingAtt != null) {
      newAttribute.setRole(exampleSet.getAttributes().getRole(existingAtt).getSpecialName());
    }

    return newAttribute;
  }
Exemplo n.º 4
0
  private static Map<Integer, MeanVariance> createMeanVariances(
      com.rapidminer.example.ExampleSet exampleSet) {
    double[] sum = new double[exampleSet.getAttributes().size()];
    double[] squaredSum = new double[sum.length];

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
      com.rapidminer.example.Example example = reader.next();
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = example.getValue(attribute);
        sum[a] += value;
        squaredSum[a] += value * value;
        a++;
      }
    }

    Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
    for (int a = 0; a < sum.length; a++) {
      sum[a] /= exampleSet.size();
      squaredSum[a] /= exampleSet.size();
      meanVariances.put(a, new MeanVariance(sum[a], squaredSum[a] - (sum[a] * sum[a])));
    }

    return meanVariances;
  }
  @Override
  public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    boolean round = getParameterAsBoolean(PARAMETER_ROUND);

    List<Attribute> newAttributes = new LinkedList<Attribute>();
    Iterator<Attribute> a = exampleSet.getAttributes().iterator();
    while (a.hasNext()) {
      Attribute attribute = a.next();
      if ((Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL))
          && (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.INTEGER))) {
        Attribute newAttribute =
            AttributeFactory.createAttribute(attribute.getName(), Ontology.INTEGER);
        newAttributes.add(newAttribute);
        exampleSet.getExampleTable().addAttribute(newAttribute);
        for (Example example : exampleSet) {
          double originalValue = example.getValue(attribute);
          if (Double.isNaN(originalValue)) {
            example.setValue(newAttribute, Double.NaN);
          } else {
            long newValue = round ? Math.round(originalValue) : (long) originalValue;
            example.setValue(newAttribute, newValue);
          }
        }
        a.remove();
      }
    }

    for (Attribute attribute : newAttributes) exampleSet.getAttributes().addRegular(attribute);

    return exampleSet;
  }
  /** Trains a model using an ExampleSet from the input. Uses the method learn(ExampleSet). */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // some checks
    if (exampleSet.getAttributes().getLabel() == null) {
      throw new UserError(this, 105, new Object[0]);
    }
    if (exampleSet.getAttributes().size() == 0) {
      throw new UserError(this, 106, new Object[0]);
    }

    // check capabilities and produce errors if they are not fulfilled
    CapabilityCheck check =
        new CapabilityCheck(
            this,
            Tools.booleanValue(
                ParameterService.getParameterValue(
                    CapabilityProvider.PROPERTY_RAPIDMINER_GENERAL_CAPABILITIES_WARN),
                true));
    check.checkLearnerCapabilities(this, exampleSet);

    Model model = learn(exampleSet);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(exampleSet);
  }
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String firstName = getParameterAsString(PARAMETER_FIRST_ATTRIBUTE);
    String secondName = getParameterAsString(PARAMETER_SECOND_ATTRIBUTE);

    AttributeRole firstRole = exampleSet.getAttributes().getRole(firstName);
    AttributeRole secondRole = exampleSet.getAttributes().getRole(secondName);

    if (firstRole == null) {
      throw new AttributeNotFoundError(this, PARAMETER_FIRST_ATTRIBUTE, firstName);
    }

    if (secondRole == null) {
      throw new AttributeNotFoundError(this, PARAMETER_SECOND_ATTRIBUTE, secondName);
    }

    String firstRoleName = firstRole.getSpecialName();
    String secondRoleName = secondRole.getSpecialName();

    firstRole.changeToRegular();
    secondRole.changeToRegular();

    firstRole.setSpecial(secondRoleName);
    secondRole.setSpecial(firstRoleName);

    return exampleSet;
  }
Exemplo n.º 8
0
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();
    int numberOfAttributes = exampleSet.getAttributes().size();
    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[numberOfAttributes], 0);
    for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) {
      double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate);
      Attributes attributes = exampleSet.getAttributes();
      for (Example example : exampleSet) {
        double prediction = model.predict(example);
        if (prediction != example.getLabel()) {
          double direction = (example.getLabel() == classValueNeg) ? -1 : 1;
          // adapting intercept
          model.setIntercept(model.getIntercept() + learnRate * direction);
          // adapting coefficients
          double coefficients[] = model.getCoefficients();
          int i = 0;
          for (Attribute attribute : attributes) {
            coefficients[i] += learnRate * direction * example.getValue(attribute);
            i++;
          }
        }
      }
    }
    return model;
  }
  private BasicNetwork getNetwork(ExampleSet exampleSet) throws OperatorException {
    BasicNetwork network = new BasicNetwork();

    // input layer
    network.addLayer(new FeedforwardLayer(exampleSet.getAttributes().size()));

    // hidden layers
    log("No hidden layers defined. Using default hidden layers.");
    int layerSize = getParameterAsInt(PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE);
    if (layerSize <= 0) layerSize = getDefaultLayerSize(exampleSet);
    for (int p = 0; p < getParameterAsInt(PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS); p++) {
      network.addLayer(new FeedforwardLayer(layerSize));
    }

    // output layer
    if (exampleSet.getAttributes().getLabel().isNominal()) {
      network.addLayer(new FeedforwardLayer(new ActivationSigmoid(), 1));
    } else {
      network.addLayer(new FeedforwardLayer(new ActivationLinear(), 1));
    }

    network.reset(
        RandomGenerator.getRandomGenerator(
            getParameterAsBoolean(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED),
            getParameterAsInt(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED)));

    return network;
  }
Exemplo n.º 10
0
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
    Long offset = getParameterAsLong(PARMETER_TIME_OFFSET);

    Attribute numericalAttribute = exampleSet.getAttributes().get(attributeName);
    if (numericalAttribute == null) {
      throw new UserError(this, 111, attributeName);
    }

    Attribute newAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    for (Example example : exampleSet) {
      double value = example.getValue(numericalAttribute);
      if (Double.isNaN(value)) {
        example.setValue(newAttribute, value);
      } else {
        value += offset;
        example.setValue(newAttribute, value);
      }
    }

    if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
      AttributeRole oldRole = exampleSet.getAttributes().getRole(numericalAttribute);
      exampleSet.getAttributes().remove(numericalAttribute);
      newAttribute.setName(attributeName);
      exampleSet.getAttributes().setSpecialAttribute(newAttribute, oldRole.getSpecialName());
    } else {
      newAttribute.setName(attributeName + "_AS_DATE");
    }
    return exampleSet;
  }
Exemplo n.º 11
0
  /**
   * Creates a fresh example set of the given size from the RapidMiner example reader. The alpha
   * values and b are zero, the label will be set if it is known.
   */
  public SVMExamples(
      com.rapidminer.example.ExampleSet exampleSet,
      Attribute labelAttribute,
      Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    Attribute idAttribute = exampleSet.getAttributes().getId();
    int exampleCounter = 0;
    while (reader.hasNext()) {
      com.rapidminer.example.Example current = reader.next();
      Map<Integer, Double> attributeMap = new LinkedHashMap<Integer, Double>();
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = current.getValue(attribute);
        if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), value)) {
          attributeMap.put(a, value);
        }
        if ((a + 1) > dim) {
          dim = (a + 1);
        }
        a++;
      }
      atts[exampleCounter] = new double[attributeMap.size()];
      index[exampleCounter] = new int[attributeMap.size()];
      Iterator<Map.Entry<Integer, Double>> i = attributeMap.entrySet().iterator();
      int attributeCounter = 0;
      while (i.hasNext()) {
        Map.Entry<Integer, Double> e = i.next();
        Integer indexValue = e.getKey();
        Double attributeValue = e.getValue();
        index[exampleCounter][attributeCounter] = indexValue.intValue();
        double value = attributeValue.doubleValue();
        MeanVariance meanVariance = meanVarianceMap.get(indexValue);
        if (meanVariance != null) {
          if (meanVariance.getVariance() == 0.0d) {
            value = 0.0d;
          } else {
            value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
          }
        }
        atts[exampleCounter][attributeCounter] = value;
        attributeCounter++;
      }
      if (labelAttribute != null) {
        double label = current.getValue(labelAttribute);
        if (labelAttribute.isNominal()) {
          ys[exampleCounter] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
        } else {
          ys[exampleCounter] = label;
        }
      }
      if (idAttribute != null) {
        ids[exampleCounter] = current.getValueAsString(idAttribute);
      }
      exampleCounter++;
    }
  }
  private List<AggregationFunction> createAggreationFunctions(ExampleSet exampleSet)
      throws OperatorException {
    // load global switches
    boolean ignoreMissings = getParameterAsBoolean(PARAMETER_IGNORE_MISSINGS);
    boolean countOnlyDistinct = getParameterAsBoolean(PARAMETER_ONLY_DISTINCT);

    // creating data structures for building aggregates
    List<AggregationFunction> aggregationFunctions = new LinkedList<AggregationFunction>();

    // building functions for all explicitly defined aggregation attributes
    Set<Attribute> explicitlyAggregatedAttributes = new HashSet<Attribute>();
    List<String[]> aggregationFunctionPairs = getParameterList(PARAMETER_AGGREGATION_ATTRIBUTES);
    for (String[] aggregationFunctionPair : aggregationFunctionPairs) {
      Attribute attribute = exampleSet.getAttributes().get(aggregationFunctionPair[0]);
      if (attribute == null) {
        throw new UserError(
            this, "aggregation.aggregation_attribute_not_present", aggregationFunctionPair[0]);
      }
      AggregationFunction function =
          AggregationFunction.createAggregationFunction(
              aggregationFunctionPair[1], attribute, ignoreMissings, countOnlyDistinct);
      if (!function.isCompatible()) {
        throw new UserError(
            this,
            "aggregation.incompatible_attribute_type",
            attribute.getName(),
            aggregationFunctionPair[1]);
      }
      // adding objects for this attribute to structure
      explicitlyAggregatedAttributes.add(attribute);
      aggregationFunctions.add(function);
    }

    // building the default aggregations
    if (getParameterAsBoolean(PARAMETER_USE_DEFAULT_AGGREGATION)) {
      String defaultAggregationFunctionName =
          getParameterAsString(PARAMETER_DEFAULT_AGGREGATION_FUNCTION);

      Iterator<Attribute> iterator =
          attributeSelector.getAttributeSubset(exampleSet, false).iterator();
      if (getCompatibilityLevel().isAtMost(VERSION_5_2_8)) {
        iterator = exampleSet.getAttributes().iterator();
      }

      while (iterator.hasNext()) {
        Attribute attribute = iterator.next();
        if (!explicitlyAggregatedAttributes.contains(attribute)) {
          AggregationFunction function =
              AggregationFunction.createAggregationFunction(
                  defaultAggregationFunctionName, attribute, ignoreMissings, countOnlyDistinct);
          if (function.isCompatible()) {
            aggregationFunctions.add(function);
          }
        }
      }
    }

    return aggregationFunctions;
  }
Exemplo n.º 13
0
 /*
  * Extracts an example set containing just the two specified
  * attributes and no missing values.
  *
  * @param eSet the source example set
  * @param a the first attribute to extract
  * @param b the second attribute to extract
  * @return the reduced example set
  */
 private static ExampleSet extract(ExampleSet eSet, Attribute a, Attribute b) {
   // create a new example set containing just attributes a and b
   ExampleSet e = (ExampleSet) eSet.clone();
   e.getAttributes().clearRegular();
   e.getAttributes().clearSpecial();
   e.getAttributes().addRegular(a);
   e.getAttributes().addRegular(b);
   return new ConditionedExampleSet(e, new NoMissingAttributesCondition(e, null));
 }
Exemplo n.º 14
0
 /**
  * Helper method replacing <code>Model.createPredictedLabel(ExampleSet)</code> in order to lower
  * memory consumption.
  */
 private static void createOrReplacePredictedLabelFor(ExampleSet exampleSet, Model model) {
   Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel();
   if (predictedLabel != null) { // remove old predicted label
     exampleSet.getAttributes().remove(predictedLabel);
     exampleSet.getExampleTable().removeAttribute(predictedLabel);
   }
   // model.createPredictedLabel(exampleSet); // not longer necessary since
   // label creation is done by model.apply(...).
 }
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // recall: difference = minuend - subtrahend
    // but the subtrahend is last on the ioobjects stack, so pop first
    ExampleSet subtrahendSet = subtrahendInput.getData(ExampleSet.class);
    ExampleSet minuendSet = exampleSet;

    subtrahendSet.remapIds();
    minuendSet.remapIds();

    Attribute minuendId = minuendSet.getAttributes().getId();
    Attribute subtrahendId = subtrahendSet.getAttributes().getId();

    // sanity checks
    if ((minuendId == null) || (subtrahendId == null)) {
      throw new UserError(this, 129);
    }
    if (minuendId.getValueType() != subtrahendId.getValueType()) {
      throw new UserError(
          this,
          120,
          new Object[] {
            subtrahendId.getName(),
            Ontology.VALUE_TYPE_NAMES[subtrahendId.getValueType()],
            Ontology.VALUE_TYPE_NAMES[minuendId.getValueType()]
          });
    }

    List<Integer> indices = new LinkedList<>();
    {
      int i = 0;
      for (Example example : minuendSet) {
        double id = example.getValue(minuendId);
        Example subtrahendExample = null;
        if (minuendId.isNominal()) {
          subtrahendExample =
              subtrahendSet.getExampleFromId(
                  subtrahendId.getMapping().getIndex(minuendId.getMapping().mapIndex((int) id)));
        } else {
          subtrahendExample = subtrahendSet.getExampleFromId(id);
        }
        if (subtrahendExample == null) {
          indices.add(i);
        }
        i++;
      }
    }

    int[] indexArray = new int[indices.size()];
    for (int i = 0; i < indices.size(); i++) {
      indexArray[i] = indices.get(i);
    }

    ExampleSet minusSet = new MappedExampleSet(minuendSet, indexArray);
    return minusSet;
  }
Exemplo n.º 16
0
  @Override
  public void doWork() throws OperatorException {

    ExampleSet exampleSet = exampleSetInput.getData();

    IEntityMapping user_mapping = new EntityMapping();
    IEntityMapping item_mapping = new EntityMapping();
    IRatings training_data = new Ratings();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
      throw new UserError(this, 105);
    }

    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
      throw new UserError(this, 105);
    }

    if (exampleSet.getAttributes().getLabel() == null) {
      throw new UserError(this, 105);
    }

    Attributes Att = exampleSet.getAttributes();
    AttributeRole ur = Att.getRole("user identification");
    Attribute u = ur.getAttribute();
    AttributeRole ir = Att.getRole("item identification");
    Attribute i = ir.getAttribute();
    Attribute ui = Att.getLabel();

    for (Example example : exampleSet) {

      double j = example.getValue(u);
      int uid = user_mapping.ToInternalID((int) j);

      j = example.getValue(i);
      int iid = item_mapping.ToInternalID((int) j);

      double r = example.getValue(ui);
      training_data.Add(uid, iid, r);
    }

    _slopeOne recommendAlg = new _slopeOne();

    recommendAlg.user_mapping = user_mapping;
    recommendAlg.item_mapping = item_mapping;
    recommendAlg.SetMinRating(getParameterAsInt("Min Rating"));
    recommendAlg.SetMaxRating(recommendAlg.GetMinRating() + getParameterAsInt("Range"));

    recommendAlg.SetRatings(training_data);

    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);

    exampleSetOutput1.deliver(recommendAlg);
  }
 // checking for example set and valid attributes
 @Override
 public void init(ExampleSet exampleSet) throws OperatorException {
   super.init(exampleSet);
   Tools.onlyNominalAttributes(exampleSet, "nominal similarities");
   this.useAttribute = new boolean[exampleSet.getAttributes().size()];
   int i = 0;
   for (Attribute attribute : exampleSet.getAttributes()) {
     if (attribute.isNominal()) {
       useAttribute[i] = true;
     }
     i++;
   }
 }
Exemplo n.º 18
0
 private void restoreOldWeights(ExampleSet exampleSet) {
   if (this.oldWeights != null) { // need to reset weights
     Iterator<Example> reader = exampleSet.iterator();
     int i = 0;
     while (reader.hasNext() && i < this.oldWeights.length) {
       reader.next().setWeight(this.oldWeights[i++]);
     }
   } else { // need to delete the weights attribute
     Attribute weight = exampleSet.getAttributes().getWeight();
     exampleSet.getAttributes().remove(weight);
     exampleSet.getExampleTable().removeAttribute(weight);
   }
 }
Exemplo n.º 19
0
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // only use numeric attributes
    Tools.onlyNumericalAttributes(exampleSet, "KernelPCA");
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this);

    Attributes attributes = exampleSet.getAttributes();
    int numberOfExamples = exampleSet.size();

    // calculating means for later zero centering
    exampleSet.recalculateAllAttributeStatistics();
    double[] means = new double[exampleSet.getAttributes().size()];
    int i = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      means[i] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
      i++;
    }

    // kernel
    Kernel kernel = Kernel.createKernel(this);

    // copying zero centered exampleValues
    ArrayList<double[]> exampleValues = new ArrayList<double[]>(numberOfExamples);
    i = 0;
    for (Example columnExample : exampleSet) {
      double[] columnValues = getAttributeValues(columnExample, attributes, means);
      exampleValues.add(columnValues);
      i++;
    }

    // filling kernel matrix
    Matrix kernelMatrix = new Matrix(numberOfExamples, numberOfExamples);
    for (i = 0; i < numberOfExamples; i++) {
      for (int j = 0; j < numberOfExamples; j++) {
        kernelMatrix.set(
            i, j, kernel.calculateDistance(exampleValues.get(i), exampleValues.get(j)));
      }
    }

    // calculating eigenVectors
    EigenvalueDecomposition eig = kernelMatrix.eig();
    Model model = new KernelPCAModel(exampleSet, means, eig.getV(), exampleValues, kernel);

    if (exampleSetOutput.isConnected()) {
      exampleSetOutput.deliver(model.apply(exampleSet));
    }
    originalOutput.deliver(exampleSet);
    modelOutput.deliver(model);
  }
  private NeuralDataSet getTraining(ExampleSet exampleSet) {
    double[][] data = new double[exampleSet.size()][exampleSet.getAttributes().size()];
    double[][] labels = new double[exampleSet.size()][1];
    int index = 0;
    Attribute label = exampleSet.getAttributes().getLabel();

    this.attributeMin = new double[exampleSet.getAttributes().size()];
    this.attributeMax = new double[attributeMin.length];
    exampleSet.recalculateAllAttributeStatistics();
    int a = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      this.attributeMin[a] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
      this.attributeMax[a] = exampleSet.getStatistics(attribute, Statistics.MAXIMUM);
      a++;
    }

    this.labelMin = exampleSet.getStatistics(label, Statistics.MINIMUM);
    this.labelMax = exampleSet.getStatistics(label, Statistics.MAXIMUM);

    for (Example example : exampleSet) {
      // attributes
      a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        if (attributeMin[a] != attributeMax[a]) {
          data[index][a] =
              (example.getValue(attribute) - attributeMin[a]) / (attributeMax[a] - attributeMin[a]);
        } else {
          data[index][a] = example.getValue(attribute) - attributeMin[a];
        }
        a++;
      }

      // label
      if (label.isNominal()) {
        labels[index][0] = example.getValue(label);
      } else {
        if (labelMax != labelMin) {
          labels[index][0] = (example.getValue(label) - labelMin) / (labelMax - labelMin);
        } else {
          labels[index][0] = example.getValue(label) - labelMin;
        }
      }

      index++;
    }

    return new BasicNeuralDataSet(data, labels);
  }
Exemplo n.º 21
0
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // generating assignment
    RandomGenerator random = RandomGenerator.getRandomGenerator(this);
    int clusterAssignments[] = new int[exampleSet.size()];
    int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS);
    for (int i = 0; i < exampleSet.size(); i++) {
      clusterAssignments[i] = random.nextInt(k);
    }

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    model.setClusterAssignments(clusterAssignments, exampleSet);

    // generating cluster attribute
    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
 /** Creates the partition builder for the given sampling type. */
 private static PartitionBuilder createPartitionBuilder(
     ExampleSet exampleSet, int samplingType, int seed) {
   PartitionBuilder builder = null;
   switch (samplingType) {
     case LINEAR_SAMPLING:
       builder = new SimplePartitionBuilder();
       break;
     case SHUFFLED_SAMPLING:
       builder = new ShuffledPartitionBuilder(true, seed);
       break;
     case STRATIFIED_SAMPLING:
     default:
       Attribute label = exampleSet.getAttributes().getLabel();
       if ((label != null) && (label.isNominal()))
         builder = new StratifiedPartitionBuilder(exampleSet, true, seed);
       else {
         exampleSet
             .getLog()
             .logNote(
                 "Example set has no nominal label: using shuffled partition instead of stratified partition!");
         builder = new ShuffledPartitionBuilder(true, seed);
       }
       break;
   }
   return builder;
 }
  @Override
  public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet)
      throws OperatorException {
    boolean sortMappings = getParameterAsBoolean(PARAMETER_SORT_MAPPING_ALPHABETICALLY);

    Map<String, MappingTranslation> translations = new HashMap<String, MappingTranslation>();

    exampleSet.recalculateAllAttributeStatistics();
    for (Attribute attribute : exampleSet.getAttributes()) {
      MappingTranslation translation =
          new MappingTranslation((NominalMapping) attribute.getMapping().clone());
      if (attribute.isNominal()) {
        for (String value : attribute.getMapping().getValues()) {
          double count = exampleSet.getStatistics(attribute, Statistics.COUNT, value);
          if (count > 0) {
            translation.newMapping.mapString(value);
          }
        }
        if (translation.newMapping.size() < attribute.getMapping().size()) {
          if (sortMappings) {
            translation.newMapping.sortMappings();
          }
          translations.put(attribute.getName(), translation);
        }
      }
    }
    return new RemoveUnusedNominalValuesModel(exampleSet, translations);
  }
  @Override
  public AttributeWeights calculateWeights(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();
    Attribute labelAttribute = attributes.getLabel();
    boolean useSquaredCorrelation = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION);

    AttributeWeights weights = new AttributeWeights(exampleSet);
    getProgress().setTotal(attributes.size());
    int progressCounter = 0;
    int exampleSetSize = exampleSet.size();
    int exampleCounter = 0;
    for (Attribute attribute : attributes) {
      double correlation =
          MathFunctions.correlation(exampleSet, labelAttribute, attribute, useSquaredCorrelation);
      weights.setWeight(attribute.getName(), Math.abs(correlation));
      progressCounter++;
      exampleCounter += exampleSetSize;
      if (exampleCounter > PROGRESS_UPDATE_STEPS) {
        exampleCounter = 0;
        getProgress().setCompleted(progressCounter);
      }
    }

    return weights;
  }
  @Override
  public void doWork() throws OperatorException {
    CentroidClusterModel model = modelInput.getData(CentroidClusterModel.class);

    Attributes trainAttributes = model.getTrainingHeader().getAttributes();
    String[] attributeNames = model.getAttributeNames();
    Attribute[] attributes = new Attribute[attributeNames.length + 1];
    for (int i = 0; i < attributeNames.length; i++) {
      Attribute originalAttribute = trainAttributes.get(attributeNames[i]);
      attributes[i] =
          AttributeFactory.createAttribute(attributeNames[i], originalAttribute.getValueType());
      if (originalAttribute.isNominal()) {
        attributes[i].setMapping((NominalMapping) originalAttribute.getMapping().clone());
      }
    }
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    attributes[attributes.length - 1] = clusterAttribute;

    MemoryExampleTable table = new MemoryExampleTable(attributes);
    for (int i = 0; i < model.getNumberOfClusters(); i++) {
      double[] data = new double[attributeNames.length + 1];
      System.arraycopy(model.getCentroidCoordinates(i), 0, data, 0, attributeNames.length);
      data[attributeNames.length] = clusterAttribute.getMapping().mapString("cluster_" + i);
      table.addDataRow(new DoubleArrayDataRow(data));
    }

    ExampleSet resultSet = table.createExampleSet();
    resultSet.getAttributes().setSpecialAttribute(clusterAttribute, Attributes.CLUSTER_NAME);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(resultSet);
  }
 public SOMModelPlotter(PlotterConfigurationModel settings, ExampleSet exampleSet, Model model) {
   super(settings);
   this.model = model;
   this.exampleSet = exampleSet;
   this.colorizer =
       new SOMClassColorizer(exampleSet.getAttributes().getLabel().getMapping().size());
 }
  @Override
  public ExampleSet read() throws OperatorException {
    FileInputStream inStream = null;
    try {
      inStream = new FileInputStream(getParameterAsFile(PARAMETER_FASTA_FILE_NAME));
    } catch (FileNotFoundException e) {
      // TODO: "Fill"
    }
    FastaReader<DNASequence, NucleotideCompound> fastaReader =
        new FastaReader<DNASequence, NucleotideCompound>(
            inStream,
            new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
            new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
    LinkedHashMap<String, DNASequence> b = null;
    try {
      b = fastaReader.process();
    } catch (Exception e) {
      // TODO: "Fill"
    }
    String data[][] = new String[0][];
    if (b != null) {
      data = new String[b.size()][2];
      int i = 0;
      for (Map.Entry<String, DNASequence> entry : b.entrySet()) {
        data[i][0] = entry.getValue().getOriginalHeader();
        data[i][1] = entry.getValue().getSequenceAsString();
        i++;
      }
    }

    ExampleSet outSet = ExampleSetFactory.createExampleSet(data);
    outSet.getAttributes().get("att1").setName("DNA name");
    outSet.getAttributes().get("att2").setName("Chain");
    return outSet;
  }
  /** Creates a new evolutionary SVM optimization. */
  public ClassificationEvoOptimization(
      ExampleSet exampleSet, // training data
      Kernel kernel,
      double c, // SVM paras
      int initType, // start population creation type para
      int maxIterations,
      int generationsWithoutImprovement,
      int popSize, // GA paras
      int selectionType,
      double tournamentFraction,
      boolean keepBest, // selection paras
      int mutationType, // type of mutation
      double crossoverProb,
      boolean showConvergencePlot,
      boolean showPopulationPlot,
      ExampleSet holdOutSet,
      RandomGenerator random,
      LoggingHandler logging,
      Operator executingOperator) {
    super(
        EvoSVM.createBoundArray(0.0d, exampleSet.size()),
        EvoSVM.determineMax(c, kernel, exampleSet, selectionType, exampleSet.size()),
        popSize,
        exampleSet.size(),
        initType,
        maxIterations,
        generationsWithoutImprovement,
        selectionType,
        tournamentFraction,
        keepBest,
        mutationType,
        Double.NaN,
        crossoverProb,
        showConvergencePlot,
        showPopulationPlot,
        random,
        logging,
        executingOperator);
    this.exampleSet = exampleSet;
    this.holdOutSet = holdOutSet;
    this.populationSize = popSize;

    this.kernel = kernel;
    this.c = getMax(0);

    // label values
    this.ys = new double[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int index = 0;
    Attribute label = exampleSet.getAttributes().getLabel();
    while (reader.hasNext()) {
      Example example = reader.next();
      ys[index++] = example.getLabel() == label.getMapping().getPositiveIndex() ? 1.0d : -1.0d;
    }

    // optimization function
    this.optimizationFunction =
        new ClassificationOptimizationFunction(selectionType == NON_DOMINATED_SORTING_SELECTION);
  }
Exemplo n.º 29
0
  @Override
  public void doWork() throws OperatorException {

    ExampleSet exampleSet = exampleSetInput.getData();

    IPosOnlyFeedback training_data = new PosOnlyFeedback();
    IEntityMapping user_mapping = new EntityMapping();
    IEntityMapping item_mapping = new EntityMapping();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
      throw new UserError(this, 105);
    }

    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
      throw new UserError(this, 105);
    }

    Attributes Att = exampleSet.getAttributes();
    AttributeRole ur = Att.getRole("user identification");
    Attribute u = ur.getAttribute();
    AttributeRole ir = Att.getRole("item identification");
    Attribute i = ir.getAttribute();

    for (Example example : exampleSet) {

      double j = example.getValue(u);
      int uid = (int) j;

      j = example.getValue(i);
      int iid = (int) j;

      training_data.Add(user_mapping.ToInternalID(uid), item_mapping.ToInternalID(iid));
      checkForStop();
    }

    System.out.println(training_data.GetMaxItemID() + " " + training_data.GetMaxUserID());

    Random recommendAlg = new Random();
    recommendAlg.SetFeedback(training_data);
    recommendAlg.user_mapping = user_mapping;
    recommendAlg.item_mapping = item_mapping;
    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);
    exampleSetOutput1.deliver(recommendAlg);
  }
Exemplo n.º 30
0
  /**
   * Iterates over all models and returns the class with maximum likelihood.
   *
   * @param origExampleSet the set of examples to be classified
   */
  @Override
  public ExampleSet performPrediction(ExampleSet origExampleSet, Attribute predictedLabel)
      throws OperatorException {
    final String attributePrefix = "AdaBoostModelPrediction";
    final int numLabels = predictedLabel.getMapping().size();
    final Attribute[] specialAttributes = new Attribute[numLabels];
    OperatorProgress progress = null;
    if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) {
      progress = getOperator().getProgress();
      progress.setTotal(100);
    }
    for (int i = 0; i < numLabels; i++) {
      specialAttributes[i] =
          com.rapidminer.example.Tools.createSpecialAttribute(
              origExampleSet, attributePrefix + i, Ontology.NUMERICAL);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels));
      }
    }

    Iterator<Example> reader = origExampleSet.iterator();
    int progressCounter = 0;
    while (reader.hasNext()) {
      Example example = reader.next();
      for (int i = 0; i < specialAttributes.length; i++) {
        example.setValue(specialAttributes[i], 0);
      }
      if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) {
        progress.setCompleted((int) (25.0 * progressCounter / origExampleSet.size()) + 25);
      }
    }

    reader = origExampleSet.iterator();
    for (int modelNr = 0; modelNr < this.getNumberOfModels(); modelNr++) {
      Model model = this.getModel(modelNr);
      ExampleSet exampleSet = (ExampleSet) origExampleSet.clone();
      exampleSet = model.apply(exampleSet);
      this.updateEstimates(exampleSet, modelNr, specialAttributes);
      PredictionModel.removePredictedLabel(exampleSet);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (modelNr + 1) / this.getNumberOfModels()) + 50);
      }
    }

    // Turn prediction weights into confidences and a crisp predcition:
    this.evaluateSpecialAttributes(origExampleSet, specialAttributes);

    // Clean up attributes:
    for (int i = 0; i < numLabels; i++) {
      origExampleSet.getAttributes().remove(specialAttributes[i]);
      origExampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
      if (progress != null) {
        progress.setCompleted((int) (25.0 * (i + 1) / numLabels) + 75);
      }
    }

    return origExampleSet;
  }