Пример #1
0
  public SVDModel(ExampleSet exampleSet, double[] singularValues, Matrix vMatrix) {
    super(exampleSet);

    this.vMatrix = vMatrix;
    this.singularValues = singularValues;

    this.keepAttributes = false;
    this.attributeNames = new String[exampleSet.getAttributes().size()];
    int counter = 0;
    for (Attribute attribute : exampleSet.getAttributes()) {
      attributeNames[counter] = attribute.getName();
      counter++;
    }

    // compute cumulative values
    cumulativeSingularValueProportion = new double[singularValues.length];
    // insert cumulative sum of singular values
    singularValuesSum = 0.0d;
    for (int i = 0; i < singularValues.length; i++) {
      singularValuesSum += singularValues[i];
      cumulativeSingularValueProportion[i] = singularValuesSum;
    }

    // now reduce to proportion
    for (int i = 0; i < singularValues.length; i++) {
      cumulativeSingularValueProportion[i] /= singularValuesSum;
    }
  }
  @Override
  public AttributeWeights calculateWeights(ExampleSet exampleSet) throws OperatorException {
    Attributes attributes = exampleSet.getAttributes();
    Attribute labelAttribute = attributes.getLabel();
    boolean useSquaredCorrelation = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION);

    AttributeWeights weights = new AttributeWeights(exampleSet);
    getProgress().setTotal(attributes.size());
    int progressCounter = 0;
    int exampleSetSize = exampleSet.size();
    int exampleCounter = 0;
    for (Attribute attribute : attributes) {
      double correlation =
          MathFunctions.correlation(exampleSet, labelAttribute, attribute, useSquaredCorrelation);
      weights.setWeight(attribute.getName(), Math.abs(correlation));
      progressCounter++;
      exampleCounter += exampleSetSize;
      if (exampleCounter > PROGRESS_UPDATE_STEPS) {
        exampleCounter = 0;
        getProgress().setCompleted(progressCounter);
      }
    }

    return weights;
  }
  /**
   * Creates attribute meta data that represents the attribute that will be generated for the
   * provided arguments.
   *
   * @return the {@link AttributeMetaData} for the provided arguments
   */
  public static AttributeMetaData generateAttributeMetaData(
      ExampleSet exampleSet, String name, ExpressionType expressionType) {

    AttributeMetaData newAttribute = null;
    Attribute existingAtt = exampleSet.getAttributes().get(name);

    int ontology = expressionType.getAttributeType();

    if (ontology == Ontology.BINOMINAL) {
      newAttribute = new AttributeMetaData(name, Ontology.BINOMINAL);
      HashSet<String> values = new HashSet<>();
      values.add("false");
      values.add("true");
      newAttribute.setValueSet(values, SetRelation.EQUAL);
    } else {
      newAttribute = new AttributeMetaData(name, ontology);
    }

    // restore role if attribute existed already
    if (existingAtt != null) {
      newAttribute.setRole(exampleSet.getAttributes().getRole(existingAtt).getSpecialName());
    }

    return newAttribute;
  }
  @Override
  public ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException {
    boolean round = getParameterAsBoolean(PARAMETER_ROUND);

    List<Attribute> newAttributes = new LinkedList<Attribute>();
    Iterator<Attribute> a = exampleSet.getAttributes().iterator();
    while (a.hasNext()) {
      Attribute attribute = a.next();
      if ((Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL))
          && (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.INTEGER))) {
        Attribute newAttribute =
            AttributeFactory.createAttribute(attribute.getName(), Ontology.INTEGER);
        newAttributes.add(newAttribute);
        exampleSet.getExampleTable().addAttribute(newAttribute);
        for (Example example : exampleSet) {
          double originalValue = example.getValue(attribute);
          if (Double.isNaN(originalValue)) {
            example.setValue(newAttribute, Double.NaN);
          } else {
            long newValue = round ? Math.round(originalValue) : (long) originalValue;
            example.setValue(newAttribute, newValue);
          }
        }
        a.remove();
      }
    }

    for (Attribute attribute : newAttributes) exampleSet.getAttributes().addRegular(attribute);

    return exampleSet;
  }
Пример #5
0
  private static Map<Integer, MeanVariance> createMeanVariances(
      com.rapidminer.example.ExampleSet exampleSet) {
    double[] sum = new double[exampleSet.getAttributes().size()];
    double[] squaredSum = new double[sum.length];

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    while (reader.hasNext()) {
      com.rapidminer.example.Example example = reader.next();
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = example.getValue(attribute);
        sum[a] += value;
        squaredSum[a] += value * value;
        a++;
      }
    }

    Map<Integer, MeanVariance> meanVariances = new HashMap<Integer, MeanVariance>();
    for (int a = 0; a < sum.length; a++) {
      sum[a] /= exampleSet.size();
      squaredSum[a] /= exampleSet.size();
      meanVariances.put(a, new MeanVariance(sum[a], squaredSum[a] - (sum[a] * sum[a])));
    }

    return meanVariances;
  }
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String firstName = getParameterAsString(PARAMETER_FIRST_ATTRIBUTE);
    String secondName = getParameterAsString(PARAMETER_SECOND_ATTRIBUTE);

    AttributeRole firstRole = exampleSet.getAttributes().getRole(firstName);
    AttributeRole secondRole = exampleSet.getAttributes().getRole(secondName);

    if (firstRole == null) {
      throw new AttributeNotFoundError(this, PARAMETER_FIRST_ATTRIBUTE, firstName);
    }

    if (secondRole == null) {
      throw new AttributeNotFoundError(this, PARAMETER_SECOND_ATTRIBUTE, secondName);
    }

    String firstRoleName = firstRole.getSpecialName();
    String secondRoleName = secondRole.getSpecialName();

    firstRole.changeToRegular();
    secondRole.changeToRegular();

    firstRole.setSpecial(secondRoleName);
    secondRole.setSpecial(firstRoleName);

    return exampleSet;
  }
  /** Trains a model using an ExampleSet from the input. Uses the method learn(ExampleSet). */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    // some checks
    if (exampleSet.getAttributes().getLabel() == null) {
      throw new UserError(this, 105, new Object[0]);
    }
    if (exampleSet.getAttributes().size() == 0) {
      throw new UserError(this, 106, new Object[0]);
    }

    // check capabilities and produce errors if they are not fulfilled
    CapabilityCheck check =
        new CapabilityCheck(
            this,
            Tools.booleanValue(
                ParameterService.getParameterValue(
                    CapabilityProvider.PROPERTY_RAPIDMINER_GENERAL_CAPABILITIES_WARN),
                true));
    check.checkLearnerCapabilities(this, exampleSet);

    Model model = learn(exampleSet);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(exampleSet);
  }
  /**
   * Gets the input data and macro name and iterates over the example set while updating the current
   * iteration in the given macro.
   */
  @Override
  public void doWork() throws OperatorException {
    outExtender.reset();
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
    String iterationMacroName = getParameterAsString(PARAMETER_ITERATION_MACRO);
    boolean innerSinkIsConnected = exampleSetInnerSink.isConnected();

    for (iteration = 1; iteration <= exampleSet.size(); iteration++) {

      getProcess().getMacroHandler().addMacro(iterationMacroName, String.valueOf(iteration));

      // passing in clone or if connected the result from last iteration
      exampleSetInnerSource.deliver(
          innerSinkIsConnected ? exampleSet : (ExampleSet) exampleSet.clone());
      getSubprocess(0).execute();
      inApplyLoop();

      if (innerSinkIsConnected) {
        exampleSet = exampleSetInnerSink.getData(ExampleSet.class);
      }

      outExtender.collect();
    }

    getProcess().getMacroHandler().removeMacro(iterationMacroName);
    exampleSetOutput.deliver(exampleSet);
  }
  @Override
  public final void doWork() throws OperatorException {
    ExampleSet inputExampleSet = exampleSetInput.getData(ExampleSet.class);
    ExampleSet applySet = null;
    // check for needed copy of original exampleset
    if (originalOutput.isConnected() && writesIntoExistingData()) {
      int type = DataRowFactory.TYPE_DOUBLE_ARRAY;
      if (inputExampleSet.getExampleTable() instanceof MemoryExampleTable) {
        DataRowReader dataRowReader = inputExampleSet.getExampleTable().getDataRowReader();
        if (dataRowReader.hasNext()) {
          type = dataRowReader.next().getType();
        }
      }
      // check if type is supported to be copied
      if (type >= 0) {
        applySet = MaterializeDataInMemory.materializeExampleSet(inputExampleSet, type);
      }
    }

    if (applySet == null) applySet = (ExampleSet) inputExampleSet.clone();

    // we apply on the materialized data, because writing can't take place in views anyway.
    ExampleSet result = apply(applySet);
    originalOutput.deliver(inputExampleSet);
    exampleSetOutput.deliver(result);
  }
Пример #10
0
  @Override
  public ClusterModel generateClusterModel(ExampleSet exampleSet) throws OperatorException {
    // checking and creating ids if necessary
    Tools.checkAndCreateIds(exampleSet);

    // generating assignment
    RandomGenerator random = RandomGenerator.getRandomGenerator(this);
    int clusterAssignments[] = new int[exampleSet.size()];
    int k = getParameterAsInt(PARAMETER_NUMBER_OF_CLUSTERS);
    for (int i = 0; i < exampleSet.size(); i++) {
      clusterAssignments[i] = random.nextInt(k);
    }

    ClusterModel model =
        new ClusterModel(
            exampleSet,
            k,
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_ADD_AS_LABEL),
            getParameterAsBoolean(RMAbstractClusterer.PARAMETER_REMOVE_UNLABELED));
    model.setClusterAssignments(clusterAssignments, exampleSet);

    // generating cluster attribute
    if (addsClusterAttribute()) {
      Attribute cluster = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
      exampleSet.getExampleTable().addAttribute(cluster);
      exampleSet.getAttributes().setCluster(cluster);
      int i = 0;
      for (Example example : exampleSet) {
        example.setValue(cluster, "cluster_" + clusterAssignments[i]);
        i++;
      }
    }
    return model;
  }
Пример #11
0
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    Kernel kernel = getKernel();
    kernel.init(exampleSet);

    double initLearnRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
    NominalMapping labelMapping = exampleSet.getAttributes().getLabel().getMapping();
    String classNeg = labelMapping.getNegativeString();
    String classPos = labelMapping.getPositiveString();
    double classValueNeg = labelMapping.getNegativeIndex();
    int numberOfAttributes = exampleSet.getAttributes().size();
    HyperplaneModel model = new HyperplaneModel(exampleSet, classNeg, classPos, kernel);
    model.init(new double[numberOfAttributes], 0);
    for (int round = 0; round <= getParameterAsInt(PARAMETER_ROUNDS); round++) {
      double learnRate = getLearnRate(round, getParameterAsInt(PARAMETER_ROUNDS), initLearnRate);
      Attributes attributes = exampleSet.getAttributes();
      for (Example example : exampleSet) {
        double prediction = model.predict(example);
        if (prediction != example.getLabel()) {
          double direction = (example.getLabel() == classValueNeg) ? -1 : 1;
          // adapting intercept
          model.setIntercept(model.getIntercept() + learnRate * direction);
          // adapting coefficients
          double coefficients[] = model.getCoefficients();
          int i = 0;
          for (Attribute attribute : attributes) {
            coefficients[i] += learnRate * direction * example.getValue(attribute);
            i++;
          }
        }
      }
    }
    return model;
  }
  @Override
  public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet)
      throws OperatorException {
    boolean sortMappings = getParameterAsBoolean(PARAMETER_SORT_MAPPING_ALPHABETICALLY);

    Map<String, MappingTranslation> translations = new HashMap<String, MappingTranslation>();

    exampleSet.recalculateAllAttributeStatistics();
    for (Attribute attribute : exampleSet.getAttributes()) {
      MappingTranslation translation =
          new MappingTranslation((NominalMapping) attribute.getMapping().clone());
      if (attribute.isNominal()) {
        for (String value : attribute.getMapping().getValues()) {
          double count = exampleSet.getStatistics(attribute, Statistics.COUNT, value);
          if (count > 0) {
            translation.newMapping.mapString(value);
          }
        }
        if (translation.newMapping.size() < attribute.getMapping().size()) {
          if (sortMappings) {
            translation.newMapping.sortMappings();
          }
          translations.put(attribute.getName(), translation);
        }
      }
    }
    return new RemoveUnusedNominalValuesModel(exampleSet, translations);
  }
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_NAME);
    Long offset = (long) getParameterAsInt(PARMETER_TIME_OFFSET);

    Attribute numericalAttribute = exampleSet.getAttributes().get(attributeName);
    if (numericalAttribute == null) {
      throw new UserError(this, 111, attributeName);
    }

    Attribute newAttribute = AttributeFactory.createAttribute(Ontology.DATE_TIME);
    exampleSet.getExampleTable().addAttribute(newAttribute);
    exampleSet.getAttributes().addRegular(newAttribute);

    for (Example example : exampleSet) {
      double value = example.getValue(numericalAttribute);
      if (Double.isNaN(value)) {
        example.setValue(newAttribute, value);
      } else {
        value += offset;
        example.setValue(newAttribute, value);
      }
    }

    if (!getParameterAsBoolean(PARAMETER_KEEP_OLD_ATTRIBUTE)) {
      exampleSet.getAttributes().remove(numericalAttribute);
      newAttribute.setName(attributeName);
    } else {
      newAttribute.setName(attributeName + "_AS_DATE");
    }
    return exampleSet;
  }
  private BasicNetwork getNetwork(ExampleSet exampleSet) throws OperatorException {
    BasicNetwork network = new BasicNetwork();

    // input layer
    network.addLayer(new FeedforwardLayer(exampleSet.getAttributes().size()));

    // hidden layers
    log("No hidden layers defined. Using default hidden layers.");
    int layerSize = getParameterAsInt(PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE);
    if (layerSize <= 0) layerSize = getDefaultLayerSize(exampleSet);
    for (int p = 0; p < getParameterAsInt(PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS); p++) {
      network.addLayer(new FeedforwardLayer(layerSize));
    }

    // output layer
    if (exampleSet.getAttributes().getLabel().isNominal()) {
      network.addLayer(new FeedforwardLayer(new ActivationSigmoid(), 1));
    } else {
      network.addLayer(new FeedforwardLayer(new ActivationLinear(), 1));
    }

    network.reset(
        RandomGenerator.getRandomGenerator(
            getParameterAsBoolean(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED),
            getParameterAsInt(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED)));

    return network;
  }
 /** Creates the partition builder for the given sampling type. */
 private static PartitionBuilder createPartitionBuilder(
     ExampleSet exampleSet, int samplingType, int seed) {
   PartitionBuilder builder = null;
   switch (samplingType) {
     case LINEAR_SAMPLING:
       builder = new SimplePartitionBuilder();
       break;
     case SHUFFLED_SAMPLING:
       builder = new ShuffledPartitionBuilder(true, seed);
       break;
     case STRATIFIED_SAMPLING:
     default:
       Attribute label = exampleSet.getAttributes().getLabel();
       if ((label != null) && (label.isNominal()))
         builder = new StratifiedPartitionBuilder(exampleSet, true, seed);
       else {
         exampleSet
             .getLog()
             .logNote(
                 "Example set has no nominal label: using shuffled partition instead of stratified partition!");
         builder = new ShuffledPartitionBuilder(true, seed);
       }
       break;
   }
   return builder;
 }
Пример #16
0
  /** @see com.rapidminer.operator.OperatorChain#doWork() */
  @Override
  public void doWork() throws OperatorException {

    List<Operator> nested = this.getImmediateChildren();
    log.info("This StreamProcess has {} nested operators", nested.size());
    for (Operator op : nested) {
      log.info("  op: {}", op);

      if (op instanceof DataStreamOperator) {
        log.info("Resetting stream-operator {}", op);
        ((DataStreamOperator) op).reset();
      }
    }

    log.info("Starting some work in doWork()");
    ExampleSet exampleSet = input.getData(ExampleSet.class);
    log.info("input is an example set with {} examples", exampleSet.size());
    int i = 0;

    Iterator<Example> it = exampleSet.iterator();
    while (it.hasNext()) {
      Example example = it.next();
      log.info("Processing example {}", i);
      DataObject datum = StreamUtils.wrap(example);
      log.info("Wrapped data-object is: {}", datum);
      dataStream.deliver(datum);
      getSubprocess(0).execute();
      inApplyLoop();
      i++;
    }

    // super.doWork();
    log.info("doWork() is finished.");
  }
  @Override
  public void doWork() throws OperatorException {
    CentroidClusterModel model = modelInput.getData(CentroidClusterModel.class);

    Attributes trainAttributes = model.getTrainingHeader().getAttributes();
    String[] attributeNames = model.getAttributeNames();
    Attribute[] attributes = new Attribute[attributeNames.length + 1];
    for (int i = 0; i < attributeNames.length; i++) {
      Attribute originalAttribute = trainAttributes.get(attributeNames[i]);
      attributes[i] =
          AttributeFactory.createAttribute(attributeNames[i], originalAttribute.getValueType());
      if (originalAttribute.isNominal()) {
        attributes[i].setMapping((NominalMapping) originalAttribute.getMapping().clone());
      }
    }
    Attribute clusterAttribute = AttributeFactory.createAttribute("cluster", Ontology.NOMINAL);
    attributes[attributes.length - 1] = clusterAttribute;

    MemoryExampleTable table = new MemoryExampleTable(attributes);
    for (int i = 0; i < model.getNumberOfClusters(); i++) {
      double[] data = new double[attributeNames.length + 1];
      System.arraycopy(model.getCentroidCoordinates(i), 0, data, 0, attributeNames.length);
      data[attributeNames.length] = clusterAttribute.getMapping().mapString("cluster_" + i);
      table.addDataRow(new DoubleArrayDataRow(data));
    }

    ExampleSet resultSet = table.createExampleSet();
    resultSet.getAttributes().setSpecialAttribute(clusterAttribute, Attributes.CLUSTER_NAME);

    modelOutput.deliver(model);
    exampleSetOutput.deliver(resultSet);
  }
  @Override
  public ExampleSet read() throws OperatorException {
    FileInputStream inStream = null;
    try {
      inStream = new FileInputStream(getParameterAsFile(PARAMETER_FASTA_FILE_NAME));
    } catch (FileNotFoundException e) {
      // TODO: "Fill"
    }
    FastaReader<DNASequence, NucleotideCompound> fastaReader =
        new FastaReader<DNASequence, NucleotideCompound>(
            inStream,
            new GenericFastaHeaderParser<DNASequence, NucleotideCompound>(),
            new DNASequenceCreator(DNACompoundSet.getDNACompoundSet()));
    LinkedHashMap<String, DNASequence> b = null;
    try {
      b = fastaReader.process();
    } catch (Exception e) {
      // TODO: "Fill"
    }
    String data[][] = new String[0][];
    if (b != null) {
      data = new String[b.size()][2];
      int i = 0;
      for (Map.Entry<String, DNASequence> entry : b.entrySet()) {
        data[i][0] = entry.getValue().getOriginalHeader();
        data[i][1] = entry.getValue().getSequenceAsString();
        i++;
      }
    }

    ExampleSet outSet = ExampleSetFactory.createExampleSet(data);
    outSet.getAttributes().get("att1").setName("DNA name");
    outSet.getAttributes().get("att2").setName("Chain");
    return outSet;
  }
  private List<AggregationFunction> createAggreationFunctions(ExampleSet exampleSet)
      throws OperatorException {
    // load global switches
    boolean ignoreMissings = getParameterAsBoolean(PARAMETER_IGNORE_MISSINGS);
    boolean countOnlyDistinct = getParameterAsBoolean(PARAMETER_ONLY_DISTINCT);

    // creating data structures for building aggregates
    List<AggregationFunction> aggregationFunctions = new LinkedList<AggregationFunction>();

    // building functions for all explicitly defined aggregation attributes
    Set<Attribute> explicitlyAggregatedAttributes = new HashSet<Attribute>();
    List<String[]> aggregationFunctionPairs = getParameterList(PARAMETER_AGGREGATION_ATTRIBUTES);
    for (String[] aggregationFunctionPair : aggregationFunctionPairs) {
      Attribute attribute = exampleSet.getAttributes().get(aggregationFunctionPair[0]);
      if (attribute == null) {
        throw new UserError(
            this, "aggregation.aggregation_attribute_not_present", aggregationFunctionPair[0]);
      }
      AggregationFunction function =
          AggregationFunction.createAggregationFunction(
              aggregationFunctionPair[1], attribute, ignoreMissings, countOnlyDistinct);
      if (!function.isCompatible()) {
        throw new UserError(
            this,
            "aggregation.incompatible_attribute_type",
            attribute.getName(),
            aggregationFunctionPair[1]);
      }
      // adding objects for this attribute to structure
      explicitlyAggregatedAttributes.add(attribute);
      aggregationFunctions.add(function);
    }

    // building the default aggregations
    if (getParameterAsBoolean(PARAMETER_USE_DEFAULT_AGGREGATION)) {
      String defaultAggregationFunctionName =
          getParameterAsString(PARAMETER_DEFAULT_AGGREGATION_FUNCTION);

      Iterator<Attribute> iterator =
          attributeSelector.getAttributeSubset(exampleSet, false).iterator();
      if (getCompatibilityLevel().isAtMost(VERSION_5_2_8)) {
        iterator = exampleSet.getAttributes().iterator();
      }

      while (iterator.hasNext()) {
        Attribute attribute = iterator.next();
        if (!explicitlyAggregatedAttributes.contains(attribute)) {
          AggregationFunction function =
              AggregationFunction.createAggregationFunction(
                  defaultAggregationFunctionName, attribute, ignoreMissings, countOnlyDistinct);
          if (function.isCompatible()) {
            aggregationFunctions.add(function);
          }
        }
      }
    }

    return aggregationFunctions;
  }
Пример #20
0
  /**
   * Creates a fresh example set of the given size from the RapidMiner example reader. The alpha
   * values and b are zero, the label will be set if it is known.
   */
  public SVMExamples(
      com.rapidminer.example.ExampleSet exampleSet,
      Attribute labelAttribute,
      Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Iterator<com.rapidminer.example.Example> reader = exampleSet.iterator();
    Attribute idAttribute = exampleSet.getAttributes().getId();
    int exampleCounter = 0;
    while (reader.hasNext()) {
      com.rapidminer.example.Example current = reader.next();
      Map<Integer, Double> attributeMap = new LinkedHashMap<Integer, Double>();
      int a = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double value = current.getValue(attribute);
        if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), value)) {
          attributeMap.put(a, value);
        }
        if ((a + 1) > dim) {
          dim = (a + 1);
        }
        a++;
      }
      atts[exampleCounter] = new double[attributeMap.size()];
      index[exampleCounter] = new int[attributeMap.size()];
      Iterator<Map.Entry<Integer, Double>> i = attributeMap.entrySet().iterator();
      int attributeCounter = 0;
      while (i.hasNext()) {
        Map.Entry<Integer, Double> e = i.next();
        Integer indexValue = e.getKey();
        Double attributeValue = e.getValue();
        index[exampleCounter][attributeCounter] = indexValue.intValue();
        double value = attributeValue.doubleValue();
        MeanVariance meanVariance = meanVarianceMap.get(indexValue);
        if (meanVariance != null) {
          if (meanVariance.getVariance() == 0.0d) {
            value = 0.0d;
          } else {
            value = (value - meanVariance.getMean()) / Math.sqrt(meanVariance.getVariance());
          }
        }
        atts[exampleCounter][attributeCounter] = value;
        attributeCounter++;
      }
      if (labelAttribute != null) {
        double label = current.getValue(labelAttribute);
        if (labelAttribute.isNominal()) {
          ys[exampleCounter] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
        } else {
          ys[exampleCounter] = label;
        }
      }
      if (idAttribute != null) {
        ids[exampleCounter] = current.getValueAsString(idAttribute);
      }
      exampleCounter++;
    }
  }
Пример #21
0
 public Model learn(ExampleSet exampleSet) throws OperatorException {
   double value = 0.0;
   double[] confidences = null;
   int method = getParameterAsInt(PARAMETER_METHOD);
   Attribute label = exampleSet.getAttributes().getLabel();
   if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) {
     logWarning(
         "Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!");
     method = MODE;
   } else if ((!label.isNominal()) && (method == MODE)) {
     logWarning(
         "Cannot use method '"
             + METHODS[method]
             + "' for numerical labels: changing to 'average'!");
     method = AVERAGE;
   }
   switch (method) {
     case MEDIAN:
       double[] labels = new double[exampleSet.size()];
       Iterator<Example> r = exampleSet.iterator();
       int counter = 0;
       while (r.hasNext()) {
         Example example = r.next();
         labels[counter++] = example.getValue(example.getAttributes().getLabel());
       }
       java.util.Arrays.sort(labels);
       value = labels[exampleSet.size() / 2];
       break;
     case AVERAGE:
       exampleSet.recalculateAttributeStatistics(label);
       value = exampleSet.getStatistics(label, Statistics.AVERAGE);
       break;
     case MODE:
       exampleSet.recalculateAttributeStatistics(label);
       value = exampleSet.getStatistics(label, Statistics.MODE);
       confidences = new double[label.getMapping().size()];
       for (int i = 0; i < confidences.length; i++) {
         confidences[i] =
             exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i))
                 / exampleSet.size();
       }
       break;
     case CONSTANT:
       value = getParameterAsDouble(PARAMETER_CONSTANT);
       break;
     case ATTRIBUTE:
       return new AttributeDefaultModel(
           exampleSet, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
     default:
       // cannot happen
       throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!");
   }
   log(
       "Default value is '"
           + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "")
           + "'.");
   return new DefaultModel(exampleSet, value, confidences);
 }
Пример #22
0
 /**
  * Helper method replacing <code>Model.createPredictedLabel(ExampleSet)</code> in order to lower
  * memory consumption.
  */
 private static void createOrReplacePredictedLabelFor(ExampleSet exampleSet, Model model) {
   Attribute predictedLabel = exampleSet.getAttributes().getPredictedLabel();
   if (predictedLabel != null) { // remove old predicted label
     exampleSet.getAttributes().remove(predictedLabel);
     exampleSet.getExampleTable().removeAttribute(predictedLabel);
   }
   // model.createPredictedLabel(exampleSet); // not longer necessary since
   // label creation is done by model.apply(...).
 }
Пример #23
0
 /*
  * Extracts an example set containing just the two specified
  * attributes and no missing values.
  *
  * @param eSet the source example set
  * @param a the first attribute to extract
  * @param b the second attribute to extract
  * @return the reduced example set
  */
 private static ExampleSet extract(ExampleSet eSet, Attribute a, Attribute b) {
   // create a new example set containing just attributes a and b
   ExampleSet e = (ExampleSet) eSet.clone();
   e.getAttributes().clearRegular();
   e.getAttributes().clearSpecial();
   e.getAttributes().addRegular(a);
   e.getAttributes().addRegular(b);
   return new ConditionedExampleSet(e, new NoMissingAttributesCondition(e, null));
 }
Пример #24
0
  @Override
  public void doWork() throws OperatorException {

    ExampleSet exampleSet = exampleSetInput.getData();

    IEntityMapping user_mapping = new EntityMapping();
    IEntityMapping item_mapping = new EntityMapping();
    IRatings training_data = new Ratings();

    if (exampleSet.getAttributes().getSpecial("user identification") == null) {
      throw new UserError(this, 105);
    }

    if (exampleSet.getAttributes().getSpecial("item identification") == null) {
      throw new UserError(this, 105);
    }

    if (exampleSet.getAttributes().getLabel() == null) {
      throw new UserError(this, 105);
    }

    Attributes Att = exampleSet.getAttributes();
    AttributeRole ur = Att.getRole("user identification");
    Attribute u = ur.getAttribute();
    AttributeRole ir = Att.getRole("item identification");
    Attribute i = ir.getAttribute();
    Attribute ui = Att.getLabel();

    for (Example example : exampleSet) {

      double j = example.getValue(u);
      int uid = user_mapping.ToInternalID((int) j);

      j = example.getValue(i);
      int iid = item_mapping.ToInternalID((int) j);

      double r = example.getValue(ui);
      training_data.Add(uid, iid, r);
    }

    _slopeOne recommendAlg = new _slopeOne();

    recommendAlg.user_mapping = user_mapping;
    recommendAlg.item_mapping = item_mapping;
    recommendAlg.SetMinRating(getParameterAsInt("Min Rating"));
    recommendAlg.SetMaxRating(recommendAlg.GetMinRating() + getParameterAsInt("Range"));

    recommendAlg.SetRatings(training_data);

    recommendAlg.Train();

    exampleSetOutput.deliver(exampleSet);

    exampleSetOutput1.deliver(recommendAlg);
  }
  @Override
  public Model learn(ExampleSet exampleSet) throws OperatorException {
    DistanceMeasure measure = DistanceMeasures.createMeasure(this);
    measure.init(exampleSet);
    GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure);

    // check if weights should be used
    boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
    // check if robust estimate should be performed: Then calculate weights and use it anyway
    if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
      useWeights = true;
      LocalPolynomialExampleWeightingOperator weightingOperator;
      try {
        weightingOperator =
            OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
        exampleSet = weightingOperator.doWork((ExampleSet) exampleSet.clone(), this);
      } catch (OperatorCreationException e) {
        throw new UserError(this, 904, "LocalPolynomialExampleWeighting", e.getMessage());
      }
    }

    Attributes attributes = exampleSet.getAttributes();
    Attribute label = attributes.getLabel();
    Attribute weightAttribute = attributes.getWeight();
    for (Example example : exampleSet) {
      double[] values = new double[attributes.size()];
      double labelValue = example.getValue(label);
      double weight = 1d;
      if (weightAttribute != null && useWeights) {
        weight = example.getValue(weightAttribute);
      }

      // filter out examples without influence
      if (weight > 0d) {
        // copying example values
        int i = 0;
        for (Attribute attribute : attributes) {
          values[i] = example.getValue(attribute);
          i++;
        }

        // inserting into geometric data collection
        data.add(values, new RegressionData(values, labelValue, weight));
      }
    }
    return new LocalPolynomialRegressionModel(
        exampleSet,
        data,
        Neighborhoods.createNeighborhood(this),
        SmoothingKernels.createKernel(this),
        getParameterAsInt(PARAMETER_DEGREE),
        getParameterAsDouble(PARAMETER_RIDGE));
  }
 // checking for example set and valid attributes
 @Override
 public void init(ExampleSet exampleSet) throws OperatorException {
   super.init(exampleSet);
   Tools.onlyNominalAttributes(exampleSet, "nominal similarities");
   this.useAttribute = new boolean[exampleSet.getAttributes().size()];
   int i = 0;
   for (Attribute attribute : exampleSet.getAttributes()) {
     if (attribute.isNominal()) {
       useAttribute[i] = true;
     }
     i++;
   }
 }
Пример #27
0
 private void restoreOldWeights(ExampleSet exampleSet) {
   if (this.oldWeights != null) { // need to reset weights
     Iterator<Example> reader = exampleSet.iterator();
     int i = 0;
     while (reader.hasNext() && i < this.oldWeights.length) {
       reader.next().setWeight(this.oldWeights[i++]);
     }
   } else { // need to delete the weights attribute
     Attribute weight = exampleSet.getAttributes().getWeight();
     exampleSet.getAttributes().remove(weight);
     exampleSet.getExampleTable().removeAttribute(weight);
   }
 }
 public boolean hasNext() {
   if (this.nextInvoked) {
     this.nextInvoked = false;
     this.currentIndex++;
     if (this.currentIndex < parent.size()) {
       this.currentExample = this.parent.getExample(this.currentIndex);
       return true;
     } else {
       return false;
     }
   }
   return (this.currentIndex < parent.size());
 }
 private double[] getMeanVector(ExampleSet exampleSet) {
   exampleSet.recalculateAllAttributeStatistics();
   Attributes attributes = exampleSet.getAttributes();
   double[] meanVector = new double[attributes.size()];
   int i = 0;
   for (Attribute attribute : attributes) {
     if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
       meanVector[i] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
     } else if (attribute.isNominal())
       meanVector[i] = exampleSet.getStatistics(attribute, Statistics.MODE);
     else meanVector[i] = exampleSet.getStatistics(attribute, Statistics.AVERAGE);
     i++;
   }
   return meanVector;
 }
  public static SplittedExampleSet splitByAttribute(
      ExampleSet exampleSet, Attribute attribute, double value) {
    int[] elements = new int[exampleSet.size()];
    Iterator<Example> reader = exampleSet.iterator();
    int i = 0;

    while (reader.hasNext()) {
      Example example = reader.next();
      double currentValue = example.getValue(attribute);
      if (currentValue <= value) elements[i++] = 0;
      else elements[i++] = 1;
    }
    Partition partition = new Partition(elements, 2);
    return new SplittedExampleSet(exampleSet, partition);
  }