/**
   * Builds the SVM example container from a RapidMiner example set. Each example is stored
   * sparsely: only values that are not the attribute's default are kept, together with the
   * position of their attribute in the iteration order. Values whose position has an entry in the
   * mean/variance map are standardized. The instance is initialized through the (size, b)
   * constructor with b = 0.0; labels and ids are copied when the corresponding attribute exists.
   *
   * @param exampleSet the examples to convert
   * @param labelAttribute the label attribute, or {@code null} if no label should be read
   * @param meanVariances statistics per attribute position; positions without an entry are copied
   *     unscaled
   */
  public SVMExamples(
      com.rapidminer.example.ExampleSet exampleSet,
      Attribute labelAttribute,
      Map<Integer, MeanVariance> meanVariances) {
    this(exampleSet.size(), 0.0d);
    this.meanVarianceMap = meanVariances;

    Attribute idAttribute = exampleSet.getAttributes().getId();
    int row = 0;
    for (com.rapidminer.example.Example current : exampleSet) {
      // Pass 1: gather the non-default values, keyed by attribute position (insertion ordered).
      Map<Integer, Double> sparseValues = new LinkedHashMap<Integer, Double>();
      int position = 0;
      for (Attribute attribute : exampleSet.getAttributes()) {
        double raw = current.getValue(attribute);
        if (!com.rapidminer.example.Tools.isDefault(attribute.getDefault(), raw)) {
          sparseValues.put(position, raw);
        }
        position++;
        // the dimension is the largest number of attributes seen so far
        dim = Math.max(dim, position);
      }

      // Pass 2: turn the map into the parallel index/value arrays of this row.
      double[] rowValues = new double[sparseValues.size()];
      int[] rowPositions = new int[sparseValues.size()];
      atts[row] = rowValues;
      index[row] = rowPositions;
      int slot = 0;
      for (Map.Entry<Integer, Double> entry : sparseValues.entrySet()) {
        Integer key = entry.getKey();
        rowPositions[slot] = key.intValue();

        double scaled = entry.getValue().doubleValue();
        MeanVariance stats = meanVarianceMap.get(key);
        if (stats != null) {
          // a constant column (variance 0) carries no information and is mapped to 0
          double variance = stats.getVariance();
          scaled = (variance == 0.0d) ? 0.0d : (scaled - stats.getMean()) / Math.sqrt(variance);
        }
        rowValues[slot] = scaled;
        slot++;
      }

      if (labelAttribute != null) {
        double label = current.getValue(labelAttribute);
        if (!labelAttribute.isNominal()) {
          ys[row] = label;
        } else {
          // nominal labels become +1 for the positive class and -1 otherwise
          ys[row] = (label == labelAttribute.getMapping().getPositiveIndex() ? 1 : -1);
        }
      }
      if (idAttribute != null) {
        ids[row] = current.getValueAsString(idAttribute);
      }
      row++;
    }
  }
  /**
   * Creates a new undirected graph and stores it in the {@code graph} field. If the example set
   * has an id attribute, one vertex per example (named by its id value) is added and the edges are
   * computed via {@code addEdges()}; without an id attribute the graph stays empty.
   *
   * @return the newly created graph
   */
  public Graph<String, String> createGraph() {
    graph = new UndirectedSparseGraph<String, String>();

    Attribute idAttribute = exampleSet.getAttributes().getId();
    if (idAttribute == null) {
      // nothing to name the vertices with: hand back the empty graph
      return graph;
    }

    for (Example current : exampleSet) {
      String vertexName = current.getValueAsString(idAttribute);
      graph.addVertex(vertexName);
    }
    addEdges();
    return graph;
  }
  /**
   * Collects the string values of the given attribute over all examples of this set, in order of
   * first appearance. A value is added only if {@code inList} reports it as not yet contained.
   *
   * @param attribute the attribute whose values are collected
   * @return the list of collected values
   */
  public LinkedList<String> getAllCategories(Attribute attribute) {
    LinkedList<String> categories = new LinkedList<String>();

    for (Iterator<Example> examples = this.iterator(); examples.hasNext(); ) {
      String category = examples.next().getValueAsString(attribute);
      if (inList(category, categories)) {
        continue;
      }
      categories.add(category);
    }

    return categories;
  }
Esempio n. 4
0
  /**
   * Reads the value of the configured attribute from the example at the configured index, stores
   * it in {@code currentValue} / {@code isNominal}, and passes the example set through unchanged.
   *
   * <p>The index parameter is 1-based when positive; a negative index counts from the end of the
   * example set (-1 is the last example). Zero is rejected.
   *
   * @throws UserError 111 if the attribute does not exist, 207 if the index is 0, 110 if the
   *     resolved index lies outside the example set
   * @throws OperatorException if the input data or a parameter cannot be retrieved
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

    Attribute attribute =
        exampleSet.getAttributes().get(getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
    if (attribute == null) {
      throw new UserError(this, 111, getParameterAsString(PARAMETER_ATTRIBUTE_NAME));
    }

    int index = getParameterAsInt(PARAMETER_EXAMPLE_INDEX);
    if (index == 0) {
      throw new UserError(
          this, 207, "0", PARAMETER_EXAMPLE_INDEX, "only positive or negative indices are allowed");
    }

    // translate the user-facing index into a zero-based position
    if (index < 0) {
      index = exampleSet.size() + index;
    } else {
      index--;
    }

    // A negative parameter whose magnitude exceeds the size resolves to a negative position; it
    // must be rejected here just like a position beyond the last example.
    if (index < 0 || index >= exampleSet.size()) {
      throw new UserError(this, 110, index);
    }

    Example example = exampleSet.getExample(index);
    if (attribute.isNominal()
        || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
      // nominal and date/time values are kept in their string representation
      currentValue = example.getValueAsString(attribute);
      isNominal = true;
    } else {
      currentValue = Double.valueOf(example.getValue(attribute));
      isNominal = false;
    }

    exampleSetOutput.deliver(exampleSet);
  }
  /**
   * Aggregates the given example set and returns the result as a new example set backed by a
   * fresh {@link MemoryExampleTable}.
   *
   * <p>The examples are grouped by the attributes matching the group-by parameter; every group is
   * represented by a leaf of an aggregation tree that receives each of its examples via {@code
   * count}. If no group attribute matches, a single leaf collects all examples. The result has one
   * column per group attribute followed by one column per aggregation function. The annotations of
   * the input are copied to the result.
   *
   * <p>For compatibility levels up to {@code VERSION_5_1_6} the result is post-processed: without
   * grouping an extra nominal group column is appended, with grouping the group columns are
   * converted by a {@code NumericToPolynominal} operator.
   *
   * @param exampleSet the examples to aggregate
   * @return the aggregated example set
   * @throws OperatorException if a parameter or a nested operator fails
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // creating data structures for building aggregates
    List<AggregationFunction> aggregationFunctions = createAggreationFunctions(exampleSet);

    // getting attributes that define groups and weights
    Attribute[] groupAttributes =
        getMatchingAttributes(
            exampleSet.getAttributes(), getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES));
    Attribute weightAttribute = exampleSet.getAttributes().getWeight();
    boolean useWeights = weightAttribute != null;

    // running over exampleSet and aggregate data of each example
    AggregationTreeNode rootNode = new AggregationTreeNode();
    LeafAggregationTreeNode leafNode = null;
    if (groupAttributes.length == 0) {
      // if no grouping, we will directly insert into leaf node
      leafNode = new LeafAggregationTreeNode(aggregationFunctions);
    }
    for (Example example : exampleSet) {
      if (groupAttributes.length > 0) {
        AggregationTreeNode currentNode = rootNode;
        // now traversing aggregation tree for m-1 group attributes
        for (int i = 0; i < groupAttributes.length - 1; i++) {
          Attribute currentAttribute = groupAttributes[i];
          // nominal attributes key the child by string value, all others by the raw double value
          if (currentAttribute.isNominal()) {
            currentNode = currentNode.getOrCreateChild(example.getValueAsString(currentAttribute));
          } else {
            currentNode = currentNode.getOrCreateChild(example.getValue(currentAttribute));
          }
        }

        // now we have to get the leaf node containing the aggregators
        Attribute currentAttribute = groupAttributes[groupAttributes.length - 1];
        if (currentAttribute.isNominal()) {
          leafNode =
              currentNode.getOrCreateLeaf(
                  example.getValueAsString(currentAttribute), aggregationFunctions);
        } else {
          leafNode =
              currentNode.getOrCreateLeaf(example.getValue(currentAttribute), aggregationFunctions);
        }
      }
      // now count current example; the weight attribute's value is passed along if one exists
      if (!useWeights) leafNode.count(example);
      else leafNode.count(example, example.getValue(weightAttribute));
    }

    // now derive new example set from aggregated values
    boolean isCountingAllCombinations = getParameterAsBoolean(PARAMETER_ALL_COMBINATIONS);

    // building new attributes from grouping attributes and aggregation functions:
    // group columns first, then one target column per aggregation function
    Attribute[] newAttributes = new Attribute[groupAttributes.length + aggregationFunctions.size()];
    for (int i = 0; i < groupAttributes.length; i++) {
      newAttributes[i] = AttributeFactory.createAttribute(groupAttributes[i]);
    }
    int i = groupAttributes.length;
    for (AggregationFunction function : aggregationFunctions) {
      newAttributes[i] = function.getTargetAttribute();
      i++;
    }

    // creating example table
    MemoryExampleTable table = new MemoryExampleTable(newAttributes);
    ;
    DataRowFactory factory = new DataRowFactory(DataRowFactory.TYPE_DOUBLE_ARRAY, '.');
    double[] dataOfUpperLevels = new double[groupAttributes.length];

    // prepare empty lists: one list of aggregators per aggregation function (i.e. per result
    // column); the row loop below reads them by row position
    ArrayList<List<Aggregator>> allAggregators = new ArrayList<List<Aggregator>>();
    for (int aggregatorIdx = 0; aggregatorIdx < aggregationFunctions.size(); ++aggregatorIdx) {
      allAggregators.add(new ArrayList<Aggregator>());
    }

    // one entry per result row, holding the values of the group columns
    ArrayList<double[]> allGroupCombinations = new ArrayList<double[]>();

    // NOTE(review): parseTree/parseLeaf are defined elsewhere; presumably they fill
    // allGroupCombinations and allAggregators in matching row order - confirm against them.
    if (groupAttributes.length > 0) {
      // going through all possible groups recursively
      parseTree(
          rootNode,
          groupAttributes,
          dataOfUpperLevels,
          0,
          allGroupCombinations,
          allAggregators,
          factory,
          newAttributes,
          isCountingAllCombinations,
          aggregationFunctions);
    } else {
      // just enter values from single leaf node
      parseLeaf(
          leafNode,
          dataOfUpperLevels,
          allGroupCombinations,
          allAggregators,
          factory,
          newAttributes,
          aggregationFunctions);
    }

    // apply post-processing: each function sees all aggregators of its own column
    int currentFunctionIdx = 0;
    for (AggregationFunction aggregationFunction : aggregationFunctions) {
      aggregationFunction.postProcessing(allAggregators.get(currentFunctionIdx));
      ++currentFunctionIdx;
    }

    // write data into table
    int currentRow = 0;
    for (double[] groupValues : allGroupCombinations) {
      double[] rowData = new double[newAttributes.length];

      // copy group values into row
      System.arraycopy(groupValues, 0, rowData, 0, groupValues.length);
      DoubleArrayDataRow dataRow = new DoubleArrayDataRow(rowData);

      // copy aggregated values into row
      int currentColumn = groupValues.length;
      for (List<Aggregator> aggregatorsForColumn : allAggregators) {
        Aggregator aggregatorForCurrentCell = aggregatorsForColumn.get(currentRow);
        Attribute currentAttribute = newAttributes[currentColumn];
        if (aggregatorForCurrentCell != null) {
          aggregatorForCurrentCell.set(currentAttribute, dataRow);
        } else {
          // no aggregator for this cell: let the function write its default value
          aggregationFunctions
              .get(currentColumn - groupAttributes.length)
              .setDefault(currentAttribute, dataRow);
        }
        ++currentColumn;
      }
      table.addDataRow(dataRow);
      ++currentRow;
    }

    // postprocessing for remaining compatibility: Old versions automatically added group "all".
    // Must remain this way for old operator
    // version
    if (getCompatibilityLevel().isAtMost(VERSION_5_1_6)) {
      if (groupAttributes.length == 0) {
        // append the generic group column and set it on the first data row
        Attribute resultGroupAttribute =
            AttributeFactory.createAttribute(GENERIC_GROUP_NAME, Ontology.NOMINAL);
        table.addAttribute(resultGroupAttribute);
        table
            .getDataRow(0)
            .set(
                resultGroupAttribute,
                resultGroupAttribute.getMapping().mapString(GENERIC_ALL_NAME));

        ExampleSet resultSet = table.createExampleSet();
        resultSet.getAnnotations().addAll(exampleSet.getAnnotations());
        // remove and re-add the aggregation columns as regular attributes, which places them
        // after the group column in the attribute order
        for (Attribute attribute : newAttributes) {
          resultSet.getAttributes().remove(attribute);
          resultSet.getAttributes().addRegular(attribute);
        }
        return resultSet;
      } else {
        // make attributes nominal
        ExampleSet resultSet = table.createExampleSet();
        resultSet.getAnnotations().addAll(exampleSet.getAnnotations());
        try {
          // the group-by parameter is reused as the regular expression selecting the attributes
          NumericToNominal toNominalOperator =
              OperatorService.createOperator(NumericToPolynominal.class);
          toNominalOperator.setParameter(
              AttributeSubsetSelector.PARAMETER_FILTER_TYPE,
              AttributeSubsetSelector.CONDITION_REGULAR_EXPRESSION + "");
          toNominalOperator.setParameter(
              RegexpAttributeFilter.PARAMETER_REGULAR_EXPRESSION,
              getParameterAsString(PARAMETER_GROUP_BY_ATTRIBUTES));
          toNominalOperator.setParameter(
              AttributeSubsetSelector.PARAMETER_INCLUDE_SPECIAL_ATTRIBUTES, "true");
          return toNominalOperator.apply(resultSet);
        } catch (OperatorCreationException e) {
          // otherwise compatibility could not be ensured: fall back to the unconverted result
          return resultSet;
        }
      }
    }

    // for recent version table is correct: Deliver example set
    ExampleSet resultSet = table.createExampleSet();
    resultSet.getAnnotations().addAll(exampleSet.getAnnotations());
    return resultSet;
  }
  /**
   * Guesses a value type for each selected nominal or numerical attribute from the string
   * representation of its values and replaces the attribute by a new one of the guessed type.
   *
   * <p>A column is guessed as integer while all parsed values are whole numbers, as real once a
   * fractional value is seen, and as nominal as soon as one value cannot be parsed as a number.
   * Before parsing, the grouping character (if set) is removed and the decimal point character is
   * replaced by {@code '.'}. Attributes that are neither nominal nor numerical are skipped. The
   * replacement only happens if the example set contains at least one example.
   *
   * @param exampleSet the example set to convert; it is modified in place
   * @return the same example set instance
   * @throws OperatorException if a parameter cannot be retrieved
   */
  @Override
  public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
    // init
    char decimalPointCharacter = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER).charAt(0);
    Character groupingCharacter = null;
    if (isParameterSet(PARAMETER_NUMBER_GROUPING_CHARACTER)) {
      groupingCharacter = getParameterAsString(PARAMETER_NUMBER_GROUPING_CHARACTER).charAt(0);
    }

    Set<Attribute> attributeSet = attributeSelector.getAttributeSubset(exampleSet, false);
    int size = attributeSet.size();

    // current value types of the selected attributes; replaced by the guessed types below if the
    // example set is not empty
    int[] valueTypes = new int[size];

    int index = 0;
    for (Attribute attribute : attributeSet) {
      valueTypes[index++] = attribute.getValueType();
    }

    // guessing
    // NOTE: index only advances for attributes that are nominal or numerical, so the entries of
    // guessedValueTypes are packed in the order of those attributes
    int[] guessedValueTypes = new int[valueTypes.length];
    int checkedCounter = 0;
    for (Example example : exampleSet) {
      index = 0;
      for (Attribute attribute : attributeSet) {
        if (!attribute.isNominal() && !attribute.isNumerical()) {
          continue;
        }

        double originalValue = example.getValue(attribute);
        if (!Double.isNaN(originalValue)) {
          // a column already decided to be nominal is not inspected again
          if (guessedValueTypes[index] != Ontology.NOMINAL) {
            try {
              String valueString = example.getValueAsString(attribute);
              if (!Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
                if (groupingCharacter != null) {
                  valueString = valueString.replace(groupingCharacter.toString(), "");
                }
                valueString = valueString.replace(decimalPointCharacter, '.');
                double value = Double.parseDouble(valueString);
                // once a column is real it stays real; otherwise decide integer vs. real
                if (guessedValueTypes[index] != Ontology.REAL) {
                  if (Tools.isEqual(Math.round(value), value)) {
                    guessedValueTypes[index] = Ontology.INTEGER;
                  } else {
                    guessedValueTypes[index] = Ontology.REAL;
                  }
                }
              }
            } catch (NumberFormatException e) {
              // not parseable as a number: the column is nominal
              guessedValueTypes[index] = Ontology.NOMINAL;
              checkedCounter++;
            }
          }
        }
        index++;
      }
      // stop scanning once the number of nominal decisions reaches the array length
      if (checkedCounter >= guessedValueTypes.length) {
        break;
      }
    }

    // the example set contains at least one example and the guessing was performed
    if (exampleSet.size() > 0) {
      valueTypes = guessedValueTypes;

      // new attributes
      List<AttributeRole> newAttributes = new LinkedList<AttributeRole>();
      index = 0;
      for (Attribute attribute : attributeSet) {
        // same skip rule as in the guessing loop keeps index aligned with guessedValueTypes
        if (!attribute.isNominal() && !attribute.isNumerical()) {
          continue;
        }

        AttributeRole role = exampleSet.getAttributes().getRole(attribute);

        // NOTE(review): if no value of the column was parsed, valueTypes[index] is still 0 here -
        // confirm that AttributeFactory accepts that value type
        Attribute newAttribute = AttributeFactory.createAttribute(valueTypes[index]);
        exampleSet.getExampleTable().addAttribute(newAttribute);
        AttributeRole newRole = new AttributeRole(newAttribute);
        newRole.setSpecial(role.getSpecialName());
        newAttributes.add(newRole);

        // copy data
        for (Example e : exampleSet) {
          double oldValue = e.getValue(attribute);
          if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueTypes[index], Ontology.NUMERICAL)) {
            // numerical target: parse the normalized string, missing values stay missing
            if (!Double.isNaN(oldValue)) {
              String valueString = e.getValueAsString(attribute);
              if (Attribute.MISSING_NOMINAL_VALUE.equals(valueString)) {
                e.setValue(newAttribute, Double.NaN);
              } else {
                if (groupingCharacter != null) {
                  valueString = valueString.replace(groupingCharacter.toString(), "");
                }
                valueString = valueString.replace(decimalPointCharacter, '.');
                e.setValue(newAttribute, Double.parseDouble(valueString));
              }
            } else {
              e.setValue(newAttribute, Double.NaN);
            }
          } else {
            // non-numerical target: map the string through the new attribute's mapping
            if (!Double.isNaN(oldValue)) {
              String value = e.getValueAsString(attribute);
              e.setValue(newAttribute, newAttribute.getMapping().mapString(value));
            } else {
              e.setValue(newAttribute, Double.NaN);
            }
          }
        }

        // delete attribute and rename the new attribute (due to deletion and data scans: no
        // more memory used :-)
        exampleSet.getExampleTable().removeAttribute(attribute);
        exampleSet.getAttributes().remove(role);
        newAttribute.setName(attribute.getName());

        index++;
      }

      // register the new attributes with the role of the attribute they replace
      for (AttributeRole role : newAttributes) {
        if (role.isSpecial()) {
          exampleSet
              .getAttributes()
              .setSpecialAttribute(role.getAttribute(), role.getSpecialName());
        } else {
          exampleSet.getAttributes().addRegular(role.getAttribute());
        }
      }
    }

    return exampleSet;
  }
Esempio n. 7
0
  /**
   * Writes the example set as separated text to the given writer: optionally a header line with
   * the attribute names, then one line per example covering all attributes. Missing values (NaN)
   * produce an empty cell.
   *
   * @param exampleSet the data to write
   * @param out the target writer
   * @param colSeparator the string printed between two cells
   * @param quoteNomValues if {@code true}, attribute names and nominal values are wrapped in
   *     double quotes, with embedded double quotes replaced by single quotes
   * @param writeAttribNames if {@code true}, the header line is written
   * @param formatDate if {@code true}, date/time values are printed with the default {@link
   *     DateFormat} instead of their numeric value
   */
  public static void writeCSV(
      ExampleSet exampleSet,
      PrintWriter out,
      String colSeparator,
      boolean quoteNomValues,
      boolean writeAttribNames,
      boolean formatDate) {
    // header line
    if (writeAttribNames) {
      boolean needsSeparator = false;
      for (Iterator<Attribute> columns = exampleSet.getAttributes().allAttributes();
          columns.hasNext(); ) {
        if (needsSeparator) {
          out.print(colSeparator);
        }
        String header = columns.next().getName();
        if (quoteNomValues) {
          header = "\"" + header.replaceAll("\"", "'") + "\"";
        }
        out.print(header);
        needsSeparator = true;
      }
      out.println();
    }

    // data lines
    for (Example example : exampleSet) {
      boolean needsSeparator = false;
      for (Iterator<Attribute> columns = exampleSet.getAttributes().allAttributes();
          columns.hasNext(); ) {
        Attribute attribute = columns.next();
        if (needsSeparator) {
          out.print(colSeparator);
        }
        needsSeparator = true;

        double value = example.getValue(attribute);
        if (Double.isNaN(value)) {
          // missing value: leave the cell empty
          continue;
        }

        if (attribute.isNominal()) {
          String text = example.getValueAsString(attribute);
          if (quoteNomValues) {
            text = "\"" + text.replaceAll("\"", "'") + "\"";
          }
          out.print(text);
          continue;
        }

        boolean isDate =
            Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME);
        if (isDate && formatDate) {
          // the numeric value is used as the millisecond argument of Date
          out.print(DateFormat.getInstance().format(new Date((long) value)));
        } else {
          out.print(value);
        }
      }
      out.println();
    }
  }
  /**
   * Rebuilds the edges of the graph. All edges known from the label map are removed, then every
   * pair of examples is rated by the measure (distance or similarity), the pairs are sorted, and
   * edges are added from the front of the sorted list up to the limit given by the slider. Each
   * added edge gets a label and a strength that is min-max normalized over the added edges.
   */
  private void addEdges() {
    // drop the edges of a previous run
    for (String oldEdgeId : edgeLabelMap.keySet()) {
      graph.removeEdge(oldEdgeId);
    }
    edgeLabelMap.clear();

    // rate every unordered pair of examples
    final boolean usesDistance = measure.isDistance();
    Attribute id = exampleSet.getAttributes().getId();
    List<SortableEdge> candidates = new LinkedList<SortableEdge>();
    final int exampleCount = exampleSet.size();
    for (int first = 0; first < exampleCount; first++) {
      Example example = exampleSet.getExample(first);
      for (int second = first + 1; second < exampleCount; second++) {
        Example comExample = exampleSet.getExample(second);
        String firstVertex = example.getValueAsString(id);
        String secondVertex = comExample.getValueAsString(id);
        double rating =
            usesDistance
                ? measure.calculateDistance(example, comExample)
                : measure.calculateSimilarity(example, comExample);
        candidates.add(
            new SortableEdge(
                firstVertex,
                secondVertex,
                null,
                rating,
                usesDistance ? SortableEdge.DIRECTION_INCREASE : SortableEdge.DIRECTION_DECREASE));
      }
    }

    Collections.sort(candidates);

    // take edges from the front of the sorted list
    final int numberOfEdges = distanceSlider.getValue();
    double weakest = Double.POSITIVE_INFINITY;
    double strongest = Double.NEGATIVE_INFINITY;
    Map<String, Double> rawStrengths = new HashMap<String, Double>();
    int added = 0;
    Iterator<SortableEdge> remaining = candidates.iterator();
    // NOTE(review): the bound is inclusive, so up to numberOfEdges + 1 edges are added - confirm
    // that this is intended.
    while (remaining.hasNext() && added <= numberOfEdges) {
      SortableEdge candidate = remaining.next();

      String edgeId = edgeFactory.create();
      graph.addEdge(
          edgeId, candidate.getFirstVertex(), candidate.getSecondVertex(), EdgeType.UNDIRECTED);
      edgeLabelMap.put(edgeId, Tools.formatIntegerIfPossible(candidate.getEdgeValue()));

      double strength = candidate.getEdgeValue();
      if (strength < weakest || Double.isNaN(strength)) {
        weakest = Math.min(weakest, strength);
      }
      if (strength > strongest || Double.isNaN(strength)) {
        strongest = Math.max(strongest, strength);
      }
      rawStrengths.put(edgeId, strength);

      added++;
    }

    // min-max normalization of the strengths of the added edges
    // NOTE(review): if all added edges have the same value the range is 0 and the result is NaN.
    final double range = strongest - weakest;
    for (Entry<String, Double> rated : rawStrengths.entrySet()) {
      edgeStrengthMap.put(rated.getKey(), (rated.getValue() - weakest) / range);
    }
  }
  /**
   * Performs agglomerative clustering on the input example set. A distance matrix over all pairs
   * of examples is filled, every example starts as its own leaf cluster, and the two clusters
   * chosen by the configured linkage method are merged repeatedly until a single root remains.
   * The resulting dendrogram model and the example set are delivered at the output ports.
   *
   * @throws OperatorException if the input, the measure, or a parameter cannot be obtained, or if
   *     one of the preliminary checks fails
   */
  @Override
  public void doWork() throws OperatorException {
    ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);
    DistanceMeasure measure = measureHelper.getInitializedMeasure(exampleSet);

    // additional checks
    Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
    Tools.checkAndCreateIds(exampleSet);

    Attribute idAttribute = exampleSet.getAttributes().getId();
    final boolean nominalIds = idAttribute.isNominal();
    final int exampleCount = exampleSet.size();
    DistanceMatrix matrix = new DistanceMatrix(exampleCount);
    Map<Integer, HierarchicalClusterNode> clusterMap =
        new HashMap<Integer, HierarchicalClusterNode>(exampleCount);
    int[] clusterIds = new int[exampleCount];

    // fill the matrix entries above the diagonal and create one leaf cluster per example
    int nextClusterId = 0;
    for (Example rowExample : exampleSet) {
      checkForStop();
      clusterIds[nextClusterId] = nextClusterId;

      int column = 0;
      for (Example columnExample : exampleSet) {
        if (column > nextClusterId) {
          matrix.set(nextClusterId, column, measure.calculateDistance(rowExample, columnExample));
        }
        column++;
      }

      // the leaf carries the example id, as string for nominal ids and as number otherwise
      HierarchicalClusterLeafNode leaf =
          nominalIds
              ? new HierarchicalClusterLeafNode(
                  nextClusterId, rowExample.getValueAsString(idAttribute))
              : new HierarchicalClusterLeafNode(nextClusterId, rowExample.getValue(idAttribute));
      clusterMap.put(nextClusterId, leaf);
      nextClusterId++;
    }

    // single linkage is the default; modes[1] and modes[2] select complete and average linkage
    AbstractLinkageMethod linkage = new SingleLinkageMethod(matrix, clusterIds);
    String mode = getParameterAsString(PARAMETER_MODE);
    if (mode.equals(modes[1])) {
      linkage = new CompleteLinkageMethod(matrix, clusterIds);
    } else if (mode.equals(modes[2])) {
      linkage = new AverageLinkageMethod(matrix, clusterIds);
    }

    // build the tree bottom up: each merge replaces two clusters by a new parent node
    while (clusterMap.size() > 1) {
      Agglomeration agglomeration = linkage.getNextAgglomeration(nextClusterId, clusterMap);
      HierarchicalClusterNode parent =
          new HierarchicalClusterNode(nextClusterId, agglomeration.getDistance());
      parent.addSubNode(clusterMap.get(agglomeration.getClusterId1()));
      parent.addSubNode(clusterMap.get(agglomeration.getClusterId2()));
      clusterMap.remove(agglomeration.getClusterId1());
      clusterMap.remove(agglomeration.getClusterId2());
      clusterMap.put(nextClusterId, parent);
      nextClusterId++;
    }

    // the only remaining entry is the root of the dendrogram
    HierarchicalClusterNode root = clusterMap.values().iterator().next();
    HierarchicalClusterModel model = new DendogramHierarchicalClusterModel(root);

    // registering visualizer
    ObjectVisualizerService.addObjectVisualizer(
        model, new ExampleVisualizer((ExampleSet) exampleSet.clone()));

    modelOutput.deliver(model);
    exampleSetOutput.deliver(exampleSet);
  }