示例#1
0
  /**
   * Applies the filtering recorded in {@code meta} to a further set of instances.
   *
   * <p>The attributes listed by {@code meta.getRemovedAttributes()} are dropped first (the same
   * ones that were removed from the training data), and the result is then passed through
   * {@code meta.getStandardizeFilter()}.
   *
   * @param algebraInstances the instances to filter
   * @param meta supplies the removed-attribute indices and the standardize filter to apply
   * @return the instances produced by the standardize filter
   * @throws Exception propagated from {@code removeAttributes} or {@code Filter.useFilter}
   */
  public static Instances refineInstances(Instances algebraInstances, AttributeFilterMeta meta)
      throws Exception {
    // Step 1: drop the attributes recorded in the meta.
    Instances pruned = removeAttributes(algebraInstances, meta.getRemovedAttributes());

    // Step 2: run the filter stored in the meta over what is left.
    return Filter.useFilter(pruned, meta.getStandardizeFilter());
  }
示例#2
0
  /**
   * Attaches nominal class labels to validation or test data.
   *
   * <p>The last attribute of {@code labelInstances} is run through an {@code AddValues} filter
   * configured with the labels from {@code instancesWithMeta.getClassAtrributeValues()}, so that
   * all classes from the stored definition are available rather than only those occurring in this
   * dataset. The filtered labels are merged with {@code instances}, the last attribute of the
   * merged set becomes the class attribute, and the merged set is stored in
   * {@code instancesWithMeta}.
   *
   * @param instances the feature instances the labels are merged with
   * @param instancesWithMeta source of the class attribute values; receives the merged instances
   *     through {@code setInstances}
   * @param labelInstances the instances holding the label attribute
   * @return the merged instances with the class index set to the last attribute
   * @throws Exception propagated from the filter setup or {@code Filter.useFilter}
   */
  public static Instances addNominalLabelsForClassification(
      Instances instances, AttributeFilterMeta instancesWithMeta, Instances labelInstances)
      throws Exception {
    AddValues labelExpander = new AddValues();
    labelExpander.setSort(true);
    labelExpander.setAttributeIndex("last");
    labelExpander.setLabels(instancesWithMeta.getClassAtrributeValues());
    labelExpander.setInputFormat(labelInstances);

    Instances merged =
        Instances.mergeInstances(instances, Filter.useFilter(labelInstances, labelExpander));

    int classIndex = merged.numAttributes() - 1;
    merged.setClassIndex(classIndex);

    instancesWithMeta.setInstances(merged);
    return merged;
  }
示例#3
0
  /**
   * Attaches labels for regression; used for training, validation and test data alike.
   *
   * <p>Merges {@code instances} with {@code labelInstances}, marks the last attribute of the merged
   * set as the class attribute, and stores the merged set in {@code instancesWithMeta}.
   *
   * @param instances the feature instances the labels are merged with
   * @param instancesWithMeta receives the merged instances through {@code setInstances}
   * @param labelInstances the instances holding the label attribute
   * @return the merged instances with the class index set to the last attribute
   * @throws Exception declared for callers; nothing in this body is shown to throw a checked
   *     exception
   */
  public static Instances addNumericLablesForRegression(
      Instances instances, AttributeFilterMeta instancesWithMeta, Instances labelInstances)
      throws Exception {
    Instances merged = Instances.mergeInstances(instances, labelInstances);

    // The label column is appended by the merge, so it is the last attribute.
    int classIndex = merged.numAttributes() - 1;
    merged.setClassIndex(classIndex);

    instancesWithMeta.setInstances(merged);
    return merged;
  }
示例#4
0
  /**
   * Attaches nominal class labels to training data and records the class values.
   *
   * <p>Merges {@code instances} with {@code labelInstances} and marks the last attribute of the
   * merged set as the class attribute. The values of that class attribute are joined with commas
   * (no spaces) and stored through {@code instancesWithMeta.setClassAtrributeValues}; the merged set
   * is stored through {@code instancesWithMeta.setInstances}.
   *
   * @param instances the feature instances the labels are merged with
   * @param instancesWithMeta receives the comma-separated class values and the merged instances
   * @param labelInstances the instances holding the label attribute
   * @return the merged instances with the class index set to the last attribute
   * @throws Exception declared for callers; nothing in this body is shown to throw a checked
   *     exception
   */
  public static Instances addNominalLabelsForClassificationToTrainingData(
      Instances instances, AttributeFilterMeta instancesWithMeta, Instances labelInstances)
      throws Exception {
    Instances merged = Instances.mergeInstances(instances, labelInstances);
    merged.setClassIndex(merged.numAttributes() - 1);

    Attribute classAttribute = merged.classAttribute();
    int valueCount = classAttribute.numValues();

    // Build "v0,v1,...": a separator goes before every value except the first.
    StringBuilder joinedValues = new StringBuilder();
    for (int idx = 0; idx < valueCount; idx++) {
      if (idx > 0) {
        joinedValues.append(",");
      }
      joinedValues.append(classAttribute.value(idx));
    }

    instancesWithMeta.setClassAtrributeValues(joinedValues.toString());
    instancesWithMeta.setInstances(merged);
    return merged;
  }
示例#5
0
  /**
   * Filters the training data and records what was done so it can be replayed on other data.
   *
   * <p>The input is passed through {@code removeUseless} and then through {@code standardize} with
   * a fresh {@code Standardize} filter whose input format is set from the training data. Every
   * attribute of the input whose name no longer appears in the filtered result is reported as
   * removed.
   *
   * <p>The removed indices are returned in ascending order and refer to attribute positions in
   * {@code mergedAlgSimVec} as it was on entry. Attributes are matched by name, so if the input
   * contained several attributes with one name they are all kept or all reported as removed.
   *
   * @param mergedAlgSimVec the training instances to filter
   * @return a meta object holding the filtered instances, the indices of the removed attributes,
   *     and the standardize filter that was used
   * @throws Exception propagated from the filter setup, {@code removeUseless} or
   *     {@code standardize}
   */
  public static AttributeFilterMeta refineInstances(Instances mergedAlgSimVec) throws Exception {
    AttributeFilterMeta res = new AttributeFilterMeta();

    // Snapshot the attribute names before any filtering, so the reported indices refer to the
    // input layout even if a helper were to modify the input.
    int originalCount = mergedAlgSimVec.numAttributes();
    String[] originalNames = new String[originalCount];
    for (int i = 0; i < originalCount; i++) {
      originalNames[i] = mergedAlgSimVec.attribute(i).name();
    }

    Instances withoutUseless = removeUseless(mergedAlgSimVec);

    // The filter is initialized once, with the training set, and kept in the meta for reuse.
    Standardize stdFilter = new Standardize();
    stdFilter.setInputFormat(withoutUseless);

    Instances standardized = standardize(withoutUseless, stdFilter);

    // Names of the attributes that survived filtering.
    Set<String> keptNames = new HashSet<String>();
    for (int i = 0; i < standardized.numAttributes(); i++) {
      keptNames.add(standardized.attribute(i).name());
    }

    // Scan in index order so the result is ascending and deterministic, rather than following
    // the iteration order of a hash map.
    List<Integer> removedIndices = new ArrayList<Integer>();
    for (int i = 0; i < originalCount; i++) {
      if (!keptNames.contains(originalNames[i])) {
        removedIndices.add(i);
      }
    }

    res.setInstances(standardized);
    res.setRemovedAttributes(Ints.toArray(removedIndices));
    res.setStandardizeFilter(stdFilter);

    return res;
  }