Ejemplo n.º 1
0
  /**
   * Refines a data set by dropping useless attributes and standardizing what remains.
   *
   * <p>The input is passed through {@code removeUseless} and then through {@code standardize},
   * using a newly created {@link Standardize} filter whose input format is set from the reduced
   * data. Attributes of the input whose names no longer appear in the final result are reported
   * as removed.
   *
   * @param mergedAlgSimVec instances to refine; attribute indices in the result refer to this
   *     data set as it was when the method was called
   * @return a meta object holding the refined instances, the indices (ascending, relative to
   *     {@code mergedAlgSimVec}) of the attributes that were dropped, and the {@link Standardize}
   *     filter that was applied
   * @throws Exception if configuring the filter or one of the filtering helpers fails
   */
  public static AttributeFilterMeta refineInstances(Instances mergedAlgSimVec) throws Exception {
    // Snapshot the original attribute names in index order before any filtering runs, so the
    // removed-index computation does not depend on what the helpers do to the input.
    int originalCount = mergedAlgSimVec.numAttributes();
    String[] originalNames = new String[originalCount];
    for (int i = 0; i < originalCount; i++) {
      originalNames[i] = mergedAlgSimVec.attribute(i).name();
    }

    Instances withoutUseless = removeUseless(mergedAlgSimVec);

    // The filter's input format is set once from this data; the filter is returned in the meta
    // object so callers can keep using the same instance.
    Standardize stdFilter = new Standardize();
    stdFilter.setInputFormat(withoutUseless);
    Instances standardized = standardize(withoutUseless, stdFilter);

    // Names of the attributes that survived both steps.
    Set<String> survivingNames = new HashSet<String>();
    for (int i = 0; i < standardized.numAttributes(); i++) {
      survivingNames.add(standardized.attribute(i).name());
    }

    // Walk the original attributes in index order so the reported indices are ascending and
    // stable, rather than following hash-map iteration order.
    List<Integer> removedIndices = new ArrayList<Integer>();
    for (int i = 0; i < originalCount; i++) {
      if (!survivingNames.contains(originalNames[i])) {
        removedIndices.add(i);
      }
    }

    AttributeFilterMeta res = new AttributeFilterMeta();
    res.setInstances(standardized);
    res.setRemovedAttributes(Ints.toArray(removedIndices));
    res.setStandardizeFilter(stdFilter);
    return res;
  }