/** * loading training data * * @param mergedAlgSimVec algebra instances normally for applying the remove useless and * standardization filtering * @return * @throws Exception */ public static AttributeFilterMeta refineInstances(Instances mergedAlgSimVec) throws Exception { AttributeFilterMeta res = new AttributeFilterMeta(); Map<String, Integer> attributeIndex = new HashMap<String, Integer>(); // List<Attribute> atts = new ArrayList<Attribute>(); // mergedAlgSimVecClass.attribute(0). for (int i = 0; i < mergedAlgSimVec.numAttributes(); i++) { attributeIndex.put(mergedAlgSimVec.attribute(i).name(), i); } Instances mergedUselessFilteredAlgSimVecClass = removeUseless(mergedAlgSimVec); Standardize stdFilter = new Standardize(); // kepp in meta stdFilter.setInputFormat( mergedUselessFilteredAlgSimVecClass); // initializing the filter once with training set Instances stdFilterdInstances = standardize(mergedUselessFilteredAlgSimVecClass, stdFilter); // record removed attributes/columns in a matrix Set<String> selectedAtt = new HashSet<String>(); for (int i = 0; i < stdFilterdInstances.numAttributes(); i++) { selectedAtt.add(stdFilterdInstances.attribute(i).name()); } List<Integer> deletedAttIndex = new ArrayList<Integer>(); for (Entry<String, Integer> e : attributeIndex.entrySet()) { if (selectedAtt.contains(e.getKey()) == false) { deletedAttIndex.add(e.getValue()); } } int[] removedAttributes = Ints.toArray(deletedAttIndex); res.setInstances(stdFilterdInstances); res.setRemovedAttributes(removedAttributes); res.setStandardizeFilter(stdFilter); return res; }