public static Instances refineInstances(Instances algebraInstances, AttributeFilterMeta meta) throws Exception { // merge algebra and simvec // Instances mergedAlgSimVec = Instances.mergeInstances(algebraInstances, simVecInstances); // remove the same attributes that were removed in training data // System.out.println(meta); // System.out.println(Arrays.toString(meta.getRemovedAttributes())); Instances filteredAlgSimVecClass = removeAttributes(algebraInstances, meta.getRemovedAttributes()); // stdFilter.setInputFormat(mergedUselessFilteredAlgSimVecClass); Instances stdFilterdInstances = Filter.useFilter(filteredAlgSimVecClass, meta.getStandardizeFilter()); return stdFilterdInstances; }
/** * use for validation and test data * * @param instancesWithMeta * @param labelInstances * @return * @throws Exception */ public static Instances addNominalLabelsForClassification( Instances instances, AttributeFilterMeta instancesWithMeta, Instances labelInstances) throws Exception { // to add all the available classes in the definition, otherwise just considers the available // classes in a dataset AddValues addValFilter = new AddValues(); addValFilter.setSort(true); addValFilter.setAttributeIndex("last"); addValFilter.setLabels(instancesWithMeta.getClassAtrributeValues()); addValFilter.setInputFormat(labelInstances); Instances filteredlabelInstances = Filter.useFilter(labelInstances, addValFilter); Instances finalInstances = Instances.mergeInstances(instances, filteredlabelInstances); finalInstances.setClassIndex(finalInstances.numAttributes() - 1); instancesWithMeta.setInstances(finalInstances); return finalInstances; }
/**
 * Appends numeric labels to feature instances; use for regression training, validation and
 * test data.
 *
 * <p>{@code labelInstances} is merged onto {@code instances} and the last attribute of the
 * merged set is marked as the class.
 *
 * @param instances the feature instances the labels are merged onto
 * @param instancesWithMeta receives the merged result via {@code setInstances}
 * @param labelInstances the instances holding the labels
 * @return the merged instances with the class index set to the last attribute
 * @throws Exception declared by the signature; propagated from the calls made here
 */
public static Instances addNumericLablesForRegression(
    Instances instances, AttributeFilterMeta instancesWithMeta, Instances labelInstances)
    throws Exception {
  Instances merged = Instances.mergeInstances(instances, labelInstances);

  // The label columns are appended after the features, so the class is the last attribute.
  int classColumn = merged.numAttributes() - 1;
  merged.setClassIndex(classColumn);

  instancesWithMeta.setInstances(merged);
  return merged;
}
/**
 * Appends nominal class labels to feature instances; use for training data.
 *
 * <p>{@code labelInstances} is merged onto {@code instances} and the last attribute of the
 * merged set is marked as the class. The values of that class attribute are then joined, in
 * index order and separated by commas, and stored on {@code instancesWithMeta} together with
 * the merged instances.
 *
 * <p>NOTE(review): a label that itself contains a comma would be indistinguishable from two
 * labels in the stored string — confirm labels never contain commas.
 *
 * @param instances the feature instances the labels are merged onto
 * @param instancesWithMeta receives the comma-separated label list via
 *     {@code setClassAtrributeValues} and the merged result via {@code setInstances}
 * @param labelInstances the instances holding the class labels
 * @return the merged instances with the class index set to the last attribute
 * @throws Exception declared by the signature; propagated from the calls made here
 */
public static Instances addNominalLabelsForClassificationToTrainingData(
    Instances instances, AttributeFilterMeta instancesWithMeta, Instances labelInstances)
    throws Exception {
  Instances finalCleaned = Instances.mergeInstances(instances, labelInstances);
  finalCleaned.setClassIndex(finalCleaned.numAttributes() - 1);

  Attribute classAttribute = finalCleaned.classAttribute();
  int labelCount = classAttribute.numValues();

  // Build the comma-separated list with a StringBuilder instead of repeated String
  // concatenation, which copies the whole accumulated string on every iteration.
  StringBuilder labels = new StringBuilder();
  for (int i = 0; i < labelCount; i++) {
    if (i > 0) {
      labels.append(',');
    }
    labels.append(classAttribute.value(i));
  }

  instancesWithMeta.setClassAtrributeValues(labels.toString());
  instancesWithMeta.setInstances(finalCleaned);
  return finalCleaned;
}
/** * loading training data * * @param mergedAlgSimVec algebra instances normally for applying the remove useless and * standardization filtering * @return * @throws Exception */ public static AttributeFilterMeta refineInstances(Instances mergedAlgSimVec) throws Exception { AttributeFilterMeta res = new AttributeFilterMeta(); Map<String, Integer> attributeIndex = new HashMap<String, Integer>(); // List<Attribute> atts = new ArrayList<Attribute>(); // mergedAlgSimVecClass.attribute(0). for (int i = 0; i < mergedAlgSimVec.numAttributes(); i++) { attributeIndex.put(mergedAlgSimVec.attribute(i).name(), i); } Instances mergedUselessFilteredAlgSimVecClass = removeUseless(mergedAlgSimVec); Standardize stdFilter = new Standardize(); // kepp in meta stdFilter.setInputFormat( mergedUselessFilteredAlgSimVecClass); // initializing the filter once with training set Instances stdFilterdInstances = standardize(mergedUselessFilteredAlgSimVecClass, stdFilter); // record removed attributes/columns in a matrix Set<String> selectedAtt = new HashSet<String>(); for (int i = 0; i < stdFilterdInstances.numAttributes(); i++) { selectedAtt.add(stdFilterdInstances.attribute(i).name()); } List<Integer> deletedAttIndex = new ArrayList<Integer>(); for (Entry<String, Integer> e : attributeIndex.entrySet()) { if (selectedAtt.contains(e.getKey()) == false) { deletedAttIndex.add(e.getValue()); } } int[] removedAttributes = Ints.toArray(deletedAttIndex); res.setInstances(stdFilterdInstances); res.setRemovedAttributes(removedAttributes); res.setStandardizeFilter(stdFilter); return res; }