/** * Build the encodings against each categorical feature. * * @return a list of encodings - last value of the list represent the encodings for the response * variable. index - index of the feature value - Map<String, Integer> - map of unique values * of this feature and there encoded values. key - unique value value - encoded value */ private static List<Map<String, Integer>> buildEncodings( List<Feature> features, Map<String, String> summaryStats, List<Integer> newToOldIndicesList, int responseIndex) { List<Map<String, Integer>> encodings = new ArrayList<Map<String, Integer>>(); for (int i = 0; i < newToOldIndicesList.size() + 1; i++) { encodings.add(new HashMap<String, Integer>()); } for (Feature feature : features) { Map<String, Integer> encodingMap = new HashMap<String, Integer>(); if (feature.getType().equals(FeatureType.CATEGORICAL)) { List<String> uniqueVals = getUniqueValues(feature.getIndex(), summaryStats.get(feature.getName())); Collections.sort(uniqueVals); for (int i = 0; i < uniqueVals.size(); i++) { encodingMap.put(uniqueVals.get(i), i); } int newIndex = newToOldIndicesList.indexOf(feature.getIndex()); if (newIndex != -1) { encodings.set(newIndex, encodingMap); } else if (feature.getIndex() == responseIndex) { // response encoding at the end encodings.set(encodings.size() - 1, encodingMap); } } } return encodings; }
/**
 * Looks up the type of the feature whose name equals the given response variable.
 *
 * <p>Every feature is examined; if several features carry the same name, the type of the last
 * one in the list is returned.
 *
 * @param responseVariable name of the response variable
 * @param features features to search
 * @return the type of the matching feature, or {@code null} when no feature has that name
 */
private String getTypeOfResponseVariable(String responseVariable, List<Feature> features) {
    String responseType = null;
    for (Feature candidate : features) {
        boolean isResponse = candidate.getName().equals(responseVariable);
        if (!isResponse) {
            continue;
        }
        // Deliberately no early exit: a later feature with the same name takes precedence.
        responseType = candidate.getType();
    }
    return responseType;
}