예제 #1
0
  /**
   * Creates the string-to-integer encodings for the categorical features.
   *
   * <p>The returned list has {@code newToOldIndicesList.size() + 1} slots, each initially holding
   * an empty map. For every feature whose type equals {@link FeatureType#CATEGORICAL}, its unique
   * values (as produced by {@code getUniqueValues}) are sorted in natural {@code String} order and
   * numbered {@code 0, 1, 2, ...}. The resulting map is stored:
   *
   * <ul>
   *   <li>at the position where the feature's index occurs in {@code newToOldIndicesList}, or
   *   <li>in the last slot when the feature's index is absent from that list but equals {@code
   *       responseIndex}.
   * </ul>
   *
   * <p>A categorical feature matching neither case is not recorded. Non-categorical features leave
   * their slot as the initial empty map.
   *
   * @param features features to inspect
   * @param summaryStats per-feature summary statistics, looked up by feature name and handed to
   *     {@code getUniqueValues}
   * @param newToOldIndicesList list whose positions are the output slots and whose elements are
   *     the feature indices assigned to those slots
   * @param responseIndex index of the response feature; its encoding goes into the last slot
   * @return one map per slot (unique value to encoded integer), the final slot being reserved for
   *     the response variable
   */
  private static List<Map<String, Integer>> buildEncodings(
      List<Feature> features,
      Map<String, String> summaryStats,
      List<Integer> newToOldIndicesList,
      int responseIndex) {
    final int slotCount = newToOldIndicesList.size() + 1;
    final int responseSlot = slotCount - 1;

    // Pre-fill every slot so that positions without a categorical feature yield an empty map.
    List<Map<String, Integer>> encodings = new ArrayList<Map<String, Integer>>();
    while (encodings.size() < slotCount) {
      encodings.add(new HashMap<String, Integer>());
    }

    for (Feature feature : features) {
      Map<String, Integer> valueToCode = new HashMap<String, Integer>();
      if (!feature.getType().equals(FeatureType.CATEGORICAL)) {
        continue;
      }

      List<String> sortedValues =
          getUniqueValues(feature.getIndex(), summaryStats.get(feature.getName()));
      Collections.sort(sortedValues);

      // The code of a value is its rank in the sorted order.
      int nextCode = 0;
      for (String value : sortedValues) {
        valueToCode.put(value, nextCode);
        nextCode++;
      }

      int targetSlot = newToOldIndicesList.indexOf(feature.getIndex());
      if (targetSlot == -1) {
        if (feature.getIndex() != responseIndex) {
          // Neither a mapped predictor nor the response: nothing to store.
          continue;
        }
        // The response encoding always lives in the final slot.
        targetSlot = responseSlot;
      }
      encodings.set(targetSlot, valueToCode);
    }

    return encodings;
  }
 /**
  * Looks up the type of the feature whose name equals the given response variable.
  *
  * <p>The whole list is always scanned; if several features share the name, the type of the last
  * one wins.
  *
  * @param responseVariable name of the response variable to look for
  * @param features features to search
  * @return the type of the matching feature, or {@code null} when no feature has that name
  */
 private String getTypeOfResponseVariable(String responseVariable, List<Feature> features) {
   String matchedType = null;
   for (Feature candidate : features) {
     if (!candidate.getName().equals(responseVariable)) {
       continue;
     }
     // No early exit on purpose: a later feature with the same name overrides this one.
     matchedType = candidate.getType();
   }
   return matchedType;
 }