Exemplo n.º 1
0
 /**
  * Builds the list of feature terms from every unique term known to the database connector.
  *
  * <p>The list returned by {@code getTermsList(false)} is extended with every term from
  * {@code getTermsList(true)} that it does not already contain. A term is then kept as a feature
  * when its Gini coefficient is greater than or equal to {@code GINI_THRESHOLD}.
  *
  * <p>NOTE(review): earlier documentation also mentioned a support-factor requirement, but no
  * such check is performed in this method - confirm whether it is applied elsewhere (for
  * example inside {@code calculateGiniCoefficient}) or is simply missing.
  *
  * @return the terms whose Gini coefficient meets the threshold, in merged-list order
  */
 public ArrayList<String> calculateFeaturesList() {
   ArrayList<String> termsList = databaseConnector.getTermsList(false);
   // Track membership in a hash set so that merging the second list does not require a linear
   // scan of termsList for every candidate term.
   java.util.Set<String> knownTerms = new java.util.HashSet<String>(termsList);
   for (String term : databaseConnector.getTermsList(true)) {
     // Set.add returns false when the term is already known, so each term is appended once.
     if (knownTerms.add(term)) {
       termsList.add(term);
     }
   }
   ArrayList<String> featuresList = new ArrayList<String>();
   for (String term : termsList) {
     double giniCoefficient = calculateGiniCoefficient(term);
     if (giniCoefficient >= GINI_THRESHOLD) {
       featuresList.add(term);
     }
   }
   return featuresList;
 }
Exemplo n.º 2
0
 /**
  * Computes the Gini coefficient of every unique term and returns the term-to-coefficient
  * mapping, ordered by a {@code ValueComparator} built over the computed values. Intended for
  * testing and analysis.
  *
  * <p>The list returned by {@code getTermsList(false)} is extended with every term from
  * {@code getTermsList(true)} that it does not already contain before the coefficients are
  * calculated.
  *
  * <p>NOTE(review): the ordering is presumably descending by coefficient, as the previous
  * documentation stated - confirm against {@code ValueComparator}. Depending on how that
  * comparator treats equal values, terms with tied coefficients may collapse into one entry or
  * key lookups on the returned map may fail; verify before relying on {@code get}.
  *
  * @return a map from term to Gini coefficient, iterated in the comparator's order
  */
 public Map<String, Double> getGiniMapping() {
   ArrayList<String> termsList = databaseConnector.getTermsList(false);
   // Track membership in a hash set so that merging the second list does not require a linear
   // scan of termsList for every candidate term.
   java.util.Set<String> knownTerms = new java.util.HashSet<String>(termsList);
   for (String term : databaseConnector.getTermsList(true)) {
     // Set.add returns false when the term is already known, so each term is appended once.
     if (knownTerms.add(term)) {
       termsList.add(term);
     }
   }
   Map<String, Double> termGiniMapping = new HashMap<String, Double>();
   // The comparator reads from termGiniMapping, so that map must be fully populated before any
   // entry is inserted into the sorted map.
   ValueComparator valueComparator = new ValueComparator(termGiniMapping);
   TreeMap<String, Double> sortedTermGiniMapping = new TreeMap<String, Double>(valueComparator);
   for (String term : termsList) {
     termGiniMapping.put(term, calculateGiniCoefficient(term));
   }
   sortedTermGiniMapping.putAll(termGiniMapping);
   return sortedTermGiniMapping;
 }