/**
 * Builds the list of feature terms.
 *
 * <p>Fetches the terms from {@code databaseConnector.getTermsList(false)} and appends every term
 * from {@code databaseConnector.getTermsList(true)} that is not already present, then keeps each
 * term whose Gini coefficient (as returned by {@code calculateGiniCoefficient}) is greater than
 * or equal to {@code GINI_THRESHOLD}.
 *
 * <p>NOTE(review): earlier documentation mentioned a "support factor" requirement. No such check
 * is performed in this method; if it is required, confirm that it is applied elsewhere.
 *
 * @return the terms that meet the Gini threshold, in the order they were collected
 */
public ArrayList<String> calculateFeaturesList() {
    // Work on a copy so the list handed out by the connector is never modified here.
    ArrayList<String> termsList = new ArrayList<String>(databaseConnector.getTermsList(false));
    ArrayList<String> userDataTermsList = databaseConnector.getTermsList(true);

    // Hash-based membership check instead of ArrayList.contains() inside the loop:
    // same resulting list and order, without the quadratic scan.
    java.util.Set<String> knownTerms = new java.util.HashSet<String>(termsList);
    for (String term : userDataTermsList) {
        if (knownTerms.add(term)) {
            termsList.add(term);
        }
    }

    ArrayList<String> featuresList = new ArrayList<String>();
    for (String term : termsList) {
        double giniCoefficient = calculateGiniCoefficient(term);
        if (giniCoefficient >= GINI_THRESHOLD) {
            featuresList.add(term);
        }
    }
    return featuresList;
}
/**
 * Calculates the Gini coefficient of every known term and returns the term-to-coefficient
 * mapping, intended for testing and analysis.
 *
 * <p>The terms are those from {@code databaseConnector.getTermsList(false)} followed by every
 * term from {@code databaseConnector.getTermsList(true)} that is not already present. The result
 * is a {@link TreeMap} ordered by a {@code ValueComparator} that is backed by the unsorted
 * mapping.
 *
 * <p>NOTE(review): the iteration order (documented elsewhere as descending by value) and the
 * behaviour of key lookups on the returned map depend entirely on {@code ValueComparator}, which
 * is not visible here. If it treats equal coefficients as equal keys, terms sharing a
 * coefficient would be collapsed into one entry; if it never reports equality, {@code get()} on
 * the returned map may not find keys. Confirm against the comparator's implementation.
 *
 * @return a map from term to Gini coefficient, ordered by {@code ValueComparator}
 */
public Map<String, Double> getGiniMapping() {
    // Work on a copy so the list handed out by the connector is never modified here.
    ArrayList<String> termsList = new ArrayList<String>(databaseConnector.getTermsList(false));
    ArrayList<String> userDataTermsList = databaseConnector.getTermsList(true);

    // Hash-based membership check instead of ArrayList.contains() inside the loop:
    // same resulting list and order, without the quadratic scan.
    java.util.Set<String> knownTerms = new java.util.HashSet<String>(termsList);
    for (String term : userDataTermsList) {
        if (knownTerms.add(term)) {
            termsList.add(term);
        }
    }

    Map<String, Double> termGiniMapping = new HashMap<String, Double>();
    // The comparator reads values from termGiniMapping, so that map must be fully populated
    // before any entry is inserted into the sorted map.
    ValueComparator valueComparator = new ValueComparator(termGiniMapping);
    TreeMap<String, Double> sortedTermGiniMapping = new TreeMap<String, Double>(valueComparator);

    for (String term : termsList) {
        double giniCoefficient = calculateGiniCoefficient(term);
        termGiniMapping.put(term, giniCoefficient);
    }

    sortedTermGiniMapping.putAll(termGiniMapping);
    return sortedTermGiniMapping;
}