private static double normVector(List<Double> v1) { double result = 0.0; for (Double aV1 : v1) { result = result + (aV1 * aV1); } return Math.sqrt(result); }
public static double getInverseDocumentFrequency(GraphDatabaseService db, Long featureId) { Double idf; Double d = ((Integer) getDocumentSize(db)).doubleValue(); Double dt = ((Integer) getDocumentSizeForFeature(db, featureId)).doubleValue(); idf = Math.log(d / dt); return idf; }
public static Map<String, Object> getCosineSimilarityVector(GraphDatabaseService db) { Map<String, List<LinkedHashMap<String, Object>>> documents = getFeaturesForAllClasses(db); Map<String, List<LinkedHashMap<String, Object>>> results = new HashMap<>(); List<Integer> featureIndexList = getFeatureIndexList(db); List<String> documentList = documents.keySet().stream().collect(Collectors.toList()); Collections.sort(documentList, (a, b) -> a.compareToIgnoreCase(b)); for (String key : documentList) { List<LinkedHashMap<String, Object>> resultList = new ArrayList<>(); LinkedHashMap<String, Double> classMap = new LinkedHashMap<>(); List<Double> v1 = featureIndexList .stream() .map(i -> documents.get(key).contains(i) ? featureIndexList.indexOf(i) : 0.0) .collect(Collectors.toList()); documents .keySet() .stream() .forEach( otherKey -> { List<Double> v2 = featureIndexList .stream() .map( i -> documents.get(otherKey).contains(i) ? featureIndexList.indexOf(i) : 0.0) .collect(Collectors.toList()); classMap.put(otherKey, cosineSimilarity(v1, v2)); }); final List<LinkedHashMap<String, Object>> finalResultList = resultList; classMap .keySet() .forEach( ks -> { LinkedHashMap<String, Object> localMap = new LinkedHashMap<>(); localMap.put("class", ks); localMap.put("similarity", classMap.get(ks)); finalResultList.add(localMap); }); Collections.sort( finalResultList, (a, b) -> ((String) a.get("class")).compareToIgnoreCase((String) b.get("class"))); results.put(key, finalResultList); } List<LinkedHashMap<String, Object>> similarityVector = new ArrayList<>(); for (String key : results.keySet()) { List<Double> cosineVector; cosineVector = results .get(key) .stream() .map(a -> Convert.toDouble(Math.round(100000 * (Double) a.get("similarity")))) .collect(Collectors.toList()); LinkedHashMap<String, Object> row = new LinkedHashMap<>(); row.put("class", key); row.put("vector", cosineVector); similarityVector.add(row); } Collections.sort( similarityVector, (a, b) -> ((String) a.get("class")).compareToIgnoreCase((String) b.get("class"))); Map<String, Object> vectorMap = new LinkedHashMap<>(); List<ArrayList<Double>> vectors = new ArrayList<>(); List<String> classNames = new ArrayList<>(); for (LinkedHashMap<String, Object> val : similarityVector) { vectors.add((ArrayList<Double>) val.get("vector")); classNames.add((String) val.get("class")); } vectorMap.put("classes", classNames); vectorMap.put("vectors", vectors); return vectorMap; }