public static Map<String, List<LinkedHashMap<String, Object>>> similarDocumentMapForVector( GraphDatabaseService db, GraphManager graphManager, String input) { Map<String, List<LinkedHashMap<String, Object>>> documents; Map<String, List<LinkedHashMap<String, Object>>> results = new HashMap<>(); List<Integer> featureIndexList; VsmCacheModel vsmCacheModel = new VsmCacheModel(db).invoke(); featureIndexList = vsmCacheModel.getFeatureIndexList(); documents = vsmCacheModel.getDocuments(); List<Double> features = getFeatureVector(db, graphManager, input, featureIndexList); List<LinkedHashMap<String, Object>> resultList = new ArrayList<>(); LinkedHashMap<String, Double> classMap = new LinkedHashMap<>(); documents .keySet() .stream() .forEach( otherKey -> { List<Double> v2 = getWeightVectorForClass(documents, otherKey, featureIndexList, db); classMap.put(otherKey, cosineSimilarity(v2, features)); }); classMap .keySet() .stream() .forEach( ks -> { if (classMap.get(ks) > 0.0) { LinkedHashMap<String, Object> localMap = new LinkedHashMap<>(); localMap.put("class", ks); localMap.put("similarity", classMap.get(ks)); resultList.add(localMap); } }); try { resultList.sort( (a, b) -> { Double diff = (((double) a.get("similarity")) - ((double) b.get("similarity"))); return diff > 0 ? -1 : diff.equals(0.0) ? 0 : 1; }); } catch (NullPointerException ex) { // resultList is empty or null } results.put("classes", resultList); return results; }
/**
 * Ranks the other classes of the loaded {@link VsmCacheModel} by cosine similarity to the class
 * named {@code className}.
 *
 * <p>The reference vector comes from {@code getFeatureVectorForDocumentClass}; each candidate's
 * vector comes from {@code getBinaryFeatureVectorForDocumentClass}.
 *
 * @param db graph database handle used to load the model
 * @param className the class to compare against; it is excluded from the result
 * @return a map with the single key {@code "classes"}; its value lists one entry per other class
 *     whose similarity is strictly greater than zero, each entry holding {@code "class"} and
 *     {@code "similarity"}, ordered from most to least similar
 */
public static Map<String, List<LinkedHashMap<String, Object>>> similarDocumentMapForClass(
    GraphDatabaseService db, String className) {
  VsmCacheModel cache = new VsmCacheModel(db).invoke();
  List<Integer> featureIndexes = cache.getFeatureIndexList();
  Map<String, List<LinkedHashMap<String, Object>>> classDocuments = cache.getDocuments();

  List<Double> referenceVector =
      getFeatureVectorForDocumentClass(classDocuments, featureIndexes, className);

  // First pass: score every class except the reference class itself.
  LinkedHashMap<String, Double> scores = new LinkedHashMap<>();
  for (String candidate : classDocuments.keySet()) {
    if (className.equals(candidate)) {
      continue;
    }
    List<Double> candidateVector =
        getBinaryFeatureVectorForDocumentClass(classDocuments, featureIndexes, candidate);
    scores.put(candidate, cosineSimilarity(referenceVector, candidateVector));
  }

  // Second pass: keep only strictly positive scores, in the order they were computed.
  List<LinkedHashMap<String, Object>> ranked = new ArrayList<>();
  for (Map.Entry<String, Double> score : scores.entrySet()) {
    String candidate = score.getKey();
    if (candidate.equals(className) || !(score.getValue() > 0.0)) {
      continue;
    }
    LinkedHashMap<String, Object> entry = new LinkedHashMap<>();
    entry.put("class", candidate);
    entry.put("similarity", score.getValue());
    ranked.add(entry);
  }

  // Highest similarity first.
  ranked.sort(
      (left, right) -> {
        double delta = (double) left.get("similarity") - (double) right.get("similarity");
        if (delta > 0) {
          return -1;
        }
        return Double.compare(delta, 0.0) == 0 ? 0 : 1;
      });

  Map<String, List<LinkedHashMap<String, Object>>> response = new HashMap<>();
  response.put("classes", ranked);
  return response;
}
/**
 * Builds a square matrix of pairwise cosine similarities between all classes returned by
 * {@code getFeaturesForAllClasses}.
 *
 * <p>Each similarity is multiplied by 100000, rounded with {@link Math#round(double)} and passed
 * through {@code Convert.toDouble}. Rows and columns both follow the same case-insensitive
 * ordering of the class names, so {@code vectors.get(r).get(c)} is the score between
 * {@code classes.get(r)} and {@code classes.get(c)}.
 *
 * @param db graph database handle passed to the feature lookups
 * @return a map holding {@code "classes"} (the ordered class names, a {@code List<String>}) and
 *     {@code "vectors"} (one {@code ArrayList<Double>} row per class, in the same order)
 */
public static Map<String, Object> getCosineSimilarityVector(GraphDatabaseService db) {
  Map<String, List<LinkedHashMap<String, Object>>> documents = getFeaturesForAllClasses(db);
  List<Integer> featureIndexList = getFeatureIndexList(db);

  List<String> classNames = new ArrayList<>(documents.keySet());
  classNames.sort(String::compareToIgnoreCase);

  // Compute each class's vector once instead of once per (row, column) pair.
  Map<String, List<Double>> featureVectors = new HashMap<>();
  for (String className : classNames) {
    List<LinkedHashMap<String, Object>> classFeatures = documents.get(className);
    List<Double> vector = new ArrayList<>(featureIndexList.size());
    for (Integer featureIndex : featureIndexList) {
      // NOTE(review): classFeatures holds maps while featureIndex is an Integer, so this
      // contains() check looks like it can never match and every component ends up 0.0.
      // The original behaviour is kept as-is; confirm the intended lookup (presumably
      // against a field of each map) before changing it.
      vector.add(
          classFeatures.contains(featureIndex)
              ? (double) featureIndexList.indexOf(featureIndex)
              : 0.0);
    }
    featureVectors.put(className, vector);
  }

  // Rows are created as ArrayList explicitly so the declared element type needs no downcast.
  List<ArrayList<Double>> vectors = new ArrayList<>(classNames.size());
  for (String rowClass : classNames) {
    List<Double> rowVector = featureVectors.get(rowClass);
    ArrayList<Double> row = new ArrayList<>(classNames.size());
    for (String columnClass : classNames) {
      double similarity = cosineSimilarity(rowVector, featureVectors.get(columnClass));
      row.add(Convert.toDouble(Math.round(100000 * similarity)));
    }
    vectors.add(row);
  }

  Map<String, Object> vectorMap = new LinkedHashMap<>();
  vectorMap.put("classes", classNames);
  vectorMap.put("vectors", vectors);
  return vectorMap;
}