예제 #1
0
  /**
   * Computes the Euclidean (L2) norm of a vector.
   *
   * @param v1 the vector components; a {@code null} list or {@code null} element causes a
   *     {@link NullPointerException}
   * @return the square root of the sum of the squared components; {@code 0.0} for an empty list
   */
  private static double normVector(List<Double> v1) {
    double sumOfSquares = 0.0;
    // Accumulate left to right so the floating-point rounding matches a plain sequential sum.
    for (double component : v1) {
      sumOfSquares += component * component;
    }
    return Math.sqrt(sumOfSquares);
  }
예제 #2
0
  /**
   * Computes the inverse document frequency of a feature as {@code ln(d / dt)}, where {@code d} is
   * the value of {@code getDocumentSize(db)} and {@code dt} is the value of
   * {@code getDocumentSizeForFeature(db, featureId)}.
   *
   * @param db the graph database handle passed through to both size lookups
   * @param featureId the feature identifier passed to {@code getDocumentSizeForFeature}
   * @return the natural logarithm of {@code d / dt}
   */
  public static double getInverseDocumentFrequency(GraphDatabaseService db, Long featureId) {
    final double totalSize = ((Integer) getDocumentSize(db)).doubleValue();
    final double sizeForFeature =
        ((Integer) getDocumentSizeForFeature(db, featureId)).doubleValue();

    // No zero guard: with sizeForFeature == 0 the division yields Infinity (or NaN when
    // totalSize is also 0) and Math.log propagates that value to the caller.
    return Math.log(totalSize / sizeForFeature);
  }
예제 #3
0
  /**
   * Builds a pairwise cosine-similarity matrix over every class returned by
   * {@code getFeaturesForAllClasses(db)}.
   *
   * <p>Class names are sorted case-insensitively and that single ordering is used for both the
   * rows and the columns of the matrix. Each cell is the result of {@code cosineSimilarity} for
   * the row and column classes, scaled by 100000, rounded with {@link Math#round(double)} and
   * converted with {@code Convert.toDouble}.
   *
   * @param db the graph database handle passed to {@code getFeaturesForAllClasses} and
   *     {@code getFeatureIndexList}
   * @return a map with two entries, in this order: {@code "classes"} holding the sorted
   *     {@code List<String>} of class names, and {@code "vectors"} holding a
   *     {@code List<ArrayList<Double>>} whose i-th row belongs to the i-th class name
   */
  public static Map<String, Object> getCosineSimilarityVector(GraphDatabaseService db) {
    Map<String, List<LinkedHashMap<String, Object>>> documents = getFeaturesForAllClasses(db);
    List<Integer> featureIndexList = getFeatureIndexList(db);

    // One case-insensitive ordering labels both axes, so row i and column i always refer to the
    // same class, even when two names differ only by case.
    List<String> classNames = new ArrayList<>(documents.keySet());
    classNames.sort(String::compareToIgnoreCase);

    // Build each class's feature vector exactly once instead of once per (row, column) pair.
    Map<String, List<Double>> featureVectors = new HashMap<>();
    for (String className : classNames) {
      List<LinkedHashMap<String, Object>> classFeatures = documents.get(className);
      List<Double> featureVector = new ArrayList<>(featureIndexList.size());
      for (Integer featureIndex : featureIndexList) {
        // NOTE(review): classFeatures holds maps while featureIndex is an Integer, so this
        // contains() check looks like it can never match and every component would be 0.0.
        // Preserved as-is; confirm against the schema produced by getFeaturesForAllClasses.
        featureVector.add(
            classFeatures.contains(featureIndex)
                ? featureIndexList.indexOf(featureIndex)
                : 0.0);
      }
      featureVectors.put(className, featureVector);
    }

    // Rows are created explicitly as ArrayList so the element type of "vectors" does not depend
    // on which List implementation a stream collector happens to return.
    List<ArrayList<Double>> vectors = new ArrayList<>(classNames.size());
    for (String rowClass : classNames) {
      List<Double> rowVector = featureVectors.get(rowClass);
      ArrayList<Double> row = new ArrayList<>(classNames.size());
      for (String columnClass : classNames) {
        // The same list instances are reused across calls; this assumes cosineSimilarity only
        // reads its arguments - TODO confirm.
        double similarity = cosineSimilarity(rowVector, featureVectors.get(columnClass));
        row.add(Convert.toDouble(Math.round(100000 * similarity)));
      }
      vectors.add(row);
    }

    Map<String, Object> vectorMap = new LinkedHashMap<>();
    vectorMap.put("classes", classNames);
    vectorMap.put("vectors", vectors);
    return vectorMap;
  }