コード例 #1
0
  public static Map<String, List<LinkedHashMap<String, Object>>> similarDocumentMapForVector(
      GraphDatabaseService db, GraphManager graphManager, String input) {
    Map<String, List<LinkedHashMap<String, Object>>> documents;
    Map<String, List<LinkedHashMap<String, Object>>> results = new HashMap<>();
    List<Integer> featureIndexList;

    VsmCacheModel vsmCacheModel = new VsmCacheModel(db).invoke();
    featureIndexList = vsmCacheModel.getFeatureIndexList();
    documents = vsmCacheModel.getDocuments();

    List<Double> features = getFeatureVector(db, graphManager, input, featureIndexList);

    List<LinkedHashMap<String, Object>> resultList = new ArrayList<>();
    LinkedHashMap<String, Double> classMap = new LinkedHashMap<>();

    documents
        .keySet()
        .stream()
        .forEach(
            otherKey -> {
              List<Double> v2 = getWeightVectorForClass(documents, otherKey, featureIndexList, db);
              classMap.put(otherKey, cosineSimilarity(v2, features));
            });

    classMap
        .keySet()
        .stream()
        .forEach(
            ks -> {
              if (classMap.get(ks) > 0.0) {
                LinkedHashMap<String, Object> localMap = new LinkedHashMap<>();
                localMap.put("class", ks);
                localMap.put("similarity", classMap.get(ks));
                resultList.add(localMap);
              }
            });

    try {
      resultList.sort(
          (a, b) -> {
            Double diff = (((double) a.get("similarity")) - ((double) b.get("similarity")));
            return diff > 0 ? -1 : diff.equals(0.0) ? 0 : 1;
          });
    } catch (NullPointerException ex) {
      // resultList is empty or null
    }

    results.put("classes", resultList);

    return results;
  }
コード例 #2
0
  public static Map<String, List<LinkedHashMap<String, Object>>> similarDocumentMapForClass(
      GraphDatabaseService db, String className) {

    Map<String, List<LinkedHashMap<String, Object>>> documents;
    Map<String, List<LinkedHashMap<String, Object>>> results = new HashMap<>();
    List<Integer> featureIndexList;

    VsmCacheModel vsmCacheModel = new VsmCacheModel(db).invoke();
    featureIndexList = vsmCacheModel.getFeatureIndexList();
    documents = vsmCacheModel.getDocuments();

    final String key = className;

    List<LinkedHashMap<String, Object>> resultList = new ArrayList<>();
    LinkedHashMap<String, Double> classMap = new LinkedHashMap<>();

    List<Double> v1 = getFeatureVectorForDocumentClass(documents, featureIndexList, key);

    documents
        .keySet()
        .stream()
        .filter(otherKey -> !key.equals(otherKey))
        .forEach(
            otherKey -> {
              List<Double> v2 =
                  getBinaryFeatureVectorForDocumentClass(documents, featureIndexList, otherKey);
              classMap.put(otherKey, cosineSimilarity(v1, v2));
            });

    classMap
        .keySet()
        .forEach(
            ks -> {
              if (!ks.equals(key) && classMap.get(ks) > 0.0) {
                LinkedHashMap<String, Object> localMap = new LinkedHashMap<>();
                localMap.put("class", ks);
                localMap.put("similarity", classMap.get(ks));
                resultList.add(localMap);
              }
            });

    resultList.sort(
        (a, b) -> {
          Double diff = (((double) a.get("similarity")) - ((double) b.get("similarity")));
          return diff > 0 ? -1 : diff.equals(0.0) ? 0 : 1;
        });

    results.put("classes", resultList);

    return results;
  }
コード例 #3
0
  public static Map<String, Object> getCosineSimilarityVector(GraphDatabaseService db) {
    Map<String, List<LinkedHashMap<String, Object>>> documents = getFeaturesForAllClasses(db);
    Map<String, List<LinkedHashMap<String, Object>>> results = new HashMap<>();
    List<Integer> featureIndexList = getFeatureIndexList(db);

    List<String> documentList = documents.keySet().stream().collect(Collectors.toList());

    Collections.sort(documentList, (a, b) -> a.compareToIgnoreCase(b));

    for (String key : documentList) {
      List<LinkedHashMap<String, Object>> resultList = new ArrayList<>();
      LinkedHashMap<String, Double> classMap = new LinkedHashMap<>();

      List<Double> v1 =
          featureIndexList
              .stream()
              .map(i -> documents.get(key).contains(i) ? featureIndexList.indexOf(i) : 0.0)
              .collect(Collectors.toList());
      documents
          .keySet()
          .stream()
          .forEach(
              otherKey -> {
                List<Double> v2 =
                    featureIndexList
                        .stream()
                        .map(
                            i ->
                                documents.get(otherKey).contains(i)
                                    ? featureIndexList.indexOf(i)
                                    : 0.0)
                        .collect(Collectors.toList());
                classMap.put(otherKey, cosineSimilarity(v1, v2));
              });

      final List<LinkedHashMap<String, Object>> finalResultList = resultList;
      classMap
          .keySet()
          .forEach(
              ks -> {
                LinkedHashMap<String, Object> localMap = new LinkedHashMap<>();
                localMap.put("class", ks);
                localMap.put("similarity", classMap.get(ks));
                finalResultList.add(localMap);
              });

      Collections.sort(
          finalResultList,
          (a, b) -> ((String) a.get("class")).compareToIgnoreCase((String) b.get("class")));
      results.put(key, finalResultList);
    }

    List<LinkedHashMap<String, Object>> similarityVector = new ArrayList<>();

    for (String key : results.keySet()) {
      List<Double> cosineVector;
      cosineVector =
          results
              .get(key)
              .stream()
              .map(a -> Convert.toDouble(Math.round(100000 * (Double) a.get("similarity"))))
              .collect(Collectors.toList());
      LinkedHashMap<String, Object> row = new LinkedHashMap<>();
      row.put("class", key);
      row.put("vector", cosineVector);
      similarityVector.add(row);
    }

    Collections.sort(
        similarityVector,
        (a, b) -> ((String) a.get("class")).compareToIgnoreCase((String) b.get("class")));

    Map<String, Object> vectorMap = new LinkedHashMap<>();

    List<ArrayList<Double>> vectors = new ArrayList<>();
    List<String> classNames = new ArrayList<>();

    for (LinkedHashMap<String, Object> val : similarityVector) {
      vectors.add((ArrayList<Double>) val.get("vector"));
      classNames.add((String) val.get("class"));
    }

    vectorMap.put("classes", classNames);
    vectorMap.put("vectors", vectors);

    return vectorMap;
  }