Exemple #1
0
  /**
   * Computes the cosine similarity of two vectors stored as key-to-weight maps.
   *
   * <p>The dot product and both squared magnitudes are accumulated in a single pass over the
   * entries of {@code table1}, looking each key up in {@code table2}. The resulting ratio is
   * passed through {@code NumericFormat.getNumberFormated} and parsed back into a {@code Double}.
   *
   * @param table1 first vector, keyed by term
   * @param table2 second vector; must have the same size as {@code table1} and a non-null value
   *     for every key of {@code table1}
   * @return the formatted cosine similarity, or {@code 0.0} when either vector has zero magnitude
   *     (which includes two empty tables), since the ratio is undefined in that case
   * @throws Exception if the tables differ in size, or if a key of {@code table1} has a null
   *     value or no value in {@code table2}
   */
  private Double CosineSimilarity(HashMap<String, Double> table1, HashMap<String, Double> table2)
      throws Exception {
    if (table1.size() != table2.size()) {
      throw new Exception("Table sizes must be equal");
    }

    double squaredNorm1 = 0;
    double squaredNorm2 = 0;
    double dotProduct = 0;

    for (Entry<String, Double> kv : table1.entrySet()) {
      Double first = kv.getValue();
      Double second = table2.get(kv.getKey());

      // Equal sizes do not imply equal key sets; fail with a clear message instead of an
      // unboxing NullPointerException when a key is absent or mapped to null.
      if (first == null || second == null) {
        throw new Exception(
            "Both tables must hold a non-null value for key: " + kv.getKey());
      }

      squaredNorm1 += first * first;
      squaredNorm2 += second * second;
      dotProduct += first * second;
    }

    double denominator = Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2);

    // Avoid 0/0 = NaN being pushed through the string formatter and parsed back.
    if (denominator == 0) {
      return 0.0;
    }

    return Double.parseDouble(NumericFormat.getNumberFormated(dotProduct / denominator));
  }
Exemple #2
0
  /**
   * Computes the cosine similarity between {@code words1} and {@code words2}.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>build one table per word source with {@code PrepareFrequency};
   *   <li>transform each table with {@code TfFactorized};
   *   <li>collect both tables in a list and hand it to {@code PrepareAllHashTables};
   *   <li>replace the list with the result of {@code GetPreparedTFIDFTables}, fed with
   *       {@code IDFDocumentTable} of the list and the list itself;
   *   <li>run {@code CosineSimilarity} on the first two tables, then format the result with
   *       {@code NumericFormat.getNumberFormated} and parse it back into a {@code Double}.
   * </ol>
   *
   * @return the formatted similarity value
   * @throws Exception propagated from the helper methods
   */
  public Double GetCosineSimilarity() throws Exception {
    HashMap<String, Double> freqFirst = PrepareFrequency(words1);
    HashMap<String, Double> freqSecond = PrepareFrequency(words2);

    HashMap<String, Double> tfFirst = TfFactorized(freqFirst);
    HashMap<String, Double> tfSecond = TfFactorized(freqSecond);

    LinkedList<HashMap<String, Double>> docTables = new LinkedList<HashMap<String, Double>>();
    docTables.add(tfFirst);
    docTables.add(tfSecond);

    PrepareAllHashTables(docTables);
    docTables = GetPreparedTFIDFTables(IDFDocumentTable(docTables), docTables);

    Double similarity = CosineSimilarity(docTables.get(0), docTables.get(1));
    String formatted = NumericFormat.getNumberFormated(similarity);
    return Double.parseDouble(formatted);
  }