Java DDSimIF Examples

Programming Language: Java

Namespace/Package Name: edig.document.similarity

Class/Type: DDSimIF

Examples at hotexamples.com: 2

Java DDSimIF - 2 examples found. These are the top-rated real-world Java examples of edig.document.similarity.DDSimIF extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

calculateSimilarity(2)

Example #1

Show file

File: SinglePass.java Project: ahmad-bakr/EDIG

 /**
  * Computes the mean similarity between a document and the documents of a cluster.
  *
  * <p>The cluster's documents are fetched with {@code cluster.getDocumentsList(...)}, each one is
  * scored against {@code document} by a {@code DDSimilairty} calculator, and the scores are
  * averaged. If the fetched list is empty the final division is {@code 0.0 / 0}, so the method
  * returns {@code NaN}; any {@code >} comparison a caller performs against that value is false.
  *
  * @param document the document being compared against the cluster
  * @param cluster the cluster whose documents are scored
  * @param datasetHandler dataset handler; forwarded to {@code getDocumentsList} and queried for
  *     the number of documents passed to the similarity calculator
  * @param neo4jHandler neo4j handler; forwarded to {@code getDocumentsList}
  * @return the arithmetic mean of the pairwise similarities, or {@code NaN} when the cluster
  *     yields no documents
  * @throws IOException declared by the signature; presumably propagated from the calls made
  *     here (confirm against the callees)
  * @throws ClassNotFoundException declared by the signature; presumably propagated from the
  *     calls made here (confirm against the callees)
  */
 private double calculateAvgSimilairtyToCluster(
     Neo4jDocument document,
     Neo4jCluster cluster,
     DatasetLoader datasetHandler,
     Neo4jHandler neo4jHandler)
     throws IOException, ClassNotFoundException {
   DDSimIF similarityCalculator = new DDSimilairty();
   ArrayList<Neo4jDocument> clusterDocuments =
       cluster.getDocumentsList(datasetHandler, neo4jHandler);
   double similaritySum = 0;
   // Typed for-each: the list is already parameterized, so no raw Iterator or cast is needed.
   for (Neo4jDocument clusterDocument : clusterDocuments) {
     similaritySum +=
         similarityCalculator.calculateSimilarity(
             document, clusterDocument, datasetHandler.numberOfDocuments());
   }
   // Deliberately unguarded: an empty list keeps the original NaN result.
   return similaritySum / clusterDocuments.size();
 }

Example #2

Show file

File: SinglePass.java Project: ahmad-bakr/EDIG

  /**
   * Clusters every document of the dataset in a single pass.
   *
   * <p>For each document loaded from {@code datasetHandler}, the similar documents are fetched
   * with {@code getSimilarDocuments}. For every similar document that already belongs to at
   * least one cluster and whose similarity to the current document exceeds {@code
   * similairtyThreshold}, each of its clusters is scored with {@code
   * calculateAvgSimilairtyToCluster}; clusters whose average also exceeds the threshold become
   * candidates. If there are candidates, the document is added to the one selected by {@code
   * getNearestCluster}; otherwise a new cluster is created whose ID is the next value of a
   * counter starting at 1.
   *
   * @param datasetHandler dataset handler used to load the documents and to obtain the document
   *     count passed to the similarity calculator
   * @param neo4jHandler neo4j handler used to load each document and forwarded to the helpers
   * @param similairtyThreshold value that both the document-to-document similarity and the
   *     average document-to-cluster similarity must strictly exceed
   * @param softClusteringThreshold not read by this method
   * @return the clusters created during this run, keyed by cluster ID
   * @throws Exception declared by the signature; propagated from the handlers and helpers
   */
  public Hashtable<String, Neo4jCluster> perform(
      DatasetLoader datasetHandler,
      Neo4jHandler neo4jHandler,
      double similairtyThreshold,
      int softClusteringThreshold)
      throws Exception {
    Hashtable<String, Neo4jCluster> clustersList = new Hashtable<String, Neo4jCluster>();
    Hashtable<String, Document> docsHash = datasetHandler.loadDocuments();
    DDSimIF similarityCalculator = new DDSimilairty();
    Enumeration<String> documentIDs = docsHash.keys();
    int numberOfClusters = 0;

    // Loop over the documents in the dataset.
    while (documentIDs.hasMoreElements()) {
      // Candidate cluster ID -> average similarity of the current document to that cluster.
      Hashtable<String, Double> candidateClustersHash = new Hashtable<String, Double>();
      String documentID = documentIDs.nextElement();
      System.out.println("Processing document " + documentID);
      Document document = docsHash.get(documentID);
      Neo4jDocument neo4jDocument = neo4jHandler.loadDocument(document);
      boolean clusteredYet = false;

      // Get the documents similar to the current document.
      ArrayList<Neo4jDocument> similarDocuments =
          getSimilarDocuments(neo4jDocument, neo4jHandler, datasetHandler);

      for (Neo4jDocument neo4jSimilarDocument : similarDocuments) {
        // A similar document without clusters contributes no candidates.
        if (neo4jSimilarDocument.getClustersHash().isEmpty()) {
          continue;
        }
        // Only consider documents whose similarity to the current one exceeds the threshold.
        if (similarityCalculator.calculateSimilarity(
                neo4jDocument, neo4jSimilarDocument, datasetHandler.numberOfDocuments())
            > similairtyThreshold) {
          ArrayList<String> candidateDocumentClustersIDs = neo4jSimilarDocument.getClusterIDsList();
          for (String candidateClusterID : candidateDocumentClustersIDs) {
            // NOTE(review): get() returns null for an ID not created in this run, and the helper
            // below dereferences the cluster - confirm IDs always originate from clustersList.
            Neo4jCluster candidateNeo4jCluster = clustersList.get(candidateClusterID);
            double averageSimilairtyToCluster =
                calculateAvgSimilairtyToCluster(
                    neo4jDocument, candidateNeo4jCluster, datasetHandler, neo4jHandler);
            // Keep the cluster as a candidate when the average similarity exceeds the threshold.
            // A cluster reached through several similar documents is re-scored and overwritten.
            if (averageSimilairtyToCluster > similairtyThreshold) {
              clusteredYet = true;
              candidateClustersHash.put(candidateClusterID, averageSimilairtyToCluster);
            }
          } // end loop over the similar document's clusters
        } // end if similarity to the similar document exceeds the threshold
      } // end loop over similar documents

      if (!clusteredYet) {
        // No candidate qualified: create a new cluster holding only this document.
        numberOfClusters++;
        Neo4jCluster newCluster = new Neo4jCluster(String.valueOf(numberOfClusters));
        newCluster.addDcoument(documentID);
        neo4jDocument.addCluster(newCluster.getId(), 1);
        clustersList.put(newCluster.getId(), newCluster);
      } else {
        // Add the document to the candidate cluster chosen by getNearestCluster.
        String nearestClusterID = getNearestCluster(candidateClustersHash);
        Neo4jCluster cluster = clustersList.get(nearestClusterID);
        cluster.addDcoument(neo4jDocument.getDocumentID());
        neo4jDocument.addCluster(nearestClusterID, 1);
      }
    } // end loop over all documents in the dataset

    return clustersList;
  }