/**
 * Assigns a keyword label to every cluster in {@code clusterData}.
 *
 * <p>For each cluster, the documents of its data elements are collected into a fresh
 * {@code DocumentCollection} and turned into a cluster-local vector space model with
 * {@code TFIDF}. Every term of every document is scored as its weight in that model multiplied
 * by {@code getDocumentCount(term, cluster)}; a term that occurs in several documents keeps only
 * its highest score, so it can occupy at most one label slot. The (at most) five best-scoring
 * distinct terms are put into a naturally ordered set, and the cluster label is set to that
 * set's string form (for example {@code [alpha, beta, gamma]}).
 *
 * <p>As a debugging aid the cluster-local model is also exported to
 * {@code tmp/<current label>.csv}; a failing export is deliberately ignored.
 *
 * @param clusterData the clusters to label; each one gets its label replaced
 * @param dataSet not read by this method
 */
public static void assignLabels(
      List<Cluster<DocumentDataElement>> clusterData, DocumentDataSet dataSet) {
    // Maximum number of distinct terms that make up one label.
    final int maxLabelTerms = 5;

    for (Cluster<DocumentDataElement> cluster : clusterData) {
      DocumentCollection localCollection = new DocumentCollection();
      for (DocumentDataElement elem : cluster.getDataElements()) {
        localCollection.addDocument(elem.getDocument());
      }

      DocumentVSMGenerator docToVsm = new TFIDF();
      DocumentDataSet clusterDataSet = docToVsm.createVSM(localCollection);
      // TODO remove this
      try {
        CSVDataSetExporter.export(clusterDataSet, new File("tmp/" + cluster.getLabel() + ".csv"));
      } catch (IOException ignored) {
        // The CSV dump is only a debugging aid; labeling must not fail because the file could
        // not be written.
      }

      // Best score seen for each term, kept in first-seen order. Without this step the same term
      // would be queued once per document containing it and could fill several of the label
      // slots, leaving the final label with fewer distinct terms than intended.
      java.util.Map<String, Double> bestScores = new java.util.LinkedHashMap<String, Double>();
      for (DocumentDataElement elem : clusterDataSet.elements()) {
        Document document = elem.getDocument();

        for (String term : document.getAllTerms()) {
          double termWeight = clusterDataSet.getTermWeight(document.getId(), term);
          double score = termWeight * getDocumentCount(term, cluster);

          Double best = bestScores.get(term);
          // Double.compare keeps the ordering consistent with the queue comparator below.
          if (best == null || Double.compare(score, best) > 0) {
            bestScores.put(term, score);
          }
        }
      }

      MinMaxPriorityQueue<TermEntry> queue =
          MinMaxPriorityQueue.orderedBy(
                  new Comparator<TermEntry>() {

                    @Override
                    public int compare(TermEntry o1, TermEntry o2) {
                      // Descending by score: the size-bounded queue evicts its greatest element,
                      // which with this ordering is the lowest-scoring term.
                      return Double.compare(o2.getScore(), o1.getScore());
                    }
                  })
              .maximumSize(maxLabelTerms)
              .create();

      for (java.util.Map.Entry<String, Double> entry : bestScores.entrySet()) {
        double score = entry.getValue();
        queue.offer(new TermEntry(entry.getKey(), score));
      }

      // The queue's iteration order is unspecified; the sorted set makes the label deterministic.
      TreeSet<String> words = Sets.newTreeSet();
      for (TermEntry termEntry : queue) {
        words.add(termEntry.getTerm());
      }

      cluster.setLabel(words.toString());
    }
  }
示例#2
0
 /**
  * Marks this object as containing undirected edges and offers a new {@code Edge} to
  * {@code edges}.
  *
  * @param source first endpoint passed to the {@code Edge} constructor
  * @param target second endpoint passed to the {@code Edge} constructor
  * @param attributes attribute map passed through to the {@code Edge} constructor unchanged
  */
 @Override
 public void addUndirectedEdge(int source, int target, Map<String, Object> attributes) {
   // Set the flag before the edge is built, so it is already raised if construction fails.
   hasUndirectedEdges = true;

   // NOTE(review): the trailing 'false' presumably marks the edge as not directed - confirm
   // against the Edge constructor.
   final Edge undirectedEdge = new Edge(source, target, attributes, false);
   edges.offer(undirectedEdge);
 }