Exemplo n.º 1
0
 /**
  * Prints the clustered documents, grouped by cluster label, to standard output.
  *
  * <p>The label at index {@code x} of {@code cluster.getClusterLabel()} is applied to
  * {@code list.get(x)}. Every group's label is printed; the members (url, title, text) are only
  * listed for groups holding fewer than 10 documents.
  *
  * @param cluster clustering result supplying one label per document index
  */
 private void print(SpectralClustering cluster) {
   Map<Integer, List<Doc>> groups = new HashMap<Integer, List<Doc>>();
   int[] lab = cluster.getClusterLabel();
   for (int x = 0; x < lab.length; x++) {
     Doc doc = list.get(x);
     // Box once via valueOf (cached, non-deprecated) and reuse the key for lookup and insert.
     Integer label = Integer.valueOf(lab[x]);
     List<Doc> members = groups.get(label);
     if (members == null) {
       // A plain ArrayList, rather than a double-brace-initialized anonymous subclass,
       // avoids capturing a hidden reference to the enclosing instance.
       members = new ArrayList<Doc>();
       groups.put(label, members);
     }
     members.add(doc);
   }
   for (Map.Entry<Integer, List<Doc>> e : groups.entrySet()) {
     System.out.println("type:" + e.getKey());
     // Large groups are summarized by their label only.
     if (e.getValue().size() < 10) {
       for (Doc d : e.getValue()) {
         System.out.println("-------------------------------------");
         System.out.println("url:" + d.url);
         System.out.println("title:" + d.title);
         System.out.println("doc:" + d.text);
       }
     }
     System.out.println("==============================");
   }
 }
Exemplo n.º 2
0
 /**
  * Writes each document to a text file named after its cluster label.
  *
  * <p>{@code output} is first handed to {@code FileUtil.clean}. Then, for every index of the label
  * array, the url, title and text of the document at that index in {@code list} are joined with
  * newlines, followed by {@code hr}, and passed to {@code FileUtil.write} for the file
  * {@code <label>.txt} under {@code output}. The final {@code true} argument is presumably an
  * append flag; confirm against {@code FileUtil}.
  *
  * @param cluster clustering result supplying one label per document index
  */
 private void writeFile(SpectralClustering cluster) {
   FileUtil.clean(output);
   final int[] labels = cluster.getClusterLabel();
   int position = 0;
   for (int label : labels) {
     Doc current = list.get(position);
     position++;
     String body = current.url + "\n" + current.title + "\n" + current.text;
     String record = body + "\n" + hr;
     File target = new File(output, label + ".txt");
     FileUtil.write(target, record, true);
   }
 }
Exemplo n.º 3
0
  /**
   * Groups the documents by cluster label and, for each small group, tags the matching index
   * documents with group metadata and saves them through {@code indexDao}.
   *
   * <p>The label at index {@code x} of {@code cluster.getClusterLabel()} is applied to
   * {@code list.get(x)}. Groups with 10 or more documents only have their label printed. For each
   * smaller group:
   *
   * <ul>
   *   <li>an OR-query over the members' urls is built;
   *   <li>the group id is the name-based UUID of that query text;
   *   <li>every document returned by
   *       {@code indexDao.sortList(query, 1, 100, "infoTime_dt desc")} is copied and given
   *       {@code useful_i} (1 for the first returned document, 0 for the others), {@code sim_i}
   *       (the number of returned documents) and {@code group_s} (the group id);
   *   <li>the copies are passed to {@code indexDao.addIndex}.
   * </ul>
   *
   * @param cluster clustering result supplying one label per document index
   */
  private void writeSolr(SpectralClustering cluster) {
    Map<Integer, List<Doc>> groups = new HashMap<Integer, List<Doc>>();
    int[] lab = cluster.getClusterLabel();
    for (int x = 0; x < lab.length; x++) {
      Doc doc = list.get(x);
      // Box once via valueOf (cached, non-deprecated) and reuse the key for lookup and insert.
      Integer label = Integer.valueOf(lab[x]);
      List<Doc> members = groups.get(label);
      if (members == null) {
        // A plain ArrayList, rather than a double-brace-initialized anonymous subclass,
        // avoids capturing a hidden reference to the enclosing instance.
        members = new ArrayList<Doc>();
        groups.put(label, members);
      }
      members.add(doc);
    }
    for (Map.Entry<Integer, List<Doc>> e : groups.entrySet()) {
      System.out.println("type:" + e.getKey());

      List<Doc> members = e.getValue();
      if (members.size() >= 10) {
        // Only small groups are written back to the index.
        continue;
      }

      StringBuilder query = new StringBuilder();
      for (Doc doc : members) {
        query.append("url:\"").append(doc.url).append("\"");
        query.append(" OR ");
      }
      // Cut from the trailing "OR" onward. The space before it is deliberately kept: the
      // group id below is derived from this exact text, so changing it would change the ids.
      query.delete(query.lastIndexOf("OR"), query.length());
      String queryText = query.toString();

      // NOTE(review): getBytes() uses the platform default charset, so ids for non-ASCII urls
      // can differ between machines; switching to a fixed charset would alter existing ids.
      String groupId = UUID.nameUUIDFromBytes(queryText.getBytes()).toString();
      System.out.println("group:" + groupId + " = " + members.size());

      // Named "found" so it does not shadow the "list" field read in the grouping loop above.
      List<SolrDocument> found =
          (List<SolrDocument>) indexDao.sortList(queryText, 1, 100, "infoTime_dt desc");
      List<Map<String, Object>> toSave = new ArrayList<Map<String, Object>>();
      // Only the first returned document is flagged useful. The query asks for
      // "infoTime_dt desc" ordering, so presumably that is the most recent one (confirm).
      int useful = 1;
      for (SolrDocument solrDoc : found) {
        Map<String, Object> inputDoc = new HashMap<String, Object>(solrDoc);
        inputDoc.put("useful_i", useful);
        inputDoc.put("sim_i", found.size());
        inputDoc.put("group_s", groupId);
        toSave.add(inputDoc);
        useful = 0;
      }
      indexDao.addIndex(toSave);
    }
  }