/**
 * Prints the clustering result to standard output, grouped by cluster label.
 *
 * <p>Entry {@code i} of {@code cluster.getClusterLabel()} is used as the label of
 * {@code list.get(i)}. Every cluster gets a {@code type:<label>} header and a closing
 * separator line; the url, title and text of its documents are printed only when the
 * cluster holds fewer than 10 documents.
 *
 * @param cluster clustering result whose label array is grouped and printed
 */
private void print(SpectralClustering cluster) {
    Map<Integer, List<Doc>> map = new HashMap<Integer, List<Doc>>();
    int[] lab = cluster.getClusterLabel();
    for (int x = 0; x < lab.length; x++) {
        Doc doc = list.get(x);
        Integer label = Integer.valueOf(lab[x]);
        List<Doc> docs = map.get(label);
        if (docs == null) {
            // A plain ArrayList instead of a double-brace initialised one: the anonymous
            // subclass would keep a reference to the enclosing instance for every cluster.
            docs = new ArrayList<Doc>();
            map.put(label, docs);
        }
        docs.add(doc);
    }

    for (Map.Entry<Integer, List<Doc>> e : map.entrySet()) {
        System.out.println("type:" + e.getKey());
        // Only clusters with fewer than 10 documents are printed in detail.
        if (e.getValue().size() < 10) {
            for (Doc d : e.getValue()) {
                System.out.println("-------------------------------------");
                System.out.println("url:" + d.url);
                System.out.println("title:" + d.title);
                System.out.println("doc:" + d.text);
            }
        }
        System.out.println("==============================");
    }
}
/**
 * Writes every document to a per-cluster text file under {@code output}.
 *
 * <p>{@code output} is first passed to {@code FileUtil.clean}. Then, for each index
 * {@code i} of the label array, the url, title and text of {@code list.get(i)} followed
 * by the {@code hr} separator are handed to {@code FileUtil.write} for the file
 * {@code <label>.txt}, with {@code true} as the last argument.
 *
 * @param cluster clustering result supplying one label per document
 */
private void writeFile(SpectralClustering cluster) {
    FileUtil.clean(output);

    int[] labels = cluster.getClusterLabel();
    int index = 0;
    while (index < labels.length) {
        Doc current = list.get(index);

        // One record: url, title and text on separate lines, closed by the separator.
        String record = current.url + "\n" + current.title + "\n" + current.text + "\n" + hr;

        String fileName = labels[index] + ".txt";
        File target = new File(output, fileName);
        FileUtil.write(target, record, true);

        index++;
    }
}
/**
 * Groups the documents by cluster label and, for every cluster with fewer than 10
 * documents, tags the matching index documents with a shared group id.
 *
 * <p>For each such cluster a query of the form {@code url:"a" OR url:"b" } is built, a
 * name-based UUID of that query text becomes the group id, and the documents returned by
 * {@code indexDao.sortList} (requested with sort {@code infoTime_dt desc}) are copied and
 * extended with:
 * <ul>
 *   <li>{@code useful_i} - 1 for the first returned document, 0 for all others;</li>
 *   <li>{@code sim_i} - number of documents returned for the cluster;</li>
 *   <li>{@code group_s} - the group id.</li>
 * </ul>
 * The copies are then passed to {@code indexDao.addIndex}. Clusters for which the lookup
 * returns nothing are skipped.
 *
 * @param cluster clustering result supplying one label per entry of {@code list}
 */
private void writeSolr(SpectralClustering cluster) {
    Map<Integer, List<Doc>> map = new HashMap<Integer, List<Doc>>();
    int[] lab = cluster.getClusterLabel();
    for (int x = 0; x < lab.length; x++) {
        Doc doc = list.get(x);
        Integer label = Integer.valueOf(lab[x]);
        List<Doc> docs = map.get(label);
        if (docs == null) {
            // Plain ArrayList: a double-brace initialised list would be an anonymous
            // subclass holding a reference to the enclosing instance.
            docs = new ArrayList<Doc>();
            map.put(label, docs);
        }
        docs.add(doc);
    }

    for (Map.Entry<Integer, List<Doc>> e : map.entrySet()) {
        System.out.println("type:" + e.getKey());
        if (e.getValue().size() >= 10) {
            continue;
        }

        // NOTE(review): urls are embedded in the phrase query without escaping; a url
        // containing '"' or '\' would presumably produce a malformed query - confirm.
        StringBuilder query = new StringBuilder();
        for (Doc member : e.getValue()) {
            query.append("url:\"").append(member.url).append("\"");
            query.append(" OR ");
        }
        // Drop the trailing "OR ". The single space left in front of it is kept on
        // purpose: the group id is derived from the exact query text.
        query.setLength(query.length() - "OR ".length());

        // Fixed charset so the same query yields the same group id on every platform.
        String groupId = UUID.nameUUIDFromBytes(
                query.toString().getBytes(java.nio.charset.Charset.forName("UTF-8"))).toString();
        System.out.println("group:" + groupId + " = " + e.getValue().size());

        // Named differently from the 'list' field read above to avoid shadowing it.
        List<SolrDocument> found =
                (List<SolrDocument>) indexDao.sortList(query.toString(), 1, 100, "infoTime_dt desc");
        if (found == null || found.isEmpty()) {
            // Nothing matched in the index, so there is nothing to tag or save.
            continue;
        }

        List<Map<String, Object>> toSave = new ArrayList<Map<String, Object>>();
        // Only the first returned document is flagged as useful.
        int useful = 1;
        for (SolrDocument solrDoc : found) {
            Map<String, Object> inputDoc = new HashMap<String, Object>(solrDoc);
            inputDoc.put("useful_i", useful);
            inputDoc.put("sim_i", found.size());
            inputDoc.put("group_s", groupId);
            toSave.add(inputDoc);
            useful = 0;
        }
        indexDao.addIndex(toSave);
    }
}