private SpectralClustering cluster(double[][] data, int k) { long clock = System.currentTimeMillis(); SpectralClustering cluster = new SpectralClustering(data, k, 0.27); System.out.format( "DBSCAN clusterings %d samples in %dms\n", data.length, System.currentTimeMillis() - clock); System.out.println("getNumClusters:" + cluster.getNumClusters()); System.out.println("getClusterSize:" + cluster.getClusterSize()); // System.out.println(JSON.toJSONString(dbscan.getClusterSize())); System.out.println("toString:" + cluster.toString()); return cluster; }
public void main() { double[][] data = buildData(); SpectralClustering clustering = cluster(data, data.length - 1); double[] eigenValues = clustering.getEigen().getEigenValues(); int count = bestCount(eigenValues); // for (;count<100;){ clustering = cluster(data, count); // } // SpectralClustering clustering = cluster(data, 379); print(clustering); writeFile(clustering); writeSolr(clustering); System.currentTimeMillis(); }
/**
 * Groups the documents in {@code list} by their cluster label and prints each group to standard
 * output.
 *
 * <p>The label at index {@code i} of {@code cluster.getClusterLabel()} is paired with
 * {@code list.get(i)}. Every group prints its label; the documents themselves (url, title, text)
 * are printed only for groups with fewer than 10 members.
 *
 * @param cluster clustering whose label array is used for grouping
 */
private void print(SpectralClustering cluster) {
  Map<Integer, List<Doc>> groups = new HashMap<Integer, List<Doc>>();
  int[] labels = cluster.getClusterLabel();
  for (int i = 0; i < labels.length; i++) {
    Integer label = Integer.valueOf(labels[i]);
    List<Doc> members = groups.get(label);
    if (members == null) {
      // Plain ArrayList: avoids an anonymous subclass that captures the enclosing instance.
      members = new ArrayList<Doc>();
      groups.put(label, members);
    }
    members.add(list.get(i));
  }

  for (Map.Entry<Integer, List<Doc>> e : groups.entrySet()) {
    System.out.println("type:" + e.getKey());
    List<Doc> members = e.getValue();
    // Large groups are summarised by their label line only.
    if (members.size() < 10) {
      for (Doc d : members) {
        System.out.println("-------------------------------------");
        System.out.println("url:" + d.url);
        System.out.println("title:" + d.title);
        System.out.println("doc:" + d.text);
      }
    }
    System.out.println("==============================");
  }
}
/**
 * Cleans {@code output} and then appends every document to the file named after its cluster
 * label ({@code <label>.txt} under {@code output}).
 *
 * <p>The label at position {@code i} of {@code cluster.getClusterLabel()} is paired with
 * {@code list.get(i)}. Each entry consists of the url, title and text on separate lines followed
 * by {@code hr}.
 *
 * @param cluster clustering whose label array decides the target file of each document
 */
private void writeFile(SpectralClustering cluster) {
  FileUtil.clean(output);

  final int[] labels = cluster.getClusterLabel();
  int position = 0;
  for (int label : labels) {
    final Doc current = list.get(position++);
    String header = current.url + "\n" + current.title + "\n";
    String entry = header + current.text + "\n" + hr;
    File target = new File(output, label + ".txt");
    // Third argument is passed as true exactly as before; presumably "append" — TODO confirm.
    FileUtil.write(target, entry, true);
  }
}
private void _cluster(double[][] data, int k) { long clock = System.currentTimeMillis(); SpectralClustering cluster = new SpectralClustering(data, k, 0.355); System.out.format( "DBSCAN clusterings %d samples in %dms\n", data.length, System.currentTimeMillis() - clock); System.out.println("getNumClusters:" + cluster.getNumClusters()); System.out.println("getClusterSize:" + cluster.getClusterSize()); // System.out.println(JSON.toJSONString(dbscan.getClusterSize())); System.out.println("toString:" + cluster.toString()); /** ************************************************************ */ boolean more = true; EigenValueDecomposition eigen = cluster.getEigen(); double[] lab = eigen.getEigenValues(); double sd = smile.math.Math.sd(eigen.getEigenValues()); System.out.println("sd(eigen.getEigenValues()):" + sd); if (Math.min(eigen.getEigenValues()) > 0.3) { result = cluster; cluster(data, k + 1); } else { return; } }
/**
 * Groups the documents in {@code list} by cluster label and, for every group with fewer than 10
 * members, tags the matching index entries with a shared group id and saves them through
 * {@code indexDao}.
 *
 * <p>For each such group a query of the form {@code url:"a" OR url:"b" } is built, the group id is
 * the name-based UUID of that query text, and the entries returned by
 * {@code indexDao.sortList(query, 1, 100, "infoTime_dt desc")} are copied and extended with:
 *
 * <ul>
 *   <li>{@code useful_i}: 1 for the first returned entry, 0 for all others</li>
 *   <li>{@code sim_i}: the number of returned entries</li>
 *   <li>{@code group_s}: the group id</li>
 * </ul>
 *
 * @param cluster clustering whose label array is used for grouping
 */
private void writeSolr(SpectralClustering cluster) {
  Map<Integer, List<Doc>> groups = new HashMap<Integer, List<Doc>>();
  int[] labels = cluster.getClusterLabel();
  for (int i = 0; i < labels.length; i++) {
    Integer label = Integer.valueOf(labels[i]);
    List<Doc> members = groups.get(label);
    if (members == null) {
      // Plain ArrayList: avoids an anonymous subclass that captures the enclosing instance.
      members = new ArrayList<Doc>();
      groups.put(label, members);
    }
    members.add(list.get(i));
  }

  for (Map.Entry<Integer, List<Doc>> e : groups.entrySet()) {
    System.out.println("type:" + e.getKey());
    List<Doc> members = e.getValue();
    if (members.size() >= 10) {
      // Only small groups are written back.
      continue;
    }

    // The query text feeds the group id below, so it is built exactly as before: the trailing
    // "OR " is cut off, which leaves a single trailing space in the text.
    StringBuilder query = new StringBuilder();
    for (Doc member : members) {
      query.append("url:\"").append(member.url).append("\"");
      query.append(" OR ");
    }
    query.delete(query.lastIndexOf("OR"), query.length());
    String queryText = query.toString();

    // NOTE(review): getBytes() uses the platform default charset, so the id of a group whose
    // urls contain non-ASCII characters can differ between machines. Left unchanged so that
    // ids already stored in the index stay stable.
    String groupId = UUID.nameUUIDFromBytes(queryText.getBytes()).toString();
    System.out.println("group:" + groupId + " = " + members.size());

    // sortList's declared return type is not visible here; the cast mirrors existing usage.
    @SuppressWarnings("unchecked")
    List<SolrDocument> hits =
        (List<SolrDocument>) indexDao.sortList(queryText, 1, 100, "infoTime_dt desc");

    List<Map<String, Object>> toSave = new ArrayList<Map<String, Object>>();
    // Only the first hit is flagged; given the "infoTime_dt desc" sort argument this is
    // presumably the newest entry — TODO confirm.
    int useful = 1;
    for (SolrDocument hit : hits) {
      Map<String, Object> inputDoc = new HashMap<String, Object>(hit);
      inputDoc.put("useful_i", useful);
      inputDoc.put("sim_i", hits.size());
      inputDoc.put("group_s", groupId);
      toSave.add(inputDoc);
      useful = 0;
    }
    indexDao.addIndex(toSave);
  }
}