/**
 * Dumps every document into the text file of the cluster it was assigned to.
 *
 * <p>The {@code output} directory is passed to {@code FileUtil.clean} first. Afterwards the
 * label array of the clustering is walked positionally: the document at the same index in
 * {@code list} is written to {@code <label>.txt} inside {@code output} as URL, title and
 * text on separate lines, followed by a line break and the {@code hr} separator.
 *
 * @param cluster clustering result; its label array is matched by index against {@code list}
 */
private void writeFile(SpectralClustering cluster) {
    FileUtil.clean(output);

    final int[] labels = cluster.getClusterLabel();
    int position = 0;
    while (position < labels.length) {
        final Doc current = list.get(position);

        // Record layout: url, title, text (one per line), then the separator.
        String header = current.url + "\n" + current.title + "\n";
        String record = header + current.text + "\n" + hr;

        // One file per cluster label; the trailing 'true' is presumably append mode,
        // so that documents sharing a label accumulate in the same file - TODO confirm.
        File target = new File(output, labels[position] + ".txt");
        FileUtil.write(target, record, true);

        position++;
    }
}
private void _dimension() { FileUtil.clean(root); int x = 0; for (File file : root1.listFiles()) { Doc doc = buildDoc(x, file); // System.out.println(doc.vector.getTFIDFValues()); Iterator<Map.Entry<String, Double>> it = doc.vector.getTFIDFValues().entrySet().iterator(); for (; it.hasNext(); ) { if (it.next().getValue() < 0.05) { it.remove(); } } String str = StringUtils.join(doc.vector.getTFIDFValues().keySet().iterator(), " "); str = doc.url + "\n" + doc.title + "\n" + str + "\n" + doc.text; FileUtil.write(new File(root, file.getName()), str, false); x++; } }
/**
 * Reads a file and builds the {@code Doc} that represents it.
 *
 * <p>The whole file content is handed to {@code service.getNativeVector} to obtain the
 * word vector. When the {@code filter} field is set, every component of the array returned
 * by {@code vector.getValues()} that is below the threshold is overwritten with zero.
 *
 * <p>The file content is also split on {@code '\n'}; its third, first and second lines (in
 * that order) are passed to the {@code Doc} constructor - presumably text, URL and title;
 * verify against {@code Doc}. A file with fewer than three lines therefore fails with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param x    index passed through to the {@code Doc} constructor
 * @param file file to read
 * @return the document built from {@code file}
 */
private Doc buildDoc(int x, File file) {
    // Vector components below this score are zeroed when filtering is enabled.
    final double minScore = 0.05;

    String s = FileUtil.read(file);
    String[] arr = s.split("\n");
    WVTWordVector vector = service.getNativeVector(s);

    if (filter) {
        // NOTE(review): an earlier version also collected the low-scoring TF-IDF entries
        // into a temporary map that was never read; that dead code has been removed. If
        // the TF-IDF map itself was meant to be filtered here, it must be done explicitly.

        // Zeroing happens in place; it only affects the vector if getValues() exposes
        // the vector's backing array rather than a copy - TODO confirm.
        double[] values = vector.getValues();
        for (int i = 0; i < values.length; i++) {
            if (values[i] < minScore) {
                values[i] = 0D;
            }
        }
    }

    return new Doc(vector, file, x, arr[2], arr[0], arr[1]);
}