public void endDocument() throws SAXException { // time to actually set up the edges IntIterator rows = m_edges.rows(); while (rows.hasNext()) { int r = rows.nextInt(); String src = m_edges.getString(r, SRCID); if (!m_nodeMap.containsKey(src)) { throw new SAXException( "Tried to create edge with source node id=" + src + " which does not exist."); } int s = ((Integer) m_nodeMap.get(src)).intValue(); m_edges.setInt(r, SRC, s); String trg = m_edges.getString(r, TRGID); if (!m_nodeMap.containsKey(trg)) { throw new SAXException( "Tried to create edge with target node id=" + trg + " which does not exist."); } int t = ((Integer) m_nodeMap.get(trg)).intValue(); m_edges.setInt(r, TRG, t); } m_edges.removeColumn(SRCID); m_edges.removeColumn(TRGID); // now create the graph m_graph = new Graph(m_nodes, m_edges, m_directed); if (m_graphid != null) m_graph.putClientProperty(ID, m_graphid); }
/**
 * Scores how similar two nodes are with respect to one attribute column. The two attribute
 * values are read from the table and handed to the similarity checker exactly as stored (no case
 * folding is applied here).
 *
 * @param attributeColumn name of the column whose values are compared
 * @param nodeOneIndex row of the first node in nodeTable
 * @param nodeTwoIndex row of the second node in nodeTable
 * @param nodeTable table holding the node attributes
 * @return the value reported by the similarity checker for the two attribute strings
 */
private float compareNodesCaseSensitiveBy(
    String attributeColumn, Integer nodeOneIndex, Integer nodeTwoIndex, Table nodeTable) {
  return this.similarityChecker.getSimilarity(
      nodeTable.getString(nodeOneIndex.intValue(), attributeColumn),
      nodeTable.getString(nodeTwoIndex.intValue(), attributeColumn));
}
/** @see prefuse.data.io.TableWriter#writeTable(prefuse.data.Table, java.io.OutputStream) */ public void writeTable(Table table, OutputStream os) throws DataIOException { try { // get print stream PrintWriter out = new PrintWriter(new OutputStreamWriter(os, "UTF-8")); // write out header row if (m_printHeader) { for (int i = 0; i < table.getColumnCount(); ++i) { if (i > 0) out.print(','); out.print(makeCSVSafe(table.getColumnName(i))); } out.println(); } // write out data for (IntIterator rows = table.rows(); rows.hasNext(); ) { int row = rows.nextInt(); for (int i = 0; i < table.getColumnCount(); ++i) { if (i > 0) out.print(','); String str = table.getString(row, table.getColumnName(i)); out.print(makeCSVSafe(str)); } out.println(); } // finish up out.flush(); } catch (Exception e) { throw new DataIOException(e); } }
// this mutates nodeTable private Table makeMergeTable(Table nodeTable, Graph mergeGraph, StringBuffer mergeLog) { // get all the clusters in our merge graph (where each cluster is a group of nodes to be merged) List clusters = extractWeakComponentClusters(mergeGraph); // for each cluster... for (Iterator clusterIt = clusters.iterator(); clusterIt.hasNext(); ) { LinkedHashSet cluster = (LinkedHashSet) clusterIt.next(); // mark that we will merge every node into a single node // (this step could be made smarter, but is ok for now. Will need user intervention in some // cases) Integer[] eachNodeInCluster = (Integer[]) cluster.toArray(new Integer[cluster.size()]); Integer firstNode = null; if (eachNodeInCluster.length <= 1) continue; for (int ii = 0; ii < eachNodeInCluster.length; ii++) { Integer node = eachNodeInCluster[ii]; if (firstNode == null) { // (we arbitrarily choose the first node as the node to merge other nodes into) firstNode = node; String nodeOneAttribute = (String) nodeTable.getString(firstNode.intValue(), this.compareAttributeName); mergeLog.append("for \"" + nodeOneAttribute + "\"..." + "\r\n"); } else { Integer nodeBeyondFirst = node; // (we merge nodes beyond the first into the first node) // (last value is off by one, because unique indices are 1-based instead of 0-based, but // otherwise correlate with row number. nodeTable.setInt( nodeBeyondFirst.intValue(), UNIQUE_INDEX_COLUMN_NAME, firstNode.intValue() + 1); String iAmNotThePrimaryNodeInCluster = ""; nodeTable.setString( nodeBeyondFirst.intValue(), COMBINE_VALUES_COLUMN_NAME, iAmNotThePrimaryNodeInCluster); String nodeOneAttribute = (String) nodeTable.getString(nodeBeyondFirst.intValue(), this.compareAttributeName); mergeLog.append(" merging in \"" + nodeOneAttribute + "\"" + "\r\n"); } } } return nodeTable; }
private Graph makeMergeGraph(Table nodeTable, StringBuffer noteLog) { Graph mergeGraph = makeEmptyMergeGraph(nodeTable); // for each group of nodes with a common attribute prefix... ListMap groupedNodes = sortNodesByAttributePrefix(nodeTable, this.compareAttributeName, this.numPrefixLetters); for (Iterator groupIt = groupedNodes.values().iterator(); groupIt.hasNext(); ) { List nodeGroup = (List) groupIt.next(); // for each pair of nodes in the group... for (int i = 0; i < nodeGroup.size(); i++) { Integer firstNodeIndex = (Integer) nodeGroup.get(i); for (int j = i; j < nodeGroup.size(); j++) { Integer secondNodeIndex = (Integer) nodeGroup.get(j); // test how similar the two nodes are float similarity = compareNodesCaseInsensitiveBy( this.compareAttributeName, firstNodeIndex, secondNodeIndex, nodeTable); // if their similarity is high enough to merge... if (similarity >= this.mergeOnSimilarity) { // link the nodes in the merge graph mergeGraph.addEdge(firstNodeIndex.intValue(), secondNodeIndex.intValue()); } // else if their similarity is noteworthy... else if (similarity >= this.makeNoteOnSimilarity) { // record it in the log String nodeOneAttribute = (String) nodeTable.getString(firstNodeIndex.intValue(), this.compareAttributeName); String nodeTwoAttribute = (String) nodeTable.getString(secondNodeIndex.intValue(), this.compareAttributeName); noteLog.append("" + similarity + " similar:" + "\r\n"); noteLog.append(" \"" + nodeOneAttribute + "\"" + "\r\n"); noteLog.append(" \"" + nodeTwoAttribute + "\"" + "\r\n"); } } } } return mergeGraph; }