public Graph toGraph() { Table edgeTable = new Table(); Table nodeTable = new Table(); HashMap<String, Integer> unique = new HashMap<String, Integer>(); edgeTable.addColumn("Node1", int.class); edgeTable.addColumn("Node2", int.class); nodeTable.addColumn("key", int.class); nodeTable.addColumn("name", String.class); nodeTable.addColumn("type", String.class); nodeTable.addColumn("indeterminate", int.class); int idx = 0; for (Protein prot : minProteins.values()) { int row = nodeTable.addRow(); unique.put(prot.getName(), idx); nodeTable.setInt(row, "key", idx++); nodeTable.setString(row, "name", prot.getName()); nodeTable.setString(row, "type", "protein"); nodeTable.setInt(row, "indeterminate", 0); } for (Peptide pep : minPeptides.values()) { int row = nodeTable.addRow(); unique.put(pep.getSequence(), idx); nodeTable.setInt(row, "key", idx++); nodeTable.setString(row, "name", pep.getSequence()); nodeTable.setString(row, "type", "peptide"); if (pep.getIndeterminateType() == PeptideIndeterminacyType.NONE) { nodeTable.setInt(row, "indeterminate", 0); } else { nodeTable.setInt(row, "indeterminate", 1); } } for (Protein prot : minProteins.values()) { int id1 = unique.get(prot.getName()); for (String pep : prot.getPeptides()) { int id2 = unique.get(pep); int row = edgeTable.addRow(); edgeTable.setInt(row, "Node1", id1); edgeTable.setInt(row, "Node2", id2); } } Graph g = new Graph(nodeTable, edgeTable, false, "key", "Node1", "Node2"); // System.err.println(g.getEdgeCount()); return g; }
private Table processJournalName(Table table) throws IOException { ScimapsJournalMatcher scimapsJournalMatcher = new ScimapsJournalMatcher(); // Create new output table Table outputTable = TableUtilities.copyTable(table); outputTable.addColumn(STANDARDIZED_JOURNAL_NAME_COLUMN, String.class); int standardizedJournalNameColumnIndex = outputTable.getColumnNumber(STANDARDIZED_JOURNAL_NAME_COLUMN); // Retrieve iterator Iterator<?> rows = outputTable.tuples(); // Process journal names int rowIndex = 0; while (rows.hasNext()) { Tuple row = (Tuple) rows.next(); if (row.canGetString(journalColumnName)) { String name = row.getString(journalColumnName); String suggestedName = scimapsJournalMatcher.get(name); outputTable.setString(rowIndex, standardizedJournalNameColumnIndex, suggestedName); } rowIndex++; } return outputTable; }
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) { // first clear the character buffer m_sbuf.delete(0, m_sbuf.length()); if (qName.equals(GRAPH)) { // parse directedness default String edef = atts.getValue(EDGEDEF); m_directed = DIRECTED.equalsIgnoreCase(edef); m_graphid = atts.getValue(ID); } else if (qName.equals(KEY)) { if (!inSchema) { error( "\"" + KEY + "\" elements can not" + " occur after the first node or edge declaration."); } m_for = atts.getValue(FOR); m_id = atts.getValue(ID); m_name = atts.getValue(ATTRNAME); m_type = atts.getValue(ATTRTYPE); } else if (qName.equals(NODE)) { schemaCheck(); m_row = m_nodes.addRow(); String id = atts.getValue(ID); m_nodeMap.put(id, new Integer(m_row)); m_table = m_nodes; } else if (qName.equals(EDGE)) { schemaCheck(); m_row = m_edges.addRow(); // do not use the id value // String id = atts.getValue(ID); // if ( id != null ) { // if ( !m_edges.canGetString(ID) ) // m_edges.addColumn(ID, String.class); // m_edges.setString(m_row, ID, id); // } m_edges.setString(m_row, SRCID, atts.getValue(SRC)); m_edges.setString(m_row, TRGID, atts.getValue(TRG)); // currently only global directedness is used // ignore directed edge value for now // String dir = atts.getValue(DIRECTED); // boolean d = m_directed; // if ( dir != null ) { // d = dir.equalsIgnoreCase("false"); // } // m_edges.setBoolean(m_row, DIRECTED, d); m_table = m_edges; } else if (qName.equals(DATA)) { m_key = atts.getValue(KEY); } }
// this mutates nodeTable private Table makeMergeTable(Table nodeTable, Graph mergeGraph, StringBuffer mergeLog) { // get all the clusters in our merge graph (where each cluster is a group of nodes to be merged) List clusters = extractWeakComponentClusters(mergeGraph); // for each cluster... for (Iterator clusterIt = clusters.iterator(); clusterIt.hasNext(); ) { LinkedHashSet cluster = (LinkedHashSet) clusterIt.next(); // mark that we will merge every node into a single node // (this step could be made smarter, but is ok for now. Will need user intervention in some // cases) Integer[] eachNodeInCluster = (Integer[]) cluster.toArray(new Integer[cluster.size()]); Integer firstNode = null; if (eachNodeInCluster.length <= 1) continue; for (int ii = 0; ii < eachNodeInCluster.length; ii++) { Integer node = eachNodeInCluster[ii]; if (firstNode == null) { // (we arbitrarily choose the first node as the node to merge other nodes into) firstNode = node; String nodeOneAttribute = (String) nodeTable.getString(firstNode.intValue(), this.compareAttributeName); mergeLog.append("for \"" + nodeOneAttribute + "\"..." + "\r\n"); } else { Integer nodeBeyondFirst = node; // (we merge nodes beyond the first into the first node) // (last value is off by one, because unique indices are 1-based instead of 0-based, but // otherwise correlate with row number. nodeTable.setInt( nodeBeyondFirst.intValue(), UNIQUE_INDEX_COLUMN_NAME, firstNode.intValue() + 1); String iAmNotThePrimaryNodeInCluster = ""; nodeTable.setString( nodeBeyondFirst.intValue(), COMBINE_VALUES_COLUMN_NAME, iAmNotThePrimaryNodeInCluster); String nodeOneAttribute = (String) nodeTable.getString(nodeBeyondFirst.intValue(), this.compareAttributeName); mergeLog.append(" merging in \"" + nodeOneAttribute + "\"" + "\r\n"); } } } return nodeTable; }