コード例 #1
0
  private ListMap sortNodesByAttributePrefix(
      Table nodeTable, String compareAttributeName, int numPrefixLetters) {
    ListMap nodesByAttributePrefix = new ListMap();
    // for each node in the node table...
    for (IntIterator nodeIndexIt = nodeTable.rows(); nodeIndexIt.hasNext(); ) {
      int nodeIndex = nodeIndexIt.nextInt();
      Tuple row = nodeTable.getTuple(nodeIndex);
      // get the attribute contents
      String comparisonAttributeContents = row.getString(compareAttributeName);
      if (comparisonAttributeContents == null) continue;
      // add the node index to our list map, in the bin with a key made from a prefix of the
      // attribute e.g. "do" for "donkey"
      String prefixKey = extractPrefixKey(comparisonAttributeContents, numPrefixLetters);
      nodesByAttributePrefix.put(prefixKey, new Integer(nodeIndex));
    }

    return nodesByAttributePrefix;
  }
コード例 #2
0
  private Graph makeMergeGraph(Table nodeTable, StringBuffer noteLog) {
    Graph mergeGraph = makeEmptyMergeGraph(nodeTable);
    // for each group of nodes with a common attribute prefix...
    ListMap groupedNodes =
        sortNodesByAttributePrefix(nodeTable, this.compareAttributeName, this.numPrefixLetters);
    for (Iterator groupIt = groupedNodes.values().iterator(); groupIt.hasNext(); ) {
      List nodeGroup = (List) groupIt.next();
      // for each pair of nodes in the group...
      for (int i = 0; i < nodeGroup.size(); i++) {
        Integer firstNodeIndex = (Integer) nodeGroup.get(i);
        for (int j = i; j < nodeGroup.size(); j++) {
          Integer secondNodeIndex = (Integer) nodeGroup.get(j);
          // test how similar the two nodes are
          float similarity =
              compareNodesCaseInsensitiveBy(
                  this.compareAttributeName, firstNodeIndex, secondNodeIndex, nodeTable);

          // if their similarity is high enough to merge...
          if (similarity >= this.mergeOnSimilarity) {
            // link the nodes in the merge graph
            mergeGraph.addEdge(firstNodeIndex.intValue(), secondNodeIndex.intValue());
          }
          // else if their similarity is noteworthy...
          else if (similarity >= this.makeNoteOnSimilarity) {
            // record it in the log
            String nodeOneAttribute =
                (String) nodeTable.getString(firstNodeIndex.intValue(), this.compareAttributeName);
            String nodeTwoAttribute =
                (String) nodeTable.getString(secondNodeIndex.intValue(), this.compareAttributeName);
            noteLog.append("" + similarity + " similar:" + "\r\n");
            noteLog.append("  \"" + nodeOneAttribute + "\"" + "\r\n");
            noteLog.append("  \"" + nodeTwoAttribute + "\"" + "\r\n");
          }
        }
      }
    }

    return mergeGraph;
  }