Esempio n. 1
0
  public List<SPOTriplet> sentence2triplets(String sentence) {
    List<SPOTriplet> triplets = new ArrayList<SPOTriplet>();
    LinkGrammar.parse(sentence);
    int num_linkages = LinkGrammar.getNumLinkages();
    SPOTriplet triplet = new SPOTriplet();
    String alter_p = new String();
    Boolean ready = false;

    for (int l = 0; l < num_linkages; ++l) {

      LinkGrammar.makeLinkage(l);
      Linkage linkage = new Linkage();
      linkage.setLinkCost(LinkGrammar.getLinkageLinkCost());
      linkage.setNumViolations(LinkGrammar.getLinkageNumViolations());
      linkage.setDisjunctCost(LinkGrammar.getLinkageDisjunctCost());
      linkage.setLinkedWordCount(LinkGrammar.getNumWords());

      String[] disjuncts = new String[LinkGrammar.getNumWords()];
      String[] words = new String[LinkGrammar.getNumWords()];

      for (int k = 0; k < words.length; k++) {
        disjuncts[k] = LinkGrammar.getLinkageDisjunct(k);
        words[k] = LinkGrammar.getLinkageWord(k);
      }

      for (int k = 0; k < LinkGrammar.getNumLinks() && !ready; k++) {

        String c = LinkGrammar.getLinkLabel(k);
        if (c.charAt(0) == 'S') {
          triplet.p = LinkGrammar.getLinkageWord(k);
          alter_p = words[LinkGrammar.getLinkRWord(k)];
          triplet.s = words[LinkGrammar.getLinkLWord(k)];
        }

        if (c.charAt(0) == 'O') {
          triplet.o = words[LinkGrammar.getLinkRWord(k)];

          if (triplet.p.equals(words[LinkGrammar.getLinkRWord(k)])) {
            triplet.cut();
            triplets.add(triplet);
            ready = true;
            break;
          } else if (alter_p.equals(words[LinkGrammar.getLinkRWord(k)])) {
            triplet.p = alter_p;
            triplet.cut();
            triplets.add(triplet);
            ready = true;
            break;
          }
        }
      }
    }

    return triplets;
  }
  /**
   * Run the algorithm
   *
   * @param db Database
   * @param relation Relation
   * @return Clustering hierarchy
   */
  public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();

    if (size > 0x10000) {
      throw new AbortException(
          "This implementation does not scale to data sets larger than "
              + 0x10000
              + " instances (~17 GB RAM), which results in an integer overflow.");
    }
    if (Linkage.SINGLE.equals(linkage)) {
      LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    }

    // Compute the initial (lower triangular) distance matrix.
    double[] scratch = new double[triangleSize(size)];
    DBIDArrayIter ix = ids.iter(), iy = ids.iter(), ij = ids.iter();
    // Position counter - must agree with computeOffset!
    int pos = 0;
    boolean square =
        Linkage.WARD.equals(linkage)
            && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction()));
    for (int x = 0; ix.valid(); x++, ix.advance()) {
      iy.seek(0);
      for (int y = 0; y < x; y++, iy.advance()) {
        scratch[pos] = dq.distance(ix, iy);
        // Ward uses variances -- i.e. squared values
        if (square) {
          scratch[pos] *= scratch[pos];
        }
        pos++;
      }
    }

    // Initialize space for result:
    WritableDBIDDataStore parent =
        DataStoreUtil.makeDBIDStorage(
            ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore height =
        DataStoreUtil.makeDoubleStorage(
            ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableIntegerDataStore csize =
        DataStoreUtil.makeIntegerStorage(
            ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
      parent.put(it, it);
      height.put(it, Double.POSITIVE_INFINITY);
      csize.put(it, 1);
    }

    // Repeat until everything merged, except the desired number of clusters:
    FiniteProgress prog =
        LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
    for (int i = 1; i < size; i++) {
      double min = Double.POSITIVE_INFINITY;
      int minx = -1, miny = -1;
      for (ix.seek(0); ix.valid(); ix.advance()) {
        if (height.doubleValue(ix) < Double.POSITIVE_INFINITY) {
          continue;
        }
        final int xbase = triangleSize(ix.getOffset());
        for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
          if (height.doubleValue(iy) < Double.POSITIVE_INFINITY) {
            continue;
          }
          final int idx = xbase + iy.getOffset();
          if (scratch[idx] <= min) {
            min = scratch[idx];
            minx = ix.getOffset();
            miny = iy.getOffset();
          }
        }
      }
      assert (minx >= 0 && miny >= 0);
      // Avoid allocating memory, by reusing existing iterators:
      ix.seek(minx);
      iy.seek(miny);
      // Perform merge in data structure: x -> y
      // Since y < x, prefer keeping y, dropping x.
      int sizex = csize.intValue(ix), sizey = csize.intValue(iy);
      height.put(ix, min);
      parent.put(ix, iy);
      csize.put(iy, sizex + sizey);

      // Update distance matrix. Note: miny < minx
      final int xbase = triangleSize(minx), ybase = triangleSize(miny);
      // Write to (y, j), with j < y
      for (ij.seek(0); ij.getOffset() < miny; ij.advance()) {
        if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
          continue;
        }
        final int sizej = csize.intValue(ij);
        scratch[ybase + ij.getOffset()] =
            linkage.combine(
                sizex,
                scratch[xbase + ij.getOffset()],
                sizey,
                scratch[ybase + ij.getOffset()],
                sizej,
                min);
      }
      // Write to (j, y), with y < j < x
      for (ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) {
        if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
          continue;
        }
        final int jbase = triangleSize(ij.getOffset());
        final int sizej = csize.intValue(ij);
        scratch[jbase + miny] =
            linkage.combine(
                sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + miny], sizej, min);
      }
      // Write to (j, y), with y < x < j
      for (ij.seek(minx + 1); ij.valid(); ij.advance()) {
        if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
          continue;
        }
        final int jbase = triangleSize(ij.getOffset());
        final int sizej = csize.intValue(ij);
        scratch[jbase + miny] =
            linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);

    return new PointerHierarchyRepresentationResult(ids, parent, height);
  }