@Override
  public TIntDoubleHashMap getRating(
      Document document, List<TokenSequence<SemanticEntity>> entities) {

    DirectedGraph<Integer, RDFEdge> graph = document.getGraph();

    TIntHashSet nodes = new TIntHashSet();
    for (TokenSequence<SemanticEntity> ts : entities) {
      nodes.add(ts.getValue().getSubjectIndex());
    }

    final TIntDoubleHashMap m = new TIntDoubleHashMap();

    for (int node : nodes.toArray()) {
      m.put(node, Math.min(graph.getOutEdges(node).size(), graph.getInEdges(node).size()));
    }
    return m;
  }
Beispiel #2
0
  /**
   * For a specific sub-set of blocks (child nodes), find a 'base' subset of parents for which the
   * block's logLikelihood is not -Infinity
   *
   * @param candidateParentsPerNode
   * @param chosenArcsPerNode
   * @param setOfBlocks
   * @return
   */
  protected double getOutOfMinusInfinity(
      Int2ObjectOpenHashMap<IntOpenHashSet> candidateParentsPerNode,
      Int2ObjectOpenHashMap<ObjectOpenHashSet<Arc>> chosenArcsPerNode,
      IntOpenHashSet setOfBlocks,
      TIntDoubleHashMap logLPerNode) {
    double totalLogL = 0;

    ProgressLogger pl = new ProgressLogger(LOGGER, ProgressLogger.TEN_SECONDS, "blocks");
    pl.start("Begin initializing, to avoid zero likelihood, using set-cover heuristic");
    pl.expectedUpdates = setOfBlocks.size();
    int nArcs = 0;
    for (int v : setOfBlocks) {
      pl.update();

      IntOpenHashSet vParents = candidateParentsPerNode.get(v);

      Int2ObjectOpenHashMap<IntOpenHashSet> parentActions =
          new Int2ObjectOpenHashMap<IntOpenHashSet>();

      Int2ObjectOpenHashMap<IntArrayList> cPlusV = auxiliary.getCplusOnline(v);
      Int2ObjectOpenHashMap<IntArrayList> cMinusV = auxiliary.getCminusOnline(v);

      if (cPlusV != null) {
        IntSet actions = cPlusV.keySet();
        // Heuristic: first add the parents that participate in A+ for
        // most actions
        for (int action : actions) {
          for (int u : cPlusV.get(action)) {
            if (!parentActions.containsKey(u)) {
              parentActions.put(u, new IntOpenHashSet());
            }
            parentActions.get(u).add(action);
          }
        }
      }

      KeepMaximum km = new KeepMaximum();
      km.addAllKey2Listsize(parentActions);

      IntOpenHashSet baseSetOfParents = new IntOpenHashSet();
      double logL = Double.NEGATIVE_INFINITY;
      while (logL == Double.NEGATIVE_INFINITY && (km.getMaximumKey() != -1)) {
        int u = km.getMaximumKey();
        if (baseSetOfParents.contains(u)) {
          throw new IllegalStateException("Attempted to add twice the same parent");
        }
        baseSetOfParents.add(u);
        logL = blockLogLikelihood(v, cPlusV, cMinusV, baseSetOfParents);
        IntOpenHashSet uActions = parentActions.get(u);
        for (int parent : vParents) {
          parentActions.get(parent).removeAll(uActions);
        }
        vParents.remove(u);
        parentActions.remove(u);
        km.reset();
        km.addAllKey2Listsize(parentActions);
      }

      // keep track of the likelihood
      totalLogL += logL;
      if (logLPerNode != null) {
        logLPerNode.put(v, logL);
      }

      chosenArcsPerNode.put(v, new ObjectOpenHashSet<Arc>());
      for (int u : baseSetOfParents) {
        nArcs++;
        chosenArcsPerNode.get(v).add(new Arc(u, v));
      }
    }
    pl.stop("Done initialization. Added " + nArcs + " arcs, logLikelihood=" + totalLogL);
    return totalLogL;
  }