/**
 * Aggregates a per-person y-attribute into buckets defined by discretizing the x-attribute,
 * writing the per-bucket sums and sample counts into the supplied output arrays.
 *
 * <p>Persons missing either attribute are skipped. Bucket indices come from
 * {@code xDataDiscr.index(xVal)}.
 *
 * @param persons the persons to aggregate over
 * @param sums out-parameter: receives the sum of y-values per bucket index
 * @param counts out-parameter: receives the number of contributing persons per bucket index
 * @param xAttrKey attribute key whose (numeric) value selects the bucket
 * @param yAttrKey attribute key whose (numeric) value is accumulated
 */
private void calculateBuckets(
    Set<? extends Person> persons,
    DynamicDoubleArray sums,
    DynamicIntArray counts,
    String xAttrKey,
    String yAttrKey) {
  TIntDoubleHashMap bucketSums = new TIntDoubleHashMap();
  TIntIntHashMap bucketCounts = new TIntIntHashMap();

  for (Person person : persons) {
    String rawX = person.getAttribute(xAttrKey);
    String rawY = person.getAttribute(yAttrKey);
    // Only persons carrying both attributes contribute to the statistics.
    if (rawX == null || rawY == null) {
      continue;
    }
    double x = Double.parseDouble(rawX);
    double y = Double.parseDouble(rawY);
    int bucket = xDataDiscr.index(x);
    // adjustOrPutValue: add y if the key exists, otherwise initialize with y (resp. 1).
    bucketSums.adjustOrPutValue(bucket, y, y);
    bucketCounts.adjustOrPutValue(bucket, 1, 1);
  }

  // Copy the accumulated values into the caller-provided dynamic arrays.
  TIntDoubleIterator iterator = bucketSums.iterator();
  while (iterator.hasNext()) {
    iterator.advance();
    int bucket = iterator.key();
    sums.set(bucket, iterator.value());
    counts.set(bucket, bucketCounts.get(bucket));
  }
}
/**
 * Rates each entity's subject node by its graph connectivity: the rating is the smaller of the
 * node's out-degree and in-degree in the document graph.
 *
 * <p>Duplicate subject indices across entities collapse to a single entry.
 *
 * @param document the document whose graph supplies the degrees
 * @param entities the semantic entities whose subject nodes get rated
 * @return map from subject node index to min(out-degree, in-degree)
 */
@Override
public TIntDoubleHashMap getRating(
    Document document, List<TokenSequence<SemanticEntity>> entities) {
  DirectedGraph<Integer, RDFEdge> graph = document.getGraph();

  // Collect the distinct subject node indices first so each node is rated once.
  TIntHashSet subjectNodes = new TIntHashSet();
  for (TokenSequence<SemanticEntity> sequence : entities) {
    subjectNodes.add(sequence.getValue().getSubjectIndex());
  }

  TIntDoubleHashMap ratings = new TIntDoubleHashMap();
  for (int node : subjectNodes.toArray()) {
    int outDegree = graph.getOutEdges(node).size();
    int inDegree = graph.getInEdges(node).size();
    ratings.put(node, Math.min(outDegree, inDegree));
  }
  return ratings;
}
/**
 * For a specific sub-set of blocks (child nodes), find a 'base' subset of parents for which the
 * block's logLikelihood is not -Infinity.
 *
 * <p>Uses a greedy set-cover heuristic: for each child node {@code v}, repeatedly adds the
 * candidate parent that participates in the most (not-yet-covered) actions of {@code C+(v)},
 * until {@code blockLogLikelihood} becomes finite or the candidates are exhausted.
 *
 * <p>NOTE: this method mutates its inputs — it removes chosen parents from the sets in
 * {@code candidateParentsPerNode} and fills {@code chosenArcsPerNode}.
 *
 * @param candidateParentsPerNode candidate parent ids per child node; entries for chosen
 *     parents are removed as a side effect
 * @param chosenArcsPerNode out-parameter: receives, per child node, the arcs (parent, child)
 *     selected by the heuristic
 * @param setOfBlocks the child nodes to initialize
 * @param logLPerNode optional out-parameter (may be null): receives each node's resulting
 *     block log-likelihood
 * @return the total log-likelihood summed over all processed blocks
 */
protected double getOutOfMinusInfinity(
    Int2ObjectOpenHashMap<IntOpenHashSet> candidateParentsPerNode,
    Int2ObjectOpenHashMap<ObjectOpenHashSet<Arc>> chosenArcsPerNode,
    IntOpenHashSet setOfBlocks,
    TIntDoubleHashMap logLPerNode) {
  double totalLogL = 0;

  ProgressLogger pl = new ProgressLogger(LOGGER, ProgressLogger.TEN_SECONDS, "blocks");
  pl.start("Begin initializing, to avoid zero likelihood, using set-cover heuristic");
  pl.expectedUpdates = setOfBlocks.size();

  int nArcs = 0;
  for (int v : setOfBlocks) {
    pl.update();

    IntOpenHashSet vParents = candidateParentsPerNode.get(v);
    // parentActions: candidate parent u -> set of actions of C+(v) in which u participates.
    Int2ObjectOpenHashMap<IntOpenHashSet> parentActions =
        new Int2ObjectOpenHashMap<IntOpenHashSet>();
    Int2ObjectOpenHashMap<IntArrayList> cPlusV = auxiliary.getCplusOnline(v);
    Int2ObjectOpenHashMap<IntArrayList> cMinusV = auxiliary.getCminusOnline(v);

    if (cPlusV != null) {
      IntSet actions = cPlusV.keySet();
      // Heuristic: first add the parents that participate in A+ for
      // most actions
      for (int action : actions) {
        for (int u : cPlusV.get(action)) {
          if (!parentActions.containsKey(u)) {
            parentActions.put(u, new IntOpenHashSet());
          }
          parentActions.get(u).add(action);
        }
      }
    }

    // km tracks the candidate parent currently covering the most remaining actions.
    KeepMaximum km = new KeepMaximum();
    km.addAllKey2Listsize(parentActions);

    IntOpenHashSet baseSetOfParents = new IntOpenHashSet();
    double logL = Double.NEGATIVE_INFINITY;
    // Greedily grow the parent set until the block likelihood becomes finite
    // or no candidate remains (getMaximumKey() == -1 signals exhaustion).
    while (logL == Double.NEGATIVE_INFINITY && (km.getMaximumKey() != -1)) {
      int u = km.getMaximumKey();
      if (baseSetOfParents.contains(u)) {
        // km should never re-propose an already-chosen parent; guard against it.
        throw new IllegalStateException("Attempted to add twice the same parent");
      }
      baseSetOfParents.add(u);
      logL = blockLogLikelihood(v, cPlusV, cMinusV, baseSetOfParents);

      // Discount the actions now covered by u from every remaining candidate,
      // so the next maximum reflects only still-uncovered actions.
      // NOTE(review): parentActions.get(parent) would NPE if some member of
      // vParents never appeared in cPlusV — presumably an invariant guarantees
      // every candidate parent participates in at least one action; confirm.
      IntOpenHashSet uActions = parentActions.get(u);
      for (int parent : vParents) {
        parentActions.get(parent).removeAll(uActions);
      }
      vParents.remove(u);
      parentActions.remove(u);
      // Rebuild the maximum tracker over the reduced candidate map.
      km.reset();
      km.addAllKey2Listsize(parentActions);
    }

    // keep track of the likelihood
    totalLogL += logL;
    if (logLPerNode != null) {
      logLPerNode.put(v, logL);
    }

    // Materialize the chosen parents as arcs (u -> v) for this child node.
    chosenArcsPerNode.put(v, new ObjectOpenHashSet<Arc>());
    for (int u : baseSetOfParents) {
      nArcs++;
      chosenArcsPerNode.get(v).add(new Arc(u, v));
    }
  }
  pl.stop("Done initialization. Added " + nArcs + " arcs, logLikelihood=" + totalLogL);
  return totalLogL;
}