/**
 * Append an ID with its score using primitive arguments, so no boxing takes place. Every side
 * channel currently registered on the builder receives its default value for the new entry.
 *
 * @param id The ID to add.
 * @param score The score for the ID.
 * @return The builder (for chaining).
 * @throws IllegalStateException if the builder has already been finished.
 */
public ScoredIdListBuilder add(long id, double score) {
    Preconditions.checkState(ids != null, "builder has been finished");

    ids.add(id);
    scores.add(score);

    // Each channel must still be one entry short of the ID list at this point.
    final int expectedLength = ids.size() - 1;

    for (ChannelStorage unboxed : channels.values()) {
        assert unboxed.values.size() == expectedLength;
        unboxed.values.add(unboxed.defaultValue);
    }

    // Raw type is deliberate: the storage's value type is not known here.
    for (TypedChannelStorage typed : typedChannels.values()) {
        assert typed.values.size() == expectedLength;
        typed.values.add(typed.defaultValue);
    }

    return this;
}
/** * Add a scored ID. The ID is copied into the builder, not referenced. All side channels on the ID * must have already been added with one of the {@code addChannel} methods. * * @param id The ID. * @return The builder (for chaining). */ public ScoredIdListBuilder add(ScoredId id) { Preconditions.checkState(ids != null, "builder has been finished"); // check whether all symbols are valid Collection<SymbolValue<?>> chans = id.getChannels(); if (!ignoreUnknown) { for (SymbolValue<?> chan : chans) { TypedSymbol<?> sym = chan.getSymbol(); boolean good = sym.getType().equals(Double.class) ? channels.containsKey(sym.getRawSymbol()) : typedChannels.containsKey(sym); if (!good) { throw new IllegalArgumentException("channel " + sym + " not known"); } } } // now we're ready to add int idx = ids.size(); add(id.getId(), id.getScore()); assert ids.size() == idx + 1; assert scores.size() == idx + 1; for (SymbolValue<?> sv : chans) { TypedSymbol<?> sym = sv.getSymbol(); if (sym.getType().equals(Double.class) && channels.containsKey(sym.getRawSymbol())) { ChannelStorage chan = channels.get(sym.getRawSymbol()); assert chan.values.size() == idx + 1; if (sv instanceof DoubleSymbolValue) { chan.values.set(idx, ((DoubleSymbolValue) sv).getDoubleValue()); } else { Object v = sv.getValue(); chan.values.set(idx, (Double) v); } } else { TypedChannelStorage chan = typedChannels.get(sv.getSymbol()); if (chan != null) { assert chan.values.size() == idx + 1; chan.values.set(idx, sv.getValue()); } } } return this; }
/**
 * Build a sparse vector directly from the list of IDs. This allows a scored ID list builder to be
 * used to efficiently accumulate a sparse vector. If the same ID is added multiple times, the
 * first instance is used: its score and its side-channel values are the ones that end up in the
 * resulting vector.
 *
 * @return A sparse vector containing the data accumulated.
 * @throws IllegalStateException if the builder has already been finished.
 */
public ImmutableSparseVector buildVector() {
    Preconditions.checkState(ids != null, "builder has been finished");
    MutableSparseVector msv = MutableSparseVector.create(ids);
    final int size = size();

    // All loops below walk the entries from last to first. For a duplicated ID the earliest
    // entry is therefore written last and overrides the later duplicates, which is what gives
    // the documented "first instance is used" behavior. Primitive accessors avoid boxing.
    for (int i = size - 1; i >= 0; i--) {
        msv.set(ids.getLong(i), scores.getDouble(i));
    }

    for (ChannelStorage chan : channels.values()) {
        MutableSparseVector vchan = msv.getOrAddChannelVector(chan.symbol);
        for (int i = size - 1; i >= 0; i--) {
            vchan.set(ids.getLong(i), chan.values.getDouble(i));
        }
    }

    for (TypedChannelStorage<?> chan : typedChannels.values()) {
        // Raw map type: the channel's value type is a wildcard here, so the values are
        // stored without a static element type.
        Long2ObjectMap vchan = msv.getOrAddChannel(chan.symbol);
        for (int i = size - 1; i >= 0; i--) {
            vchan.put(ids.getLong(i), chan.values.get(i));
        }
    }

    return msv.freeze();
}
/**
 * Shared implementation behind {@link #build()} and {@link #finish()}: packs the accumulated
 * IDs, scores, and side channels into a {@link PackedScoredIdList}.
 *
 * @param reuse Whether the builder's own storage should be handed to the packed list. If
 *              {@code true}, the builder's lists are trimmed and used directly instead of being
 *              copied, and the builder is cleared afterwards, so it must not be used again. If
 *              {@code false}, the packed list receives copies and the builder is left intact.
 * @return The packed ID list.
 * @throws IllegalStateException if the builder has already been finished.
 */
private PackedScoredIdList finish(boolean reuse) {
    Preconditions.checkState(ids != null, "builder has been finished");

    final Map<Symbol, DoubleList> doubleChannels;
    final Map<TypedSymbol<?>, List<?>> objectChannels;

    final boolean hasItems = size() > 0;
    if (!hasItems) {
        // An empty list carries no channel data at all.
        doubleChannels = Collections.emptyMap();
        objectChannels = Collections.emptyMap();
    } else {
        ImmutableMap.Builder<Symbol, DoubleList> doubleMap = ImmutableMap.builder();
        for (ChannelStorage storage : channels.values()) {
            DoubleArrayList column;
            if (!reuse) {
                column = new DoubleArrayList(storage.values);
            } else {
                column = storage.values;
                column.trim();
            }
            doubleMap.put(storage.symbol, column);
        }
        doubleChannels = doubleMap.build();

        ImmutableMap.Builder<TypedSymbol<?>, List<?>> objectMap = ImmutableMap.builder();
        for (TypedChannelStorage<?> storage : typedChannels.values()) {
            List<?> column;
            if (!reuse) {
                column = new ArrayList<Object>(storage.values);
            } else {
                storage.values.trimToSize();
                column = storage.values;
            }
            objectMap.put(storage.symbol, column);
        }
        objectChannels = objectMap.build();
    }

    final LongList packedIds;
    final DoubleList packedScores;
    if (!reuse) {
        packedIds = new CompactableLongArrayList(ids);
        packedScores = new DoubleArrayList(scores);
    } else {
        // Capture the trimmed lists before clearing the builder's state.
        ids.trim();
        packedIds = ids;
        scores.trim();
        packedScores = scores;
        clear();
    }

    return new PackedScoredIdList(packedIds, packedScores, objectChannels, doubleChannels);
}
/**
 * Get the number of items currently in the builder.
 *
 * @return The number of items in the builder.
 * @throws IllegalStateException if the builder has already been finished.
 */
public int size() {
    // Same guard as the other entry points, so a finished builder reports a clear state error
    // instead of failing on a null list.
    Preconditions.checkState(ids != null, "builder has been finished");
    assert ids.size() == scores.size();
    return ids.size();
}