/**
 * Builds an identity matrix keyed by the given IDs: the ID found at position {@code i} is
 * mapped to a vector of length {@code idsInOrder.length} that holds 1.0 at index {@code i}
 * and 0.0 everywhere else.
 *
 * @param idsInOrder IDs that define both the keys of the result and the index of each key's
 *     single non-zero entry
 * @return map from each ID to its unit vector
 */
private static FastByIDMap<float[]> buildIdentity(long[] idsInOrder) {
  int dimension = idsInOrder.length;
  FastByIDMap<float[]> unitVectors = new FastByIDMap<float[]>(dimension);
  int position = 0;
  for (long id : idsInOrder) {
    // A fresh float[] is zero-filled, so only the diagonal entry needs to be set.
    float[] unit = new float[dimension];
    unit[position] = 1.0f;
    unitVectors.put(id, unit);
    position++;
  }
  return unitVectors;
}
/**
 * Copies the keys of a map into an array, in the order produced by the map's key iterator.
 *
 * @param input map whose keys are wanted
 * @return array of {@code input.size()} keys, in key-iterator order
 * @throws IllegalStateException if the iterator yields fewer keys than {@code input.size()}
 */
private static long[] idsInOrder(FastByIDMap<?> input) {
  int expected = input.size();
  long[] ids = new long[expected];
  int seen = 0;
  for (LongPrimitiveIterator keys = input.keySetIterator(); keys.hasNext(); seen++) {
    ids[seen] = keys.nextLong();
  }
  // Sanity check: the number of keys iterated must agree with the reported size.
  Preconditions.checkState(expected == seen);
  return ids;
}
/**
 * Converts a sparse matrix into dense 0/1 indicator vectors. For every entry of {@code input}
 * a vector of length {@code idsInOrder.length} is produced whose element {@code i} is 1.0
 * when the entry's sparse vector contains the key {@code idsInOrder[i]}, and 0.0 otherwise.
 * The stored values themselves are ignored; only key presence matters.
 *
 * @param input sparse vectors keyed by ID
 * @param idsInOrder IDs that define the meaning of each position in the dense vectors
 * @return map from each key of {@code input} to its dense indicator vector
 */
private static FastByIDMap<float[]> buildBinarizedMatrix(
    FastByIDMap<FastByIDFloatMap> input, long[] idsInOrder) {
  int width = idsInOrder.length;
  FastByIDMap<float[]> binarized = new FastByIDMap<float[]>();
  for (FastByIDMap.MapEntry<FastByIDFloatMap> vector : input.entrySet()) {
    FastByIDFloatMap values = vector.getValue();
    // Zero-filled on allocation; only positions with a matching key are flipped to 1.
    float[] indicators = new float[width];
    int index = 0;
    for (long id : idsInOrder) {
      if (values.containsKey(id)) {
        indicators[index] = 1.0f;
      }
      index++;
    }
    binarized.put(vector.getKey(), indicators);
  }
  return binarized;
}
/**
 * Reads the input files in a directory and writes an "unreduced" model to
 * {@code model.bin.gz} in that same directory. No factorization is performed: one of the two
 * matrices is an identity matrix over the smaller dimension and the other is a 0/1 indicator
 * version of the input.
 *
 * <p>When there are fewer users than items, {@code X} is the identity over user IDs and
 * {@code Y} is the binarized by-column input; otherwise {@code X} is the binarized by-row
 * input and {@code Y} is the identity over item IDs. If the input yields no users or no
 * items, a warning is logged and nothing is written.
 *
 * @param inputDir existing directory containing the input files; also receives the output
 * @throws NullPointerException if {@code inputDir} is null
 * @throws IllegalArgumentException if {@code inputDir} does not exist or is not a directory
 * @throws IOException if thrown while reading the input or writing the model file
 */
public static void writeUnreducedModel(File inputDir) throws IOException {
  // Fail with a message naming the offending path so the caller can tell which check tripped.
  Preconditions.checkNotNull(inputDir, "inputDir must not be null");
  Preconditions.checkArgument(inputDir.exists(), "Input directory does not exist: %s", inputDir);
  Preconditions.checkArgument(inputDir.isDirectory(), "Not a directory: %s", inputDir);

  FastByIDMap<FastIDSet> knownItemIDs = new FastByIDMap<FastIDSet>(10000, 1.25f);
  FastByIDMap<FastByIDFloatMap> rByRow = new FastByIDMap<FastByIDFloatMap>(10000, 1.25f);
  FastByIDMap<FastByIDFloatMap> rByColumn = new FastByIDMap<FastByIDFloatMap>(10000, 1.25f);
  FastIDSet itemTagIDs = new FastIDSet(1000, 1.25f);
  FastIDSet userTagIDs = new FastIDSet(1000, 1.25f);

  // The reader fills the collections passed to it.
  InputFilesReader.readInputFiles(
      knownItemIDs, rByRow, rByColumn, itemTagIDs, userTagIDs, inputDir);

  int numUsers = rByRow.size();
  int numItems = rByColumn.size();
  if (numUsers == 0 || numItems == 0) {
    log.warn("No input?");
    return;
  }

  FastByIDMap<float[]> X;
  FastByIDMap<float[]> Y;
  if (numUsers < numItems) {
    log.info(
        "{} users < {} items; input will be written as the feature-item matrix",
        numUsers,
        numItems);
    // Features are users: X is the identity, Y marks which users touched each item.
    long[] featureIDs = idsInOrder(rByRow);
    X = buildIdentity(featureIDs);
    Y = buildBinarizedMatrix(rByColumn, featureIDs);
  } else {
    log.info(
        "{} users >= {} items; input will be written as the user-feature matrix",
        numUsers,
        numItems);
    // Features are items: X marks which items each user touched, Y is the identity.
    long[] featureIDs = idsInOrder(rByColumn);
    X = buildBinarizedMatrix(rByRow, featureIDs);
    Y = buildIdentity(featureIDs);
  }

  Generation generation = new Generation(knownItemIDs, X, Y, itemTagIDs, userTagIDs);
  GenerationSerializer.writeGeneration(generation, new File(inputDir, "model.bin.gz"));
}