public void writeRating(Rating rating) throws IOException { // the buffer should already be clear assert ratingBuffer.position() == 0; assert ratingBuffer.limit() == ratingBuffer.capacity(); checkUpgrade(rating.getUserId(), rating.getItemId()); // and use it format.renderRating(rating, ratingBuffer); ratingBuffer.flip(); BinaryUtils.writeBuffer(channel, ratingBuffer); ratingBuffer.clear(); saveIndex(userMap, rating.getUserId(), index); saveIndex(itemMap, rating.getItemId(), index); index += 1; if (format.hasTimestamps()) { long ts = rating.getTimestamp(); // did this timestamp send us backwards? if (ts < lastTimestamp && !needsSorting) { logger.debug("found out-of-order timestamps, activating sorting"); needsSorting = true; } lastTimestamp = ts; } }
/**
 * Write three index entries, map them back, and check that a limited view with limit 2
 * exposes only keys 12 and 17 (with entry 17 reduced to index 1), both directly and after a
 * serialization round trip.
 *
 * @throws IOException if the temporary file cannot be created, written, or mapped.
 */
@Test
public void testLimitedView() throws IOException {
    File file = folder.newFile();
    RandomAccessFile raf = new RandomAccessFile(file, "rw");
    try {
        FileChannel chan = raf.getChannel();
        BinaryIndexTableWriter writer =
                BinaryIndexTableWriter.create(BinaryFormat.create(), chan, 3);
        writer.writeEntry(12, new int[] {0});
        writer.writeEntry(17, new int[] {1, 3});
        writer.writeEntry(19, new int[] {4, 5});

        MappedByteBuffer buffer = chan.map(FileChannel.MapMode.READ_ONLY, 0, chan.size());
        BinaryIndexTable tbl = BinaryIndexTable.fromBuffer(3, buffer);
        tbl = tbl.createLimitedView(2);
        assertThat(tbl.getKeys(), hasSize(2));
        assertThat(tbl.getKeys(), contains(12L, 17L));
        assertThat(tbl.getEntry(12), contains(0));
        assertThat(tbl.getEntry(17), contains(1));
        assertThat(tbl.getEntry(19), nullValue());
        assertThat(tbl.getEntry(-1), nullValue());

        // the limited view must survive a serialization round trip unchanged
        BinaryIndexTable serializedTbl = SerializationUtils.clone(tbl);
        assertThat(serializedTbl.getKeys(), hasSize(2));
        assertThat(serializedTbl.getKeys(), contains(12L, 17L));
        assertThat(serializedTbl.getEntry(12), contains(0));
        assertThat(serializedTbl.getEntry(17), contains(1));
        assertThat(serializedTbl.getEntry(19), nullValue());
        assertThat(serializedTbl.getEntry(-1), nullValue());
    } finally {
        // closing the file also closes its channel; the original test leaked both
        raf.close();
    }
}
/**
 * Write this header into a byte buffer at the buffer's current position: the magic bytes,
 * the format's flag word, then the rating, user, and item counts.
 *
 * @param buf The target buffer.
 */
public void render(ByteBuffer buf) {
    buf.put(BinaryFormat.HEADER_MAGIC)
       .putShort(format.getFlagWord())
       .putInt(ratingCount)
       .putInt(userCount)
       .putInt(itemCount);
}
/** * Create a new binary rating packer. * * @param file The output file. * @throws IOException The output exception. */ BinaryRatingPacker(File file, EnumSet<BinaryFormatFlag> flags) throws IOException { format = BinaryFormat.createWithFlags(PackHeaderFlag.fromFormatFlags(flags)); outputFile = file; logger.debug("opening binary pack file {}", outputFile); output = new RandomAccessFile(file, "rw"); channel = output.getChannel(); userMap = new Long2ObjectOpenHashMap<IntList>(); itemMap = new Long2ObjectOpenHashMap<IntList>(); lastTimestamp = Long.MIN_VALUE; needsSorting = false; index = 0; // skip the header channel.position(BinaryHeader.HEADER_SIZE); ratingBuffer = ByteBuffer.allocateDirect(format.getRatingSize()); }
/**
 * Write a single index entry, map it back, and check that the table exposes exactly that key
 * and returns null for a key that was never written.
 *
 * @throws IOException if the temporary file cannot be created, written, or mapped.
 */
@Test
public void testSingleEntry() throws IOException {
    File file = folder.newFile();
    RandomAccessFile raf = new RandomAccessFile(file, "rw");
    try {
        FileChannel chan = raf.getChannel();
        BinaryIndexTableWriter w = BinaryIndexTableWriter.create(BinaryFormat.create(), chan, 1);
        w.writeEntry(42, new int[] {0});

        MappedByteBuffer buf = chan.map(FileChannel.MapMode.READ_ONLY, 0, chan.size());
        BinaryIndexTable tbl = BinaryIndexTable.fromBuffer(1, buf);
        assertThat(tbl.getKeys(), contains(42L));
        assertThat(tbl.getEntry(42), contains(0));
        assertThat(tbl.getEntry(43), nullValue());
    } finally {
        // closing the file also closes its channel; the original test leaked both
        raf.close();
    }
}
/**
 * Read a header out of a byte buffer.  On success the buffer's position is left just past
 * the header fields (magic, flag word, and the three counts).
 *
 * @param buf The byte buffer.
 * @return The header.
 * @throws IllegalArgumentException if the buffer holds fewer than {@code HEADER_SIZE} bytes
 *         or does not begin with the expected magic bytes.
 */
public static BinaryHeader fromHeader(ByteBuffer buf) {
    Preconditions.checkArgument(buf.remaining() >= HEADER_SIZE, "buffer not large enough");

    // the first two bytes must be the format's magic marker
    byte[] marker = new byte[2];
    buf.get(marker);
    Preconditions.checkArgument(Arrays.equals(marker, BinaryFormat.HEADER_MAGIC),
                                "invalid magic");

    // the flag word selects the format; the three counts follow in a fixed order
    BinaryFormat parsedFormat = BinaryFormat.fromFlags(buf.getShort());
    int ratingTotal = buf.getInt();
    int userTotal = buf.getInt();
    int itemTotal = buf.getInt();
    return new BinaryHeader(parsedFormat, ratingTotal, userTotal, itemTotal);
}
/**
 * Check whether the current format can represent the given IDs and, if it cannot, rewrite the
 * ratings stored so far in a format without the offending compact flag(s).
 *
 * @param uid The user ID about to be written.
 * @param iid The item ID about to be written.
 * @throws IOException if rewriting the stored ratings fails.
 */
private void checkUpgrade(long uid, long iid) throws IOException {
    // collect the compact flags that the incoming IDs have outgrown
    Set<PackHeaderFlag> outgrown = EnumSet.noneOf(PackHeaderFlag.class);
    if (!format.userIdIsValid(uid)) {
        assert format.hasCompactUsers();
        outgrown.add(PackHeaderFlag.COMPACT_USERS);
    }
    if (!format.itemIdIsValid(iid)) {
        assert format.hasCompactItems();
        outgrown.add(PackHeaderFlag.COMPACT_ITEMS);
    }
    if (outgrown.isEmpty()) {
        // both IDs fit; nothing to do
        return;
    }

    Set<PackHeaderFlag> remaining = EnumSet.copyOf(format.getFlags());
    remaining.removeAll(outgrown);
    BinaryFormat widened = BinaryFormat.createWithFlags(remaining);
    // NOTE(review): this is an identity comparison; whether createWithFlags can return the
    // current instance is not visible here - confirm against BinaryFormat.
    if (widened != format) {
        upgradeRatings(widened);
    }
}
/**
 * Compute the byte position of a rating: the format's header size plus {@code idx} rating
 * records.
 *
 * @param idx The zero-based rating index.
 * @return The position of the rating, as a {@code long}.
 */
private long ratingPos(int idx) {
    long offset = format.getHeaderSize();
    // widen before multiplying: an int product overflows once idx * ratingSize passes 2^31 - 1
    return offset + (long) idx * format.getRatingSize();
}
private void upgradeRatings(BinaryFormat newFormat) throws IOException { Preconditions.checkArgument( newFormat.getRatingSize() > format.getRatingSize(), "new format is not wider than old"); logger.info("upgrading {} ratings from {} to {}", index, format, newFormat); ByteBuffer oldBuffer = ByteBuffer.allocateDirect(format.getRatingSize()); ByteBuffer newBuffer = ByteBuffer.allocateDirect(newFormat.getRatingSize()); MutableRating scratch = new MutableRating(); long oldPos = BinaryHeader.HEADER_SIZE + index * format.getRatingSize(); Preconditions.checkState(channel.position() == oldPos, "channel is at the wrong position"); long newPos = BinaryHeader.HEADER_SIZE + index * newFormat.getRatingSize(); channel.position(newPos); // loop backwards, coping each rating to later in the file for (int i = index - 1; i >= 0; i--) { oldPos -= format.getRatingSize(); newPos -= newFormat.getRatingSize(); // read the old rating BinaryUtils.readBuffer(channel, oldBuffer, oldPos); oldBuffer.flip(); format.readRating(oldBuffer, scratch); oldBuffer.clear(); // write the new rating newFormat.renderRating(scratch, newBuffer); newBuffer.flip(); BinaryUtils.writeBuffer(channel, newBuffer, newPos); newBuffer.clear(); } assert oldPos == BinaryHeader.HEADER_SIZE; assert newPos == BinaryHeader.HEADER_SIZE; format = newFormat; ratingBuffer = ByteBuffer.allocateDirect(newFormat.getRatingSize()); }
/**
 * Get the total size of the rating data: the rating count multiplied by the format's
 * per-rating size.
 *
 * @return The size of the rating data.
 */
public int getRatingDataSize() {
    // NOTE(review): the product is computed in int and could wrap for very large files;
    // the int return type is part of the public interface, so it is left as-is here.
    final int count = getRatingCount();
    final int bytesPerRating = format.getRatingSize();
    return count * bytesPerRating;
}