示例#1
0
  /**
   * Append one rating to the pack file and record its position in the user and item indexes.
   *
   * @param rating The rating to write.
   * @throws IOException if upgrading the format or writing to the channel fails.
   */
  public void writeRating(Rating rating) throws IOException {
    // precondition: the scratch buffer was left cleared by the previous write
    assert ratingBuffer.position() == 0;
    assert ratingBuffer.limit() == ratingBuffer.capacity();

    // widen the format first if these IDs do not fit; this may replace ratingBuffer
    checkUpgrade(rating.getUserId(), rating.getItemId());

    // render into the scratch buffer, push it to the channel, and reset for the next call
    format.renderRating(rating, ratingBuffer);
    ratingBuffer.flip();
    BinaryUtils.writeBuffer(channel, ratingBuffer);
    ratingBuffer.clear();

    // remember which rating slot belongs to this user and item
    saveIndex(userMap, rating.getUserId(), index);
    saveIndex(itemMap, rating.getItemId(), index);
    index++;

    if (!format.hasTimestamps()) {
      return;
    }

    // track ordering: a timestamp earlier than its predecessor means the data must be sorted
    final long stamp = rating.getTimestamp();
    if (!needsSorting && stamp < lastTimestamp) {
      logger.debug("found out-of-order timestamps, activating sorting");
      needsSorting = true;
    }
    lastTimestamp = stamp;
  }
  /**
   * Write three index entries, limit the table view to the first two rating positions, and check
   * that both the limited view and its serialized clone expose only the entries within the limit.
   */
  @Test
  public void testLimitedView() throws IOException {
    File file = folder.newFile();
    RandomAccessFile raf = new RandomAccessFile(file, "rw");
    try {
      FileChannel chan = raf.getChannel();
      BinaryIndexTableWriter writer =
          BinaryIndexTableWriter.create(BinaryFormat.create(), chan, 3);
      writer.writeEntry(12, new int[] {0});
      writer.writeEntry(17, new int[] {1, 3});
      writer.writeEntry(19, new int[] {4, 5});

      MappedByteBuffer buffer = chan.map(FileChannel.MapMode.READ_ONLY, 0, chan.size());
      BinaryIndexTable tbl = BinaryIndexTable.fromBuffer(3, buffer);
      tbl = tbl.createLimitedView(2);
      assertThat(tbl.getKeys(), hasSize(2));
      assertThat(tbl.getKeys(), contains(12L, 17L));
      assertThat(tbl.getEntry(12), contains(0));
      assertThat(tbl.getEntry(17), contains(1));
      assertThat(tbl.getEntry(19), nullValue());
      assertThat(tbl.getEntry(-1), nullValue());

      // the limited view must survive a serialization round trip unchanged
      BinaryIndexTable serializedTbl = SerializationUtils.clone(tbl);
      assertThat(serializedTbl.getKeys(), hasSize(2));
      assertThat(serializedTbl.getKeys(), contains(12L, 17L));
      assertThat(serializedTbl.getEntry(12), contains(0));
      assertThat(serializedTbl.getEntry(17), contains(1));
      assertThat(serializedTbl.getEntry(19), nullValue());
      assertThat(serializedTbl.getEntry(-1), nullValue());
    } finally {
      // closing the file also closes its channel, so the handle is not leaked when a check fails
      raf.close();
    }
  }
示例#3
0
 /**
  * Write this header into a byte buffer: the magic bytes, the format's flag word, and then the
  * rating, user, and item counts, in that order.
  *
  * @param buf The buffer to write into.
  */
 public void render(ByteBuffer buf) {
   buf.put(BinaryFormat.HEADER_MAGIC)
       .putShort(format.getFlagWord())
       .putInt(ratingCount)
       .putInt(userCount)
       .putInt(itemCount);
 }
示例#4
0
  /**
   * Create a new binary rating packer.
   *
   * @param file The output file.
   * @throws IOException The output exception.
   */
  BinaryRatingPacker(File file, EnumSet<BinaryFormatFlag> flags) throws IOException {
    format = BinaryFormat.createWithFlags(PackHeaderFlag.fromFormatFlags(flags));
    outputFile = file;

    logger.debug("opening binary pack file {}", outputFile);
    output = new RandomAccessFile(file, "rw");
    channel = output.getChannel();

    userMap = new Long2ObjectOpenHashMap<IntList>();
    itemMap = new Long2ObjectOpenHashMap<IntList>();

    lastTimestamp = Long.MIN_VALUE;
    needsSorting = false;
    index = 0;

    // skip the header
    channel.position(BinaryHeader.HEADER_SIZE);

    ratingBuffer = ByteBuffer.allocateDirect(format.getRatingSize());
  }
  /**
   * Write a single index entry and check that it can be read back from the mapped file, and that
   * a key which was never written yields no entry.
   */
  @Test
  public void testSingleEntry() throws IOException {
    File file = folder.newFile();
    RandomAccessFile raf = new RandomAccessFile(file, "rw");
    try {
      FileChannel chan = raf.getChannel();
      BinaryIndexTableWriter w = BinaryIndexTableWriter.create(BinaryFormat.create(), chan, 1);
      w.writeEntry(42, new int[] {0});

      MappedByteBuffer buf = chan.map(FileChannel.MapMode.READ_ONLY, 0, chan.size());
      BinaryIndexTable tbl = BinaryIndexTable.fromBuffer(1, buf);
      assertThat(tbl.getKeys(), contains(42L));
      assertThat(tbl.getEntry(42), contains(0));
      assertThat(tbl.getEntry(43), nullValue());
    } finally {
      // closing the file also closes its channel, so the handle is not leaked when a check fails
      raf.close();
    }
  }
示例#6
0
 /**
  * Parse a header from a byte buffer. The buffer's position will be advanced to the end of the
  * header.
  *
  * <p>The fields are read in the order the header is rendered: magic bytes, flag word, rating
  * count, user count, item count.
  *
  * @param buf The byte buffer.
  * @return The header.
  * @throws IllegalArgumentException if the buffer has fewer than {@code HEADER_SIZE} bytes
  *     remaining or does not start with the expected magic bytes.
  */
 public static BinaryHeader fromHeader(ByteBuffer buf) {
   Preconditions.checkArgument(buf.remaining() >= HEADER_SIZE, "buffer not large enough");
   // size the read from the magic constant itself so reader and writer cannot drift apart
   byte[] magic = new byte[BinaryFormat.HEADER_MAGIC.length];
   buf.get(magic);
   if (!Arrays.equals(magic, BinaryFormat.HEADER_MAGIC)) {
     throw new IllegalArgumentException("invalid magic");
   }
   short word = buf.getShort();
   BinaryFormat format = BinaryFormat.fromFlags(word);
   int nratings = buf.getInt();
   int nusers = buf.getInt();
   int nitems = buf.getInt();
   return new BinaryHeader(format, nratings, nusers, nitems);
 }
示例#7
0
  /**
   * Check whether the current format can store the given IDs and, if it cannot, rewrite the
   * ratings written so far in a format without the offending compact-ID flags.
   *
   * @param uid The user ID about to be written.
   * @param iid The item ID about to be written.
   * @throws IOException if rewriting the existing ratings fails.
   */
  private void checkUpgrade(long uid, long iid) throws IOException {
    Set<PackHeaderFlag> toRemove = null;
    if (!format.userIdIsValid(uid)) {
      assert format.hasCompactUsers();
      toRemove = EnumSet.of(PackHeaderFlag.COMPACT_USERS);
    }
    if (!format.itemIdIsValid(iid)) {
      assert format.hasCompactItems();
      if (toRemove == null) {
        toRemove = EnumSet.of(PackHeaderFlag.COMPACT_ITEMS);
      } else {
        toRemove.add(PackHeaderFlag.COMPACT_ITEMS);
      }
    }

    if (toRemove != null) {
      Set<PackHeaderFlag> newFlags = EnumSet.copyOf(format.getFlags());
      newFlags.removeAll(toRemove);
      BinaryFormat newFormat = BinaryFormat.createWithFlags(newFlags);
      // Compare by value rather than by reference, so a distinct but equal format instance does
      // not trigger a rewrite. If BinaryFormat does not override equals, this behaves exactly
      // like the identity comparison.
      if (!newFormat.equals(format)) {
        upgradeRatings(newFormat);
      }
    }
  }
示例#8
0
 /**
  * Compute the byte position of a rating record.
  *
  * @param idx The zero-based index of the rating.
  * @return The header size plus {@code idx} rating-sized records.
  */
 private long ratingPos(int idx) {
   long offset = format.getHeaderSize();
   // widen before multiplying: an int product would overflow for positions beyond 2 GiB
   return offset + (long) idx * format.getRatingSize();
 }
示例#9
0
  /**
   * Rewrite every rating written so far from the current format into a wider format, in place,
   * then switch the packer over to the new format.
   *
   * <p>Ratings are copied from last to first. Each record's new position is at or after its old
   * position, so working backwards never overwrites a record that has not been read yet.
   *
   * @param newFormat The format to upgrade to; its rating size must exceed the current one.
   * @throws IOException if reading or writing the channel fails.
   * @throws IllegalArgumentException if the new format is not wider than the current one.
   * @throws IllegalStateException if the channel is not positioned at the end of the ratings.
   */
  private void upgradeRatings(BinaryFormat newFormat) throws IOException {
    Preconditions.checkArgument(
        newFormat.getRatingSize() > format.getRatingSize(), "new format is not wider than old");
    logger.info("upgrading {} ratings from {} to {}", index, format, newFormat);

    final int oldSize = format.getRatingSize();
    final int newSize = newFormat.getRatingSize();
    ByteBuffer oldBuffer = ByteBuffer.allocateDirect(oldSize);
    ByteBuffer newBuffer = ByteBuffer.allocateDirect(newSize);
    MutableRating scratch = new MutableRating();

    // widen before multiplying: an int product would overflow once the data passes 2 GiB
    long oldPos = BinaryHeader.HEADER_SIZE + (long) index * oldSize;
    Preconditions.checkState(channel.position() == oldPos, "channel is at the wrong position");
    long newPos = BinaryHeader.HEADER_SIZE + (long) index * newSize;
    // leave the channel at the end of the upgraded data so later writes append there
    channel.position(newPos);
    // loop backwards, copying each rating to later in the file
    for (int i = index - 1; i >= 0; i--) {
      oldPos -= oldSize;
      newPos -= newSize;

      // read the old rating
      BinaryUtils.readBuffer(channel, oldBuffer, oldPos);
      oldBuffer.flip();
      format.readRating(oldBuffer, scratch);
      oldBuffer.clear();

      // write the new rating
      newFormat.renderRating(scratch, newBuffer);
      newBuffer.flip();
      BinaryUtils.writeBuffer(channel, newBuffer, newPos);
      newBuffer.clear();
    }
    assert oldPos == BinaryHeader.HEADER_SIZE;
    assert newPos == BinaryHeader.HEADER_SIZE;
    format = newFormat;
    // the shared scratch buffer must match the new record size
    ratingBuffer = ByteBuffer.allocateDirect(newSize);
  }
示例#10
0
 /**
  * Get the number of bytes occupied by the rating records.
  *
  * @return The rating count multiplied by the format's per-rating size, computed in {@code int}
  *     arithmetic.
  */
 public int getRatingDataSize() {
   final int count = getRatingCount();
   final int bytesPerRating = format.getRatingSize();
   return count * bytesPerRating;
 }