示例#1
0
  /**
   * Fingerprint the stream into chunks called "Fingers". The chunk boundaries are determined using
   * a windowed fingerprinter {@link RabinFingerprintLongWindowed}.
   *
   * <p>The chunk detector is position independent. Therefore, even if a file is rearranged or
   * partially corrupted, the untouched chunks can be efficiently discovered.
   *
   * <p>The stream is consumed incrementally through a fixed-size buffer, so memory use does not
   * grow with the size of the input and inputs larger than a single Java array can be processed.
   *
   * <p>The visitor is always called one last time after the end of the stream with whatever has
   * accumulated since the previous boundary. For an empty stream, or when the last byte itself
   * ended a chunk, that final call has {@code chunkStart == chunkEnd}.
   *
   * @param is stream to read until end-of-stream; it is not closed by this method
   * @param visitor receives {@code (fingerprint, chunkStart, chunkEnd)} for every chunk in stream
   *     order, where the offsets are byte positions counted from the first byte read (start
   *     inclusive, end exclusive)
   * @throws IOException if reading from {@code is} fails; chunks completed before the failure have
   *     already been reported to the visitor
   */
  public void getChunkFingerprints(InputStream is, ChunkVisitor visitor) throws IOException {
    // windowing fingerprinter for finding chunk boundaries. this is only
    // reset at the beginning of the file
    final RabinFingerprintLong window = newWindowedFingerprint();

    // fingerprinter for chunks. this is reset after each chunk
    final RabinFingerprintLong finger = newFingerprint();

    // byte offsets of the chunk currently being accumulated: [chunkStart, chunkEnd)
    long chunkStart = 0;
    long chunkEnd = 0;

    // read in bounded slices instead of materialising the whole stream in memory
    final byte[] buffer = new byte[8192];
    int read;
    while ((read = is.read(buffer)) != -1) {
      /*
       * fingerprint one byte at a time. we have to use this granularity to
       * ensure that, for example, a one byte offset at the beginning of the
       * file won't affect the chunk boundaries
       */
      for (int i = 0; i < read; i++) {
        final byte b = buffer[i];

        // push byte into fingerprints
        window.pushByte(b);
        finger.pushByte(b);
        chunkEnd++;

        /*
         * if we've reached a boundary (which we will at some probability
         * based on the boundary pattern and the size of the fingerprint
         * window), we report the current chunk fingerprint and reset the
         * chunk fingerprinter.
         */
        if (boundaryDetector.isBoundary(window)) {
          visitor.visit(finger.getFingerprintLong(), chunkStart, chunkEnd);
          finger.reset();

          // the next chunk begins where this one ended
          chunkStart = chunkEnd;
        }
      }
    }

    // final chunk
    visitor.visit(finger.getFingerprintLong(), chunkStart, chunkEnd);
  }
示例#2
0
 /**
  * Prints the fingerprint of each listed file to standard output.
  *
  * <p>For every path that exists, one line of the form {@code <HEX-FINGERPRINT> <absolute path>}
  * is written to {@code System.out}. For every path that does not exist, a "Could not find file"
  * line is written to {@code System.err} and processing continues with the next path.
  *
  * @param paths file system paths to fingerprint, processed in list order
  * @param p polynomial used to construct the fingerprinter shared by all files
  * @throws FileNotFoundException if an existing path cannot be opened for reading
  * @throws IOException if reading a file fails
  */
 public void fingerprintFiles(List<String> paths, Polynomial p)
     throws FileNotFoundException, IOException {
   final RabinFingerprintLong rabin = new RabinFingerprintLong(p);
   for (String path : paths) {
     File file = new File(path);
     if (file.exists()) {
       // the fingerprinter is reused, so clear the previous file's state first
       rabin.reset();
       // close the stream even when reading throws; otherwise each file leaks a descriptor
       try (FileInputStream in = new FileInputStream(file)) {
         rabin.pushBytes(ByteStreams.toByteArray(in));
       }
       System.out.println(
           String.format("%X %s", rabin.getFingerprintLong(), file.getAbsolutePath()));
       System.out.flush();
     } else {
       // terminate the line so messages for consecutive missing files do not run together
       System.err.println(String.format("Could not find file %s", path));
       System.err.flush();
     }
   }
 }
示例#3
0
 /**
  * Computes one fingerprint over the entire contents of a stream.
  *
  * <p>The stream is read to its end into a byte array, which is then pushed into a fresh
  * fingerprinter in a single call.
  *
  * @param is stream whose remaining bytes are fingerprinted
  * @return the fingerprint of all bytes read from {@code is}
  * @throws IOException if reading from {@code is} fails
  */
 public long getFullFingerprint(InputStream is) throws IOException {
   final RabinFingerprintLong whole = newFingerprint();
   final byte[] contents = ByteStreams.toByteArray(is);
   whole.pushBytes(contents);
   final long result = whole.getFingerprintLong();
   return result;
 }
示例#4
0
 /**
  * Fingerprints everything available on standard input and prints the result.
  *
  * <p>Standard input is read to its end, pushed into a fingerprinter built from the given
  * polynomial, and the resulting value is written to {@code System.out} as upper-case
  * hexadecimal on its own line.
  *
  * @param p polynomial used to construct the fingerprinter
  * @throws IOException if reading standard input fails
  */
 public void fingerprintStdin(Polynomial p) throws IOException {
   final RabinFingerprintLong stdinFinger = new RabinFingerprintLong(p);
   final byte[] input = ByteStreams.toByteArray(System.in);
   stdinFinger.pushBytes(input);
   final String hex = String.format("%X", stdinFinger.getFingerprintLong());
   System.out.println(hex);
 }