/** * Fingerprint the file into chunks called "Fingers". The chunk boundaries are determined using a * windowed fingerprinter {@link RabinFingerprintLongWindowed}. * * <p>The chunk detector is position independent. Therefore, even if a file is rearranged or * partially corrupted, the untouched chunks can be efficiently discovered. */ public void getChunkFingerprints(InputStream is, ChunkVisitor visitor) throws IOException { // windowing fingerprinter for finding chunk boundaries. this is only // reset at the beginning of the file final RabinFingerprintLong window = newWindowedFingerprint(); // fingerprinter for chunks. this is reset after each chunk final RabinFingerprintLong finger = newFingerprint(); // counters long chunkStart = 0; long chunkEnd = 0; /* * fingerprint one byte at a time. we have to use this granularity to * ensure that, for example, a one byte offset at the beginning of the * file won't effect the chunk boundaries */ for (byte b : ByteStreams.toByteArray(is)) { // push byte into fingerprints window.pushByte(b); finger.pushByte(b); chunkEnd++; /* * if we've reached a boundary (which we will at some probability * based on the boundary pattern and the size of the fingerprint * window), we store the current chunk fingerprint and reset the * chunk fingerprinter. */ if (boundaryDetector.isBoundary(window)) { visitor.visit(finger.getFingerprintLong(), chunkStart, chunkEnd); finger.reset(); // store last chunk offset chunkStart = chunkEnd; } } // final chunk visitor.visit(finger.getFingerprintLong(), chunkStart, chunkEnd); }
/**
 * Prints the fingerprint of each listed file to standard output.
 *
 * <p>For every path that exists, one line is written containing the fingerprint in uppercase
 * hexadecimal, a space, and the file's absolute path. Paths that do not exist are reported on
 * standard error (no trailing newline is written after the message) and skipped.
 *
 * @param paths paths of the files to fingerprint, processed in list order
 * @param p polynomial used to construct the fingerprinter
 * @throws FileNotFoundException if an existing path cannot be opened for reading
 * @throws IOException if reading a file fails
 */
public void fingerprintFiles(List<String> paths, Polynomial p)
    throws FileNotFoundException, IOException {
  // a single fingerprinter is reused and reset before each file
  final RabinFingerprintLong rabin = new RabinFingerprintLong(p);
  for (String path : paths) {
    File file = new File(path);
    if (file.exists()) {
      rabin.reset();
      // try-with-resources closes the file handle even when reading throws;
      // previously the stream was never closed, leaking one descriptor per file
      try (FileInputStream in = new FileInputStream(file)) {
        rabin.pushBytes(ByteStreams.toByteArray(in));
      }
      System.out.println(
          String.format("%X %s", rabin.getFingerprintLong(), file.getAbsolutePath()));
      System.out.flush();
    } else {
      System.err.print(String.format("Could not find file %s", path));
      System.err.flush();
    }
  }
}
/**
 * Computes one fingerprint covering everything that can be read from the given stream.
 *
 * <p>The stream's bytes are collected with {@code ByteStreams.toByteArray} and pushed into a
 * fresh fingerprinter in a single call.
 *
 * @param is stream whose contents are fingerprinted
 * @return the fingerprint of the bytes read from the stream
 * @throws IOException if reading from the stream fails
 */
public long getFullFingerprint(InputStream is) throws IOException {
  final RabinFingerprintLong whole = newFingerprint();
  final byte[] contents = ByteStreams.toByteArray(is);
  whole.pushBytes(contents);
  final long fingerprint = whole.getFingerprintLong();
  return fingerprint;
}
/**
 * Fingerprints everything read from standard input and prints the result to standard output as
 * a single line of uppercase hexadecimal.
 *
 * @param p polynomial used to construct the fingerprinter
 * @throws IOException if reading from standard input fails
 */
public void fingerprintStdin(Polynomial p) throws IOException {
  final RabinFingerprintLong fingerprinter = new RabinFingerprintLong(p);

  // gather all of stdin, then feed it to the fingerprinter in one call
  final byte[] input = ByteStreams.toByteArray(System.in);
  fingerprinter.pushBytes(input);

  final String hex = String.format("%X", fingerprinter.getFingerprintLong());
  System.out.println(hex);
}