private int[] getHashes(LireFeature feature) { // result = new int[maximumHits]; hashingResultScoreDocs.clear(); maxDistance = 0f; tmpScore = 0f; int rep = 0; LireFeature tmpFeature; for (Iterator<LireFeature> iterator = representatives.iterator(); iterator.hasNext(); ) { tmpFeature = iterator.next(); tmpScore = tmpFeature.getDistance(feature); if (hashingResultScoreDocs.size() < maximumHits) { hashingResultScoreDocs.add(new SimpleResult(tmpScore, null, rep)); maxDistance = Math.max(maxDistance, tmpScore); } else if (tmpScore < maxDistance) { hashingResultScoreDocs.add(new SimpleResult(tmpScore, null, rep)); } while (hashingResultScoreDocs.size() > maximumHits) { hashingResultScoreDocs.remove(hashingResultScoreDocs.last()); maxDistance = hashingResultScoreDocs.last().getDistance(); } rep++; } rep = 0; for (Iterator<SimpleResult> iterator = hashingResultScoreDocs.iterator(); iterator.hasNext(); ) { SimpleResult next = iterator.next(); result[rep] = next.getIndexNumber(); rep++; } return result; }
/**
 * Consumes the given input up to its end using a random mix of seeks, single-byte reads
 * and bulk reads, asserting before every operation that the reported file pointer matches
 * the position tracked locally.
 *
 * @param indexInput the input to read; expected to be positioned at offset 0
 * @throws IOException if seeking or reading fails
 */
private void readIndexInputFullyWithRandomSeeks(IndexInput indexInput) throws IOException {
    final BytesRef buffer = new BytesRef(scaledRandomIntBetween(1, 1024));
    long expectedPosition = 0;
    while (expectedPosition < indexInput.length()) {
        assertEquals(expectedPosition, indexInput.getFilePointer());
        switch (random().nextInt(5)) {
            case 0: {
                // Jump by a random offset in [-100, 100], clamped to [0, length - 1].
                final int shift = 100 - randomIntBetween(0, 200);
                expectedPosition =
                        Math.min(indexInput.length() - 1, Math.max(0, expectedPosition + shift));
                indexInput.seek(expectedPosition);
                break;
            }
            case 1:
                indexInput.readByte();
                expectedPosition++;
                break;
            default: {
                // Bulk read: as much as fits in the buffer, but never past the end.
                final int chunk =
                        (int) Math.min(indexInput.length() - expectedPosition, buffer.bytes.length);
                indexInput.readBytes(buffer.bytes, buffer.offset, chunk);
                expectedPosition += chunk;
                break;
            }
        }
    }
}
public void run() { // do it ... try { IndexWriter indexWriter = LuceneUtils.createIndexWriter( indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext(); ) { File inputFile = iterator.next(); if (verbose) System.out.println("Processing " + inputFile.getPath() + "."); if (verbose) System.out.println("Counting images."); run = 0; readFile(indexWriter, inputFile); if (verbose) System.out.printf("%d images found in the data file.\n", docCount); int numberOfRepresentatives = 1000; // TODO: clever selection. // select a number of representative "fixed stars" randomly from file if (numberOfRepresentatives > Math.sqrt(docCount)) numberOfRepresentatives = (int) Math.sqrt(docCount); if (verbose) System.out.printf( "Selecting %d representative images for hashing.\n", numberOfRepresentatives); representativesID = new HashSet<Integer>(numberOfRepresentatives); while (representativesID.size() < numberOfRepresentatives) { representativesID.add((int) Math.floor(Math.random() * (docCount - 1))); } representatives = new ArrayList<LireFeature>(numberOfRepresentatives); docCount = 0; run = 1; if (verbose) System.out.println("Now getting representatives from the data file."); readFile(indexWriter, inputFile); docCount = 0; run = 2; if (verbose) System.out.println("Finally we start the indexing process, please wait ..."); readFile(indexWriter, inputFile); if (verbose) System.out.println("Indexing finished."); } indexWriter.commit(); indexWriter.close(); } catch (Exception e) { e.printStackTrace(); } }
public LocalFeatureExtractor extractLocalFeatures( BufferedImage image, LocalFeatureExtractor localFeatureExtractor) { assert (image != null); // Scaling image is especially with the correlogram features very important! // All images are scaled to guarantee a certain upper limit for indexing. if (Math.max(image.getHeight(), image.getWidth()) > DocumentBuilder.MAX_IMAGE_DIMENSION) { image = ImageUtils.scaleImage(image, DocumentBuilder.MAX_IMAGE_DIMENSION); } localFeatureExtractor.extract(image); return localFeatureExtractor; }
private void corruptFile(Directory dir, String fileIn, String fileOut) throws IOException { IndexInput input = dir.openInput(fileIn, IOContext.READONCE); IndexOutput output = dir.createOutput(fileOut, IOContext.DEFAULT); long len = input.length(); byte[] b = new byte[1024]; long broken = randomInt((int) len); long pos = 0; while (pos < len) { int min = (int) Math.min(input.length() - pos, b.length); input.readBytes(b, 0, min); if (broken >= pos && broken < pos + min) { // Flip one byte int flipPos = (int) (broken - pos); b[flipPos] = (byte) (b[flipPos] ^ 42); } output.writeBytes(b, min); pos += min; } IOUtils.close(input, output); }
@Test public void testVerifyingIndexOutput() throws IOException { Directory dir = newDirectory(); IndexOutput output = dir.createOutput("foo.bar", IOContext.DEFAULT); int iters = scaledRandomIntBetween(10, 100); for (int i = 0; i < iters; i++) { BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024)); output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length); } CodecUtil.writeFooter(output); output.close(); IndexInput indexInput = dir.openInput("foo.bar", IOContext.DEFAULT); String checksum = Store.digestToString(CodecUtil.retrieveChecksum(indexInput)); indexInput.seek(0); BytesRef ref = new BytesRef(scaledRandomIntBetween(1, 1024)); long length = indexInput.length(); IndexOutput verifyingOutput = new Store.LuceneVerifyingIndexOutput( new StoreFileMetaData("foo1.bar", length, checksum), dir.createOutput("foo1.bar", IOContext.DEFAULT)); while (length > 0) { if (random().nextInt(10) == 0) { verifyingOutput.writeByte(indexInput.readByte()); length--; } else { int min = (int) Math.min(length, ref.bytes.length); indexInput.readBytes(ref.bytes, ref.offset, min); verifyingOutput.writeBytes(ref.bytes, ref.offset, min); length -= min; } } Store.verify(verifyingOutput); verifyingOutput.writeByte((byte) 0x0); try { Store.verify(verifyingOutput); fail("should be a corrupted index"); } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) { // ok } IOUtils.close(indexInput, verifyingOutput, dir); }