public Document createDocument(BufferedImage image, String identifier) { assert (image != null); BufferedImage bimg = image; // Scaling image is especially with the correlogram features very important! // All images are scaled to guarantee a certain upper limit for indexing. if (Math.max(image.getHeight(), image.getWidth()) > MAX_IMAGE_DIMENSION) { bimg = ImageUtils.scaleImage(image, MAX_IMAGE_DIMENSION); } Document doc = null; logger.finer("Starting extraction from image [CEDD - fast]."); CEDD vd = new CEDD(); vd.extract(bimg); logger.fine("Extraction finished [CEDD - fast]."); doc = new Document(); doc.add(new Field(DocumentBuilder.FIELD_NAME_CEDD, vd.getByteArrayRepresentation())); if (identifier != null) doc.add( new Field( DocumentBuilder.FIELD_NAME_IDENTIFIER, identifier, Field.Store.YES, Field.Index.NOT_ANALYZED)); return doc; }
public double singleSearch(int docNum) throws IOException, InstantiationException, IllegalAccessException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); // ----------- String query = reader.document(docNum).getValues("hash")[0]; CEDD ceddQuery = new CEDD(); ceddQuery.setByteArrayRepresentation( reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes, reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset, reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length); // ----------- HashSet<String> gold = new HashSet<String>(numImagesEval); ImageSearcher cis = ImageSearcherFactory.createCEDDImageSearcher(100); ImageSearchHits hits = cis.search(reader.document(docNum), reader); for (int i = 0; i < 10; i++) { gold.add(hits.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); } // ------------ IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity( new SimilarityBase() { @Override protected float score(BasicStats basicStats, float freq, float v2) { return 1; } @Override public String toString() { return null; } }); TopDocs topDocs = searcher.search(createQuery(query), 500); topDocs = rerank(topDocs, ceddQuery, reader); // System.out.println("topDocs.scoreDocs.length = " + topDocs.scoreDocs.length); double numMatches = 0; for (int i = 0; i < topDocs.scoreDocs.length; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; // System.out.print(scoreDoc.score + ": "); String file = reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; // System.out.println(file.substring(file.lastIndexOf('/') + 1) + // (gold.contains(file)?" x":" o")); if (gold.contains(file)) numMatches++; } return numMatches; }
public void tttestGetDistribution() throws IOException { BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv")); IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath))); // get the first document: // if (!IndexReader.indexExists(reader.directory())) // throw new FileNotFoundException("No index found at this specific location."); CEDD cedd1 = new CEDD(); FCTH fcth1 = new FCTH(); CEDD cedd2 = new CEDD(); FCTH fcth2 = new FCTH(); JCD jcd1 = new JCD(); JCD jcd2 = new JCD(); String[] cls; // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = reader.numDocs(); for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document doc = reader.document(i); cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD); if (cls != null && cls.length > 0) cedd1.setStringRepresentation(cls[0]); cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH); if (cls != null && cls.length > 0) fcth1.setStringRepresentation(cls[0]); for (int j = i + 1; j < docs; j++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document doc2 = reader.document(j); cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD); if (cls != null && cls.length > 0) cedd2.setStringRepresentation(cls[0]); cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH); if (cls != null && cls.length > 0) fcth2.setStringRepresentation(cls[0]); jcd1.init(cedd1, fcth1); jcd2.init(cedd2, fcth2); bw.write( cedd1.getDistance(cedd2) + ";" + fcth1.getDistance(fcth2) + ";" + jcd1.getDistance(jcd2) + "\n"); } if (i % 100 == 0) System.out.println(i + " entries processed ... "); } bw.close(); }
public double testIndexing() throws IOException, IllegalAccessException, InstantiationException { LocalitySensitiveHashing.generateHashFunctions(); LocalitySensitiveHashing.readHashFunctions(); DocumentBuilder builder = new ChainedDocumentBuilder(); ((ChainedDocumentBuilder) builder).addBuilder(DocumentBuilderFactory.getCEDDDocumentBuilder()); // System.out.println("-< Getting files to index >--------------"); ArrayList<String> images = FileUtils.getAllImages(new File(testExtensive), true); // System.out.println("-< Indexing " + images.size() + " files >--------------"); IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); int count = 0; long time = System.currentTimeMillis(); for (String identifier : images) { CEDD cedd = new CEDD(); cedd.extract(ImageIO.read(new FileInputStream(identifier))); Document doc = new Document(); doc.add(new Field(DocumentBuilder.FIELD_NAME_CEDD, cedd.getByteArrayRepresentation())); doc.add( new Field( DocumentBuilder.FIELD_NAME_IDENTIFIER, identifier, Field.Store.YES, Field.Index.NOT_ANALYZED)); int[] hashes = LocalitySensitiveHashing.generateHashes(cedd.getDoubleHistogram()); StringBuilder hash = new StringBuilder(512); for (int i = 0; i < hashes.length; i++) { hash.append(hashes[i]); hash.append(' '); } // System.out.println("hash = " + hash); doc.add(new Field("hash", hash.toString(), Field.Store.YES, Field.Index.ANALYZED)); iw.addDocument(doc); count++; // if (count % 100 == 0) System.out.println(count + " files indexed."); } long timeTaken = (System.currentTimeMillis() - time); float sec = ((float) timeTaken) / 1000f; // System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image."); iw.close(); return testSearch(); }
public void testOutputSearchResults() throws IOException, InstantiationException, IllegalAccessException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); int docNum = 0; // doc to search for. // ----------- String query = reader.document(docNum).getValues("hash")[0]; CEDD ceddQuery = new CEDD(); ceddQuery.setByteArrayRepresentation( reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes, reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset, reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(createQuery(query), numImagesEval); FileUtils.saveImageResultsToPng( "result_lsh", topDocs, reader.document(docNum).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0], reader); }