/**
 * Runs a similarity search using the descriptors stored in the given document as the query.
 * <p>
 * The first stored value of the color layout, scalable color and edge histogram fields is
 * parsed into the matching descriptor object. A descriptor whose field is absent from the
 * document is handed to {@code findSimilar} as {@code null}.
 *
 * @param doc    the document providing the query descriptors
 * @param reader the index reader passed on to {@code findSimilar}
 * @return the hits built from {@code this.docs} and the value returned by {@code findSimilar}
 *         (presumably {@code findSimilar} fills {@code this.docs} -- confirm against its body)
 * @throws IOException declared for index access problems
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    // Color layout descriptor of the query document, if stored.
    ColorLayout queryLayout = null;
    String[] layoutValues = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    boolean hasLayout = layoutValues != null && layoutValues.length > 0;
    if (hasLayout) {
        queryLayout = new ColorLayout();
        queryLayout.setStringRepresentation(layoutValues[0]);
    }

    // Scalable color descriptor of the query document, if stored.
    ScalableColor queryColor = null;
    String[] colorValues = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    boolean hasColor = colorValues != null && colorValues.length > 0;
    if (hasColor) {
        queryColor = new ScalableColor();
        queryColor.setStringRepresentation(colorValues[0]);
    }

    // Edge histogram descriptor of the query document, if stored.
    EdgeHistogram queryEdges = null;
    String[] edgeValues = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    boolean hasEdges = edgeValues != null && edgeValues.length > 0;
    if (hasEdges) {
        queryEdges = new EdgeHistogram();
        queryEdges.setStringRepresentation(edgeValues[0]);
    }

    float maxDistance = findSimilar(reader, queryLayout, queryColor, queryEdges);
    return new SimpleImageSearchHits(this.docs, maxDistance);
}
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException { // get the first document: if (!IndexReader.indexExists(reader.directory())) throw new FileNotFoundException("No index found at this specific location."); Document doc = reader.document(0); ScalableColor sc = null; ColorLayout cl = null; EdgeHistogram eh = null; String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT); if (cls != null && cls.length > 0) { cl = new ColorLayout(); cl.setStringRepresentation(cls[0]); } String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR); if (scs != null && scs.length > 0) { sc = new ScalableColor(); sc.setStringRepresentation(scs[0]); } String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM); if (ehs != null && ehs.length > 0) { eh = new EdgeHistogram(); eh.setStringRepresentation(ehs[0]); } HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>(); // find duplicates ... boolean hasDeletions = reader.hasDeletions(); int docs = reader.numDocs(); int numDuplicates = 0; for (int i = 0; i < docs; i++) { if (hasDeletions && reader.isDeleted(i)) { continue; } Document d = reader.document(i); float distance = getDistance(d, cl, sc, eh); if (!duplicates.containsKey(distance)) { duplicates.put(distance, new LinkedList<String>()); } else { numDuplicates++; } duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } if (numDuplicates == 0) return null; LinkedList<List<String>> results = new LinkedList<List<String>>(); for (float f : duplicates.keySet()) { if (duplicates.get(f).size() > 1) { results.add(duplicates.get(f)); } } return new SimpleImageDuplicates(results); }
private float getDistance(Document d, ColorLayout cl, ScalableColor sc, EdgeHistogram eh) { float distance = 0f; int descriptorCount = 0; if (cl != null) { String[] cls = d.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT); if (cls != null && cls.length > 0) { ColorLayout clsi = new ColorLayout(); clsi.setStringRepresentation(cls[0]); distance += cl.getDistance(clsi) * colorDistributionWeight; descriptorCount++; } } if (sc != null) { String[] scs = d.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR); if (scs != null && scs.length > 0) { ScalableColor scsi = new ScalableColor(); scsi.setStringRepresentation(scs[0]); distance += sc.getDistance(scsi) * colorHistogramWeight; descriptorCount++; } } if (eh != null) { String[] ehs = d.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM); if (ehs != null && ehs.length > 0) { EdgeHistogram ehsi = new EdgeHistogram(); ehsi.setStringRepresentation(ehs[0]); distance += eh.getDistance(ehsi) * textureWeight; descriptorCount++; } } if (descriptorCount > 0) { // TODO: find some better scoring mechanism, e.g. some normalization. One thing would be // linearization of the features! // For now: Averaging ... distance = distance / (float) descriptorCount; } return distance; }