コード例 #1
0
  /**
   * Searches the index for images similar to the given query document.
   *
   * <p>The query descriptors are rebuilt from the first stored value of the color layout,
   * scalable color and edge histogram fields of {@code doc}. A descriptor whose field is
   * absent stays {@code null} and is passed on as such to {@code findSimilar}.
   *
   * @param doc the query document holding the stored descriptor strings
   * @param reader the index reader handed to {@code findSimilar}
   * @return the hits built from {@code this.docs} and the value returned by {@code findSimilar}
   * @throws IOException declared for index access performed during the search
   */
  public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    // Color layout of the query image, if the document stores one.
    ColorLayout queryColorLayout = null;
    String[] colorLayoutValues = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    boolean hasColorLayout = colorLayoutValues != null && colorLayoutValues.length > 0;
    if (hasColorLayout) {
      queryColorLayout = new ColorLayout();
      queryColorLayout.setStringRepresentation(colorLayoutValues[0]);
    }

    // Scalable color of the query image, if the document stores one.
    ScalableColor queryScalableColor = null;
    String[] scalableColorValues = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    boolean hasScalableColor = scalableColorValues != null && scalableColorValues.length > 0;
    if (hasScalableColor) {
      queryScalableColor = new ScalableColor();
      queryScalableColor.setStringRepresentation(scalableColorValues[0]);
    }

    // Edge histogram of the query image, if the document stores one.
    EdgeHistogram queryEdgeHistogram = null;
    String[] edgeHistogramValues = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    boolean hasEdgeHistogram = edgeHistogramValues != null && edgeHistogramValues.length > 0;
    if (hasEdgeHistogram) {
      queryEdgeHistogram = new EdgeHistogram();
      queryEdgeHistogram.setStringRepresentation(edgeHistogramValues[0]);
    }

    // Run the search first and only then read this.docs for the result object;
    // the call is deliberately kept as its own statement ahead of the return.
    float maxDistance =
        findSimilar(reader, queryColorLayout, queryScalableColor, queryEdgeHistogram);
    return new SimpleImageSearchHits(this.docs, maxDistance);
  }
コード例 #2
0
  /**
   * Finds groups of documents in the index that have exactly the same distance to a
   * reference document and reports them as duplicates.
   *
   * <p>The reference is the first non-deleted document of the index. Every non-deleted
   * document is compared to it with {@code getDistance}; the identifiers of documents that
   * yield an equal distance value are collected in one group. Only groups with more than one
   * member are returned.
   *
   * @param reader the index reader to scan
   * @return the duplicate groups, or {@code null} if no two documents share a distance value
   *     or the index contains no live document
   * @throws FileNotFoundException if no index exists in the reader's directory
   * @throws IOException if reading from the index fails
   */
  public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    if (!IndexReader.indexExists(reader.directory())) {
      throw new FileNotFoundException("No index found at this specific location.");
    }

    boolean hasDeletions = reader.hasDeletions();
    // Document numbers range over [0, maxDoc). numDocs() only counts live documents, so using
    // it as the loop bound would skip trailing documents as soon as the index has deletions.
    int maxDoc = reader.maxDoc();

    // Use the first live document as reference; document 0 itself may have been deleted.
    int referenceId = 0;
    while (referenceId < maxDoc && hasDeletions && reader.isDeleted(referenceId)) {
      referenceId++;
    }
    if (referenceId >= maxDoc) {
      // Nothing to compare, hence no duplicates.
      return null;
    }
    Document doc = reader.document(referenceId);

    ScalableColor sc = null;
    ColorLayout cl = null;
    EdgeHistogram eh = null;

    String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    if (cls != null && cls.length > 0) {
      cl = new ColorLayout();
      cl.setStringRepresentation(cls[0]);
    }
    String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    if (scs != null && scs.length > 0) {
      sc = new ScalableColor();
      sc.setStringRepresentation(scs[0]);
    }
    String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    if (ehs != null && ehs.length > 0) {
      eh = new EdgeHistogram();
      eh.setStringRepresentation(ehs[0]);
    }

    // Maps a distance to the identifiers of all documents having exactly that distance.
    HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

    int numDuplicates = 0;
    for (int i = 0; i < maxDoc; i++) {
      if (hasDeletions && reader.isDeleted(i)) {
        continue;
      }
      Document d = reader.document(i);
      float distance = getDistance(d, cl, sc, eh);

      List<String> group = duplicates.get(distance);
      if (group == null) {
        group = new LinkedList<String>();
        duplicates.put(distance, group);
      } else {
        // The distance was seen before, so this document duplicates an earlier one.
        numDuplicates++;
      }
      group.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }

    if (numDuplicates == 0) {
      return null;
    }

    // Keep only the groups that actually contain more than one document.
    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (List<String> group : duplicates.values()) {
      if (group.size() > 1) {
        results.add(group);
      }
    }
    return new SimpleImageDuplicates(results);
  }
コード例 #3
0
  /**
   * Computes the averaged, weighted distance between the given reference descriptors and the
   * descriptors stored in document {@code d}.
   *
   * <p>A descriptor contributes only if the reference is non-null and the document stores a
   * value for the corresponding field. Each contribution is the descriptor distance multiplied
   * by its weight ({@code colorDistributionWeight}, {@code colorHistogramWeight} or
   * {@code textureWeight}); the sum is divided by the number of contributing descriptors.
   *
   * @param d the document whose stored descriptors are compared
   * @param cl reference color layout, or {@code null} to skip it
   * @param sc reference scalable color, or {@code null} to skip it
   * @param eh reference edge histogram, or {@code null} to skip it
   * @return the averaged weighted distance, or {@code 0f} if no descriptor contributed
   */
  private float getDistance(Document d, ColorLayout cl, ScalableColor sc, EdgeHistogram eh) {
    float weightedSum = 0f;
    int contributing = 0;

    // Color layout: the document field is only read when a reference descriptor exists.
    String[] storedColorLayout =
        (cl == null) ? null : d.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    if (storedColorLayout != null && storedColorLayout.length > 0) {
      ColorLayout candidate = new ColorLayout();
      candidate.setStringRepresentation(storedColorLayout[0]);
      weightedSum += cl.getDistance(candidate) * colorDistributionWeight;
      contributing++;
    }

    // Scalable color.
    String[] storedScalableColor =
        (sc == null) ? null : d.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    if (storedScalableColor != null && storedScalableColor.length > 0) {
      ScalableColor candidate = new ScalableColor();
      candidate.setStringRepresentation(storedScalableColor[0]);
      weightedSum += sc.getDistance(candidate) * colorHistogramWeight;
      contributing++;
    }

    // Edge histogram.
    String[] storedEdgeHistogram =
        (eh == null) ? null : d.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    if (storedEdgeHistogram != null && storedEdgeHistogram.length > 0) {
      EdgeHistogram candidate = new EdgeHistogram();
      candidate.setStringRepresentation(storedEdgeHistogram[0]);
      weightedSum += eh.getDistance(candidate) * textureWeight;
      contributing++;
    }

    // TODO: find some better scoring mechanism, e.g. some normalization. One thing would be
    // linearization of the features!
    // For now: plain average over the descriptors that contributed.
    return contributing > 0 ? weightedSum / (float) contributing : weightedSum;
  }