public Document createDocument(BufferedImage image, String identifier) {
    assert (image != null);
    BufferedImage bimg = image;
    // Scaling image is especially with the correlogram features very important!
    // All images are scaled to guarantee a certain upper limit for indexing.
    if (Math.max(image.getHeight(), image.getWidth()) > MAX_IMAGE_DIMENSION) {
      bimg = ImageUtils.scaleImage(image, MAX_IMAGE_DIMENSION);
    }
    Document doc = null;
    logger.finer("Starting extraction from image [CEDD - fast].");
    CEDD vd = new CEDD();
    vd.extract(bimg);
    logger.fine("Extraction finished [CEDD - fast].");

    doc = new Document();
    doc.add(new Field(DocumentBuilder.FIELD_NAME_CEDD, vd.getByteArrayRepresentation()));
    if (identifier != null)
      doc.add(
          new Field(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              identifier,
              Field.Store.YES,
              Field.Index.NOT_ANALYZED));

    return doc;
  }
  public double singleSearch(int docNum)
      throws IOException, InstantiationException, IllegalAccessException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));

    // -----------

    String query = reader.document(docNum).getValues("hash")[0];
    CEDD ceddQuery = new CEDD();
    ceddQuery.setByteArrayRepresentation(
        reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes,
        reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset,
        reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length);

    // -----------

    HashSet<String> gold = new HashSet<String>(numImagesEval);
    ImageSearcher cis = ImageSearcherFactory.createCEDDImageSearcher(100);
    ImageSearchHits hits = cis.search(reader.document(docNum), reader);
    for (int i = 0; i < 10; i++) {
      gold.add(hits.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
    }

    // ------------

    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(
        new SimilarityBase() {
          @Override
          protected float score(BasicStats basicStats, float freq, float v2) {
            return 1;
          }

          @Override
          public String toString() {
            return null;
          }
        });
    TopDocs topDocs = searcher.search(createQuery(query), 500);
    topDocs = rerank(topDocs, ceddQuery, reader);
    //        System.out.println("topDocs.scoreDocs.length = " + topDocs.scoreDocs.length);
    double numMatches = 0;
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
      ScoreDoc scoreDoc = topDocs.scoreDocs[i];
      //            System.out.print(scoreDoc.score + ": ");
      String file =
          reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
      //            System.out.println(file.substring(file.lastIndexOf('/') + 1) +
      // (gold.contains(file)?" x":" o"));
      if (gold.contains(file)) numMatches++;
    }
    return numMatches;
  }
Beispiel #3
0
  public void tttestGetDistribution() throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    // get the first document:
    //        if (!IndexReader.indexExists(reader.directory()))
    //            throw new FileNotFoundException("No index found at this specific location.");

    CEDD cedd1 = new CEDD();
    FCTH fcth1 = new FCTH();

    CEDD cedd2 = new CEDD();
    FCTH fcth2 = new FCTH();

    JCD jcd1 = new JCD();
    JCD jcd2 = new JCD();
    String[] cls;

    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.

      Document doc = reader.document(i);
      cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
      if (cls != null && cls.length > 0) cedd1.setStringRepresentation(cls[0]);
      cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
      if (cls != null && cls.length > 0) fcth1.setStringRepresentation(cls[0]);

      for (int j = i + 1; j < docs; j++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
          continue; // if it is deleted, just ignore it.
        Document doc2 = reader.document(j);
        cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
        if (cls != null && cls.length > 0) cedd2.setStringRepresentation(cls[0]);
        cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
        if (cls != null && cls.length > 0) fcth2.setStringRepresentation(cls[0]);
        jcd1.init(cedd1, fcth1);
        jcd2.init(cedd2, fcth2);
        bw.write(
            cedd1.getDistance(cedd2)
                + ";"
                + fcth1.getDistance(fcth2)
                + ";"
                + jcd1.getDistance(jcd2)
                + "\n");
      }
      if (i % 100 == 0) System.out.println(i + " entries processed ... ");
    }
    bw.close();
  }
  public double testIndexing() throws IOException, IllegalAccessException, InstantiationException {
    LocalitySensitiveHashing.generateHashFunctions();
    LocalitySensitiveHashing.readHashFunctions();
    DocumentBuilder builder = new ChainedDocumentBuilder();
    ((ChainedDocumentBuilder) builder).addBuilder(DocumentBuilderFactory.getCEDDDocumentBuilder());

    //        System.out.println("-< Getting files to index >--------------");
    ArrayList<String> images = FileUtils.getAllImages(new File(testExtensive), true);
    //        System.out.println("-< Indexing " + images.size() + " files >--------------");

    IndexWriter iw =
        LuceneUtils.createIndexWriter(indexPath, true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
      CEDD cedd = new CEDD();
      cedd.extract(ImageIO.read(new FileInputStream(identifier)));
      Document doc = new Document();
      doc.add(new Field(DocumentBuilder.FIELD_NAME_CEDD, cedd.getByteArrayRepresentation()));
      doc.add(
          new Field(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              identifier,
              Field.Store.YES,
              Field.Index.NOT_ANALYZED));
      int[] hashes = LocalitySensitiveHashing.generateHashes(cedd.getDoubleHistogram());
      StringBuilder hash = new StringBuilder(512);
      for (int i = 0; i < hashes.length; i++) {
        hash.append(hashes[i]);
        hash.append(' ');
      }
      //            System.out.println("hash = " + hash);
      doc.add(new Field("hash", hash.toString(), Field.Store.YES, Field.Index.ANALYZED));
      iw.addDocument(doc);
      count++;
      //            if (count % 100 == 0) System.out.println(count + " files indexed.");
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;

    //        System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.close();

    return testSearch();
  }
  public void testOutputSearchResults()
      throws IOException, InstantiationException, IllegalAccessException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int docNum = 0; // doc to search for.
    // -----------

    String query = reader.document(docNum).getValues("hash")[0];
    CEDD ceddQuery = new CEDD();
    ceddQuery.setByteArrayRepresentation(
        reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes,
        reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset,
        reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length);

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(createQuery(query), numImagesEval);
    FileUtils.saveImageResultsToPng(
        "result_lsh",
        topDocs,
        reader.document(docNum).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0],
        reader);
  }