Esempio n. 1
0
 private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex)
     throws IOException {
   ArrayList<String> images = FileUtils.getAllImages(new File(dir), true);
   IndexWriter iw =
       LuceneUtils.createIndexWriter(
           index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
   // select one feature for the large index:
   int count = 0;
   long ms = System.currentTimeMillis();
   DocumentBuilder builder = new ChainedDocumentBuilder();
   ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]);
   //        ((ChainedDocumentBuilder) builder).addBuilder(builders[0]);
   for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) {
     count++;
     if (count > 100 && count % 5000 == 0) {
       System.out.println(
           count
               + " files indexed. "
               + (System.currentTimeMillis() - ms) / (count)
               + " ms per file");
     }
     String file = iterator.next();
     try {
       iw.addDocument(builder.createDocument(new FileInputStream(file), file));
     } catch (Exception e) {
       System.err.println("Error: " + e.getMessage());
     }
   }
   iw.close();
 }
Esempio n. 2
0
  private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath)
      throws IOException {
    // eventually check if the directory is there or not ...
    IndexWriter iw = LuceneUtils.createIndexWriter(testIndex, false);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
      // TODO: cut toes from the image ... -> doesn't work out very well. Stable at first,
      // decreasing then.
      // TODO: Joint Histogram ...
      // TODO: LSA / PCA on the vectors ...-> this looks like a job for me :-D
      // TODO: local features ...
      Document doc = null;
      if (cutImages) {
        BufferedImage bimg =
            ImageUtils.cropImage(ImageIO.read(new FileInputStream(identifier)), 0, 0, 200, 69);
        doc = builder.createDocument(bimg, identifier);
      } else doc = builder.createDocument(new FileInputStream(identifier), identifier);
      iw.addDocument(doc);
      count++;
      if (count % 100 == 0) {
        int percent = (int) Math.floor(((double) count * 100.0) / (double) images.size());
        double timeTemp = (double) (System.currentTimeMillis() - time) / 1000d;
        int secsLeft =
            (int) Math.round(((timeTemp / (double) count) * (double) images.size()) - timeTemp);
        System.out.println(percent + "% finished (" + count + " files), " + secsLeft + " s left");
      }
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;

    System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.commit();
    iw.close();
  }
 public void testSurfIndexing() throws IOException {
   ArrayList<String> images = FileUtils.getAllImages(new File(testExtensive), true);
   ChainedDocumentBuilder db = new ChainedDocumentBuilder();
   db.addBuilder(new SurfDocumentBuilder());
   IndexWriter iw = LuceneUtils.createIndexWriter("sift-idx", true);
   for (int i = 0; i < images.size(); i++) {
     //            int sampleQuery = sampleQueries[i];
     //            String s = testExtensive + "/" + sampleQuery + ".jpg";
     iw.addDocument(db.createDocument(new FileInputStream(images.get(i)), images.get(i)));
     if (i % 100 == 99) System.out.print(".");
     if (i % 1000 == 999) System.out.print(" ~ " + i + " files indexed\n");
     if (i > 1000) break;
   }
   System.out.println("");
   iw.close();
 }
  public double testIndexing() throws IOException, IllegalAccessException, InstantiationException {
    LocalitySensitiveHashing.generateHashFunctions();
    LocalitySensitiveHashing.readHashFunctions();
    DocumentBuilder builder = new ChainedDocumentBuilder();
    ((ChainedDocumentBuilder) builder).addBuilder(DocumentBuilderFactory.getCEDDDocumentBuilder());

    //        System.out.println("-< Getting files to index >--------------");
    ArrayList<String> images = FileUtils.getAllImages(new File(testExtensive), true);
    //        System.out.println("-< Indexing " + images.size() + " files >--------------");

    IndexWriter iw =
        LuceneUtils.createIndexWriter(indexPath, true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
      CEDD cedd = new CEDD();
      cedd.extract(ImageIO.read(new FileInputStream(identifier)));
      Document doc = new Document();
      doc.add(new Field(DocumentBuilder.FIELD_NAME_CEDD, cedd.getByteArrayRepresentation()));
      doc.add(
          new Field(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              identifier,
              Field.Store.YES,
              Field.Index.NOT_ANALYZED));
      int[] hashes = LocalitySensitiveHashing.generateHashes(cedd.getDoubleHistogram());
      StringBuilder hash = new StringBuilder(512);
      for (int i = 0; i < hashes.length; i++) {
        hash.append(hashes[i]);
        hash.append(' ');
      }
      //            System.out.println("hash = " + hash);
      doc.add(new Field("hash", hash.toString(), Field.Store.YES, Field.Index.ANALYZED));
      iw.addDocument(doc);
      count++;
      //            if (count % 100 == 0) System.out.println(count + " files indexed.");
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;

    //        System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.close();

    return testSearch();
  }
Esempio n. 5
0
 private Document indexFiles() throws IOException {
   System.out.println("---< indexing >-------------------------");
   int count = 0;
   DocumentBuilder builder = getDocumentBuilder();
   ArrayList<String> allImages = FileUtils.getAllImages(new File("wang-1000"), true);
   IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
   Document document = null;
   for (Iterator<String> iterator = allImages.iterator(); iterator.hasNext(); ) {
     String filename = iterator.next();
     BufferedImage image = ImageIO.read(new FileInputStream(filename));
     document = builder.createDocument(image, filename);
     iw.addDocument(document);
     count++;
     if (count % 50 == 0)
       System.out.println("finished " + (count * 100) / allImages.size() + "% of the images.");
   }
   iw.close();
   return document;
 }
 public void run() {
   // do it ...
   try {
     IndexWriter indexWriter =
         LuceneUtils.createIndexWriter(
             indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
     for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext(); ) {
       File inputFile = iterator.next();
       if (verbose) System.out.println("Processing " + inputFile.getPath() + ".");
       if (verbose) System.out.println("Counting images.");
       run = 0;
       readFile(indexWriter, inputFile);
       if (verbose) System.out.printf("%d images found in the data file.\n", docCount);
       int numberOfRepresentatives = 1000; // TODO: clever selection.
       // select a number of representative "fixed stars" randomly from file
       if (numberOfRepresentatives > Math.sqrt(docCount))
         numberOfRepresentatives = (int) Math.sqrt(docCount);
       if (verbose)
         System.out.printf(
             "Selecting %d representative images for hashing.\n", numberOfRepresentatives);
       representativesID = new HashSet<Integer>(numberOfRepresentatives);
       while (representativesID.size() < numberOfRepresentatives) {
         representativesID.add((int) Math.floor(Math.random() * (docCount - 1)));
       }
       representatives = new ArrayList<LireFeature>(numberOfRepresentatives);
       docCount = 0;
       run = 1;
       if (verbose) System.out.println("Now getting representatives from the data file.");
       readFile(indexWriter, inputFile);
       docCount = 0;
       run = 2;
       if (verbose) System.out.println("Finally we start the indexing process, please wait ...");
       readFile(indexWriter, inputFile);
       if (verbose) System.out.println("Indexing finished.");
     }
     indexWriter.commit();
     indexWriter.close();
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
Esempio n. 7
0
 public void testIndexLarge() throws IOException {
   //        ArrayList<String> images = FileUtils.getAllImages(new
   // File("C:\\Temp\\testImagelogos"), true);
   ArrayList<String> images =
       FileUtils.getAllImages(
           new File("C:\\Java\\Projects\\LireSVN\\testdata\\flickr-10000"), false);
   IndexWriter iw =
       LuceneUtils.createIndexWriter(
           "index-large", true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
   // select one feature for the large index:
   int featureIndex = 13;
   int count = 0;
   long ms = System.currentTimeMillis();
   DocumentBuilder builder = new ChainedDocumentBuilder();
   ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]);
   //        ((ChainedDocumentBuilder) builder).addBuilder(builders[0]);
   for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) {
     count++;
     if (count > 100 && count % 500 == 0) {
       System.out.println(
           count
               + " files indexed. "
               + (System.currentTimeMillis() - ms) / (count)
               + " ms per file");
     }
     String file = iterator.next();
     try {
       // try to trim the image first ....
       //                BufferedImage img = ImageUtils.trimWhiteSpace(ImageIO.read(new
       // FileInputStream(file)));
       //                iw.addDocument(builder.createDocument(img, file));
       iw.addDocument(builder.createDocument(new FileInputStream(file), file));
     } catch (Exception e) {
       e
           .printStackTrace(); // To change body of catch statement use File | Settings | File
                               // Templates.
     }
   }
   iw.close();
 }
Esempio n. 8
0
  private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath)
      throws IOException {
    //        System.out.println(">> Indexing " + images.size() + " files.");
    //        DocumentBuilder builder = DocumentBuilderFactory.getExtensiveDocumentBuilder();
    //        DocumentBuilder builder = DocumentBuilderFactory.getFastDocumentBuilder();
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
      Document doc = builder.createDocument(new FileInputStream(identifier), identifier);
      iw.addDocument(doc);
      count++;
      if (count % 100 == 0) System.out.println(count + " files indexed.");
      //            if (count == 200) break;
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;

    System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.commit();
    iw.close();
  }
Esempio n. 9
0
  public void testCreateAndSearchSmallIndex() throws IOException {
    for (int i = 0, buildersLength = builders.length; i < buildersLength; i++) {
      DocumentBuilder b = builders[i];
      // create an index with a specific builder:
      IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true);
      for (String identifier : testFiles) {
        Document doc =
            b.createDocument(new FileInputStream(testFilesPath + identifier), identifier);
        doc.add(new StoredField("video_file", "surgery1.mp4"));
        doc.add(new StoredField("timestamp", "25"));
        iw.addDocument(doc);
      }
      iw.close();

      ImageSearcher s = searchers[i];
      IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small")));
      for (int k = 0; k < reader.maxDoc(); k++) {
        Document query = reader.document(k);
        ImageSearchHits hits = s.search(query, reader);
        for (int y = 0; y < hits.length(); y++) {
          Document result = hits.doc(y);
          if (y == 0) {
            // check if the first result is the query:
            assertEquals(
                result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0].equals(
                    query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
                true);
            System.out.println(result.getValues("video_file")[0]);
          } else {
            // check if they are ordered by distance:
            assertEquals(hits.score(y) < hits.score(y - 1), true);
          }
        }
      }
    }
  }
  /**
   * Creates a set of reference objects and stores it in a new index (name "<indexPath>-ro"). Then
   * creates ordered lists of reference object positions for each data item in the index with given
   * feature. Finally a new index (name "<indexPath>-ms") is created where all the original
   * documents as well as the new data are stored.
   *
   * @param indexPath the path to the original index
   * @throws IOException
   */
  public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
      throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
      referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
      System.err.println(
          "WARNING: There are deleted docs in your index. You should "
              + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
      count++;
      Document document = reader.document(i);
      document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
      iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);
  }