Ejemplo n.º 1
0
  /**
   * Creates one document per image file with the given builder, adds it to a Lucene index and
   * prints progress plus timing information to standard output.
   *
   * <p>When the {@code cutImages} field is set, each image is first cropped with
   * {@code ImageUtils.cropImage(image, 0, 0, 200, 69)}; otherwise the builder reads the file
   * stream directly.
   *
   * @param images paths of the image files to index; each path is also used as the identifier
   * @param builder builder used to create the document for every image
   * @param indexPath not used by this method; the index is opened on the {@code testIndex} field
   * @throws IOException if an image cannot be read or the index cannot be written
   */
  private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath)
      throws IOException {
    // eventually check if the directory is there or not ...
    // NOTE(review): the writer is opened on the testIndex field, not on the indexPath parameter
    // -- confirm that this is intended.
    IndexWriter iw = LuceneUtils.createIndexWriter(testIndex, false);
    try {
      int count = 0;
      long time = System.currentTimeMillis();
      for (String identifier : images) {
        // TODO: cut toes from the image ... -> doesn't work out very well. Stable at first,
        // decreasing then.
        // TODO: Joint Histogram ...
        // TODO: LSA / PCA on the vectors ...-> this looks like a job for me :-D
        // TODO: local features ...
        FileInputStream in = new FileInputStream(identifier);
        try {
          Document doc;
          if (cutImages) {
            BufferedImage bimg = ImageUtils.cropImage(ImageIO.read(in), 0, 0, 200, 69);
            doc = builder.createDocument(bimg, identifier);
          } else {
            doc = builder.createDocument(in, identifier);
          }
          iw.addDocument(doc);
        } finally {
          // ImageIO.read does not close the stream it is given, so release the handle here.
          in.close();
        }
        count++;
        if (count % 100 == 0) {
          int percent = (int) Math.floor(((double) count * 100.0) / (double) images.size());
          double timeTemp = (double) (System.currentTimeMillis() - time) / 1000d;
          // Linear extrapolation: average time per file so far times the total, minus elapsed.
          int secsLeft =
              (int) Math.round(((timeTemp / (double) count) * (double) images.size()) - timeTemp);
          System.out.println(
              percent + "% finished (" + count + " files), " + secsLeft + " s left");
        }
      }
      long timeTaken = (System.currentTimeMillis() - time);
      float sec = ((float) timeTaken) / 1000f;
      // Guard against an empty image list, which would otherwise divide by zero.
      long msPerImage = count == 0 ? 0L : timeTaken / count;

      System.out.println(sec + " seconds taken, " + msPerImage + " ms per image.");
      iw.commit();
    } finally {
      // Close the writer even when indexing fails so it is not left open.
      iw.close();
    }
  }
Ejemplo n.º 2
0
 /**
  * Indexes all images that {@code FileUtils.getAllImages} returns for {@code dir}, using only
  * the builder at position {@code featureIndex} of the {@code builders} field.
  *
  * <p>Indexing is best effort: a file that fails is reported on standard error and skipped.
  * Every 5000 processed files the running average time per file is printed.
  *
  * @param dir directory that is scanned for images
  * @param index location of the index handed to {@code LuceneUtils.createIndexWriter}
  * @param featureIndex position in {@code builders} of the single builder to use
  * @param createNewIndex passed through to {@code LuceneUtils.createIndexWriter}
  * @throws IOException if the index writer cannot be created or closed
  */
 private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex)
     throws IOException {
   ArrayList<String> images = FileUtils.getAllImages(new File(dir), true);
   IndexWriter iw =
       LuceneUtils.createIndexWriter(
           index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
   try {
     // select one feature for the large index:
     int count = 0;
     long ms = System.currentTimeMillis();
     ChainedDocumentBuilder chain = new ChainedDocumentBuilder();
     chain.addBuilder(builders[featureIndex]);
     DocumentBuilder builder = chain;
     for (String file : images) {
       try {
         FileInputStream in = new FileInputStream(file);
         try {
           iw.addDocument(builder.createDocument(in, file));
         } finally {
           // Release the file handle for every image, including those that fail.
           in.close();
         }
       } catch (Exception e) {
         // Best effort: name the offending file and carry on with the next one.
         System.err.println("Error: " + file + ": " + e);
       }
       // Count and report only after the file has been handled, so the number is accurate.
       count++;
       if (count % 5000 == 0) {
         System.out.println(
             count
                 + " files indexed. "
                 + (System.currentTimeMillis() - ms) / (count)
                 + " ms per file");
       }
     }
   } finally {
     // Close the writer even when something unexpected is thrown.
     iw.close();
   }
 }
Ejemplo n.º 3
0
 /**
  * Indexes every image that {@code FileUtils.getAllImages} returns for the {@code wang-1000}
  * directory into the index at {@code indexPath}, using the builder from
  * {@code getDocumentBuilder()}. Progress is printed after every 50 images.
  *
  * @return the document created for the last image, or {@code null} if no image was found
  * @throws IOException if an image cannot be read or the index cannot be written
  */
 private Document indexFiles() throws IOException {
   System.out.println("---< indexing >-------------------------");
   int count = 0;
   DocumentBuilder builder = getDocumentBuilder();
   ArrayList<String> allImages = FileUtils.getAllImages(new File("wang-1000"), true);
   IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
   Document document = null;
   try {
     for (String filename : allImages) {
       BufferedImage image;
       FileInputStream in = new FileInputStream(filename);
       try {
         image = ImageIO.read(in);
       } finally {
         // ImageIO.read leaves the stream open; close it once the image is decoded.
         in.close();
       }
       document = builder.createDocument(image, filename);
       iw.addDocument(document);
       count++;
       if (count % 50 == 0) {
         System.out.println("finished " + (count * 100) / allImages.size() + "% of the images.");
       }
     }
   } finally {
     // Close the writer even when an image fails, so it is not left open.
     iw.close();
   }
   return document;
 }
Ejemplo n.º 4
0
 /**
  * Builds the {@code index-large} index from the images in a hard-coded local directory, using
  * only the builder at position 13 of the {@code builders} field.
  *
  * <p>Indexing is best effort: a file that fails has its stack trace printed and is skipped.
  * Every 500 processed files the running average time per file is printed.
  *
  * @throws IOException if the index writer cannot be created or closed
  */
 public void testIndexLarge() throws IOException {
   //        ArrayList<String> images = FileUtils.getAllImages(new
   // File("C:\\Temp\\testImagelogos"), true);
   ArrayList<String> images =
       FileUtils.getAllImages(
           new File("C:\\Java\\Projects\\LireSVN\\testdata\\flickr-10000"), false);
   IndexWriter iw =
       LuceneUtils.createIndexWriter(
           "index-large", true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
   try {
     // select one feature for the large index:
     int featureIndex = 13;
     int count = 0;
     long ms = System.currentTimeMillis();
     ChainedDocumentBuilder chain = new ChainedDocumentBuilder();
     chain.addBuilder(builders[featureIndex]);
     DocumentBuilder builder = chain;
     for (String file : images) {
       try {
         // Disabled alternative: trim the image first with
         // ImageUtils.trimWhiteSpace(ImageIO.read(...)) and build the document from the result.
         FileInputStream in = new FileInputStream(file);
         try {
           iw.addDocument(builder.createDocument(in, file));
         } finally {
           // Release the file handle for every image, including those that fail.
           in.close();
         }
       } catch (Exception e) {
         // Best effort: report the failure and carry on with the next file.
         e.printStackTrace();
       }
       // Count and report only after the file has been handled, so the number is accurate.
       count++;
       if (count % 500 == 0) {
         System.out.println(
             count
                 + " files indexed. "
                 + (System.currentTimeMillis() - ms) / (count)
                 + " ms per file");
       }
     }
   } finally {
     // Close the writer even when something unexpected is thrown.
     iw.close();
   }
 }
Ejemplo n.º 5
0
  /**
   * Creates one document per image file with the given builder and adds it to the index at
   * {@code indexPath}. Progress is printed every 100 files, followed by a timing summary.
   *
   * @param images paths of the image files to index; each path is also used as the identifier
   * @param builder builder used to create the document for every image
   * @param indexPath location of the index handed to {@code LuceneUtils.createIndexWriter}
   * @throws IOException if an image cannot be read or the index cannot be written
   */
  private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath)
      throws IOException {
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
    try {
      int count = 0;
      long time = System.currentTimeMillis();
      for (String identifier : images) {
        FileInputStream in = new FileInputStream(identifier);
        try {
          Document doc = builder.createDocument(in, identifier);
          iw.addDocument(doc);
        } finally {
          // Release the file handle once the document has been added.
          in.close();
        }
        count++;
        if (count % 100 == 0) {
          System.out.println(count + " files indexed.");
        }
      }
      long timeTaken = (System.currentTimeMillis() - time);
      float sec = ((float) timeTaken) / 1000f;
      // Guard against an empty image list, which would otherwise divide by zero.
      long msPerImage = count == 0 ? 0L : timeTaken / count;

      System.out.println(sec + " seconds taken, " + msPerImage + " ms per image.");
      iw.commit();
    } finally {
      // Close the writer even when indexing fails so it is not left open.
      iw.close();
    }
  }
Ejemplo n.º 6
0
  /**
   * For every entry of the {@code builders} field, rebuilds a small index from {@code testFiles}
   * and then queries it with the searcher at the same position in {@code searchers}.
   *
   * <p>Each indexed document is used as a query in turn. The test asserts that the first hit
   * carries the same identifier as the query and that every later hit has a strictly lower score
   * than the hit before it.
   *
   * @throws IOException if a test file cannot be read or the index cannot be written or read
   */
  public void testCreateAndSearchSmallIndex() throws IOException {
    String smallIndexPath = indexPath + "-small";
    for (int i = 0, buildersLength = builders.length; i < buildersLength; i++) {
      DocumentBuilder b = builders[i];
      // create an index with a specific builder:
      IndexWriter iw = LuceneUtils.createIndexWriter(smallIndexPath, true);
      try {
        for (String identifier : testFiles) {
          FileInputStream in = new FileInputStream(testFilesPath + identifier);
          try {
            Document doc = b.createDocument(in, identifier);
            doc.add(new StoredField("video_file", "surgery1.mp4"));
            doc.add(new StoredField("timestamp", "25"));
            iw.addDocument(doc);
          } finally {
            // Release the file handle once the document has been added.
            in.close();
          }
        }
      } finally {
        iw.close();
      }

      ImageSearcher s = searchers[i];
      IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(smallIndexPath)));
      try {
        for (int k = 0; k < reader.maxDoc(); k++) {
          Document query = reader.document(k);
          ImageSearchHits hits = s.search(query, reader);
          for (int y = 0; y < hits.length(); y++) {
            Document result = hits.doc(y);
            if (y == 0) {
              // check if the first result is the query; comparing the identifiers directly
              // makes a failure show both values:
              assertEquals(
                  query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0],
                  result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
              System.out.println(result.getValues("video_file")[0]);
            } else {
              // check if they are ordered by distance:
              assertEquals(true, hits.score(y) < hits.score(y - 1));
            }
          }
        }
      } finally {
        // The reader was never closed before; close it on every iteration.
        reader.close();
      }
    }
  }