private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex) throws IOException { ArrayList<String> images = FileUtils.getAllImages(new File(dir), true); IndexWriter iw = LuceneUtils.createIndexWriter( index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); // select one feature for the large index: int count = 0; long ms = System.currentTimeMillis(); DocumentBuilder builder = new ChainedDocumentBuilder(); ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]); // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]); for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) { count++; if (count > 100 && count % 5000 == 0) { System.out.println( count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file"); } String file = iterator.next(); try { iw.addDocument(builder.createDocument(new FileInputStream(file), file)); } catch (Exception e) { System.err.println("Error: " + e.getMessage()); } } iw.close(); }
public void testIndexLarge() throws IOException { // ArrayList<String> images = FileUtils.getAllImages(new // File("C:\\Temp\\testImagelogos"), true); ArrayList<String> images = FileUtils.getAllImages( new File("C:\\Java\\Projects\\LireSVN\\testdata\\flickr-10000"), false); IndexWriter iw = LuceneUtils.createIndexWriter( "index-large", true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); // select one feature for the large index: int featureIndex = 13; int count = 0; long ms = System.currentTimeMillis(); DocumentBuilder builder = new ChainedDocumentBuilder(); ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]); // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]); for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) { count++; if (count > 100 && count % 500 == 0) { System.out.println( count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file"); } String file = iterator.next(); try { // try to trim the image first .... // BufferedImage img = ImageUtils.trimWhiteSpace(ImageIO.read(new // FileInputStream(file))); // iw.addDocument(builder.createDocument(img, file)); iw.addDocument(builder.createDocument(new FileInputStream(file), file)); } catch (Exception e) { e .printStackTrace(); // To change body of catch statement use File | Settings | File // Templates. } } iw.close(); }
public void testCreateAndSearchSmallIndex() throws IOException { for (int i = 0, buildersLength = builders.length; i < buildersLength; i++) { DocumentBuilder b = builders[i]; // create an index with a specific builder: IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true); for (String identifier : testFiles) { Document doc = b.createDocument(new FileInputStream(testFilesPath + identifier), identifier); doc.add(new StoredField("video_file", "surgery1.mp4")); doc.add(new StoredField("timestamp", "25")); iw.addDocument(doc); } iw.close(); ImageSearcher s = searchers[i]; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small"))); for (int k = 0; k < reader.maxDoc(); k++) { Document query = reader.document(k); ImageSearchHits hits = s.search(query, reader); for (int y = 0; y < hits.length(); y++) { Document result = hits.doc(y); if (y == 0) { // check if the first result is the query: assertEquals( result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0].equals( query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), true); System.out.println(result.getValues("video_file")[0]); } else { // check if they are ordered by distance: assertEquals(hits.score(y) < hits.score(y - 1), true); } } } } }
private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath) throws IOException { // System.out.println(">> Indexing " + images.size() + " files."); // DocumentBuilder builder = DocumentBuilderFactory.getExtensiveDocumentBuilder(); // DocumentBuilder builder = DocumentBuilderFactory.getFastDocumentBuilder(); IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true); int count = 0; long time = System.currentTimeMillis(); for (String identifier : images) { Document doc = builder.createDocument(new FileInputStream(identifier), identifier); iw.addDocument(doc); count++; if (count % 100 == 0) System.out.println(count + " files indexed."); // if (count == 200) break; } long timeTaken = (System.currentTimeMillis() - time); float sec = ((float) timeTaken) / 1000f; System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image."); iw.commit(); iw.close(); }