private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex) throws IOException { ArrayList<String> images = FileUtils.getAllImages(new File(dir), true); IndexWriter iw = LuceneUtils.createIndexWriter( index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); // select one feature for the large index: int count = 0; long ms = System.currentTimeMillis(); DocumentBuilder builder = new ChainedDocumentBuilder(); ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]); // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]); for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) { count++; if (count > 100 && count % 5000 == 0) { System.out.println( count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file"); } String file = iterator.next(); try { iw.addDocument(builder.createDocument(new FileInputStream(file), file)); } catch (Exception e) { System.err.println("Error: " + e.getMessage()); } } iw.close(); }
public void testSearchRunTime() throws IOException { int queryDocID; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new"))); int featureIndex = 0; ImageSearchHits hits = searchers[featureIndex].search(reader.document(0), reader); hits = searchers[featureIndex].search(reader.document(1), reader); long ms = System.currentTimeMillis(); for (int i = 0; i < 100; i++) { queryDocID = i; // select one feature for the large index: hits = searchers[featureIndex].search(reader.document(queryDocID), reader); } ms = System.currentTimeMillis() - ms; System.out.println("ms = " + ms / 100); }
public void testRerankFilters() throws IOException { int queryDocID = (int) (Math.random() * 10000); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large"))); // select one feature for the large index: int featureIndex = 4; int count = 0; long ms = System.currentTimeMillis(); ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader); RerankFilter rerank = new RerankFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); LsaFilter lsa = new LsaFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_0_old", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = rerank.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_1_new", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = lsa.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng( "GeneralTest_rerank_2_lsa", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); }
public void testIndexLarge() throws IOException { // ArrayList<String> images = FileUtils.getAllImages(new // File("C:\\Temp\\testImagelogos"), true); ArrayList<String> images = FileUtils.getAllImages( new File("C:\\Java\\Projects\\LireSVN\\testdata\\flickr-10000"), false); IndexWriter iw = LuceneUtils.createIndexWriter( "index-large", true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); // select one feature for the large index: int featureIndex = 13; int count = 0; long ms = System.currentTimeMillis(); DocumentBuilder builder = new ChainedDocumentBuilder(); ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]); // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]); for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) { count++; if (count > 100 && count % 500 == 0) { System.out.println( count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file"); } String file = iterator.next(); try { // try to trim the image first .... // BufferedImage img = ImageUtils.trimWhiteSpace(ImageIO.read(new // FileInputStream(file))); // iw.addDocument(builder.createDocument(img, file)); iw.addDocument(builder.createDocument(new FileInputStream(file), file)); } catch (Exception e) { e .printStackTrace(); // To change body of catch statement use File | Settings | File // Templates. } } iw.close(); }
private void indexFiles(ArrayList<String> images, DocumentBuilder builder, String indexPath) throws IOException { // System.out.println(">> Indexing " + images.size() + " files."); // DocumentBuilder builder = DocumentBuilderFactory.getExtensiveDocumentBuilder(); // DocumentBuilder builder = DocumentBuilderFactory.getFastDocumentBuilder(); IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true); int count = 0; long time = System.currentTimeMillis(); for (String identifier : images) { Document doc = builder.createDocument(new FileInputStream(identifier), identifier); iw.addDocument(doc); count++; if (count % 100 == 0) System.out.println(count + " files indexed."); // if (count == 200) break; } long timeTaken = (System.currentTimeMillis() - time); float sec = ((float) timeTaken) / 1000f; System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image."); iw.commit(); iw.close(); }
public void testReUse() throws IOException, IllegalAccessException, InstantiationException { ArrayList<String> testFiles = FileUtils.getAllImages(new File("testdata/ferrari"), true); for (Class c : featureClasses) { LireFeature f1 = (LireFeature) c.newInstance(); System.out.println(c.getName()); for (String testFile : testFiles) { f1.extract(ImageIO.read(new File(testFile))); LireFeature f2 = (LireFeature) c.newInstance(); f2.extract(ImageIO.read(new File(testFile))); // System.out.println(Arrays.toString(f1.getDoubleHistogram())); // System.out.println(Arrays.toString(f2.getDoubleHistogram())); assertEquals(f2.getDistance(f1), 0d, 0.000000001); f2.setByteArrayRepresentation(f1.getByteArrayRepresentation()); assertEquals(f2.getDistance(f1), 0d, 0.000000001); byte[] tmp = new byte[1024 * 100]; Arrays.fill(tmp, (byte) 0x000F); byte[] bytes = f1.getByteArrayRepresentation(); System.arraycopy(bytes, 0, tmp, 12, bytes.length); f2.setByteArrayRepresentation(tmp, 12, bytes.length); assertEquals(f2.getDistance(f1), 0d, 0.000000001); } } }
public void testSearchIndexLarge() throws IOException { for (int i = 0; i < 10; i++) { int queryDocID = (int) (Math.random() * 800); // queryDocID = 877 * (i + 1); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large"))); // select one feature for the large index: int featureIndex = 13; int count = 0; long ms = System.currentTimeMillis(); ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader); for (int j = 0; j < hits.length(); j++) { String fileName = hits.doc(j).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(j) + ": \t" + fileName); } // FileUtils.saveImageResultsToHtml("GeneralTest_testSearchIndexLarge_", hits, // reader.document(10).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); FileUtils.saveImageResultsToPng( "GeneralTest_testSearchIndexLarge_" + i + "_", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); } }
public void testClassify() throws IOException { boolean weightByRank = true; String[] classes = { "2012", "beach", "food", "london", "music", "nature", "people", "sky", "travel", "wedding" }; int k = 50; // CONFIG String fieldName = DocumentBuilder.FIELD_NAME_COLORLAYOUT; LireFeature feature = new ColorLayout(); String indexPath = "E:\\acmgc-cl-idx"; System.out.println( "Tests for feature " + fieldName + " with k=" + k + " - weighting by rank sum: " + weightByRank); System.out.println("========================================"); HashMap<String, Integer> tag2count = new HashMap<String, Integer>(k); HashMap<String, Double> tag2weight = new HashMap<String, Double>(k); int c = 9; // used for just one class ... // for (int c = 0; c < 10; c++) { String classIdentifier = classes[c]; String listFiles = "D:\\DataSets\\Yahoo-GC\\test\\" + classIdentifier + ".txt"; // INIT int[] confusion = new int[10]; Arrays.fill(confusion, 0); HashMap<String, Integer> class2id = new HashMap<String, Integer>(10); for (int i = 0; i < classes.length; i++) class2id.put(classes[i], i); BufferedReader br = new BufferedReader(new FileReader(listFiles)); String line; IndexReader ir = DirectoryReader.open(MMapDirectory.open(new File(indexPath))); // in-memory linear search // ImageSearcher bis = new GenericFastImageSearcher(k, feature.getClass(), fieldName, // true, ir); // hashing based searcher BitSamplingImageSearcher bis = new BitSamplingImageSearcher(k, fieldName, fieldName + "_hash", feature, 1000); ImageSearchHits hits; int count = 0, countCorrect = 0; long ms = System.currentTimeMillis(); while ((line = br.readLine()) != null) { try { tag2count.clear(); tag2weight.clear(); hits = bis.search(ImageIO.read(new File(line)), ir); // set tag weights and counts. for (int l = 0; l < k; l++) { String tag = getTag(hits.doc(l)); if (tag2count.get(tag) == null) tag2count.put(tag, 1); else tag2count.put(tag, tag2count.get(tag) + 1); if (weightByRank) { if (tag2weight.get(tag) == null) tag2weight.put(tag, (double) l); else tag2weight.put(tag, (double) l + tag2weight.get(tag)); } else { if (tag2weight.get(tag) == null) tag2weight.put(tag, Double.valueOf(hits.score(l))); else tag2weight.put(tag, (double) l + hits.score(l)); } } // find class: int maxCount = 0, maxima = 0; String classifiedAs = null; for (Iterator<String> tagIterator = tag2count.keySet().iterator(); tagIterator.hasNext(); ) { String tag = tagIterator.next(); if (tag2count.get(tag) > maxCount) { maxCount = tag2count.get(tag); maxima = 1; classifiedAs = tag; } else if (tag2count.get(tag) == maxCount) { maxima++; } } // if there are two or more classes with the same number of results, then we take a look at // the weights. // else the class is alread given in classifiedAs. if (maxima > 1) { double minWeight = Double.MAX_VALUE; for (Iterator<String> tagIterator = tag2count.keySet().iterator(); tagIterator.hasNext(); ) { String tag = tagIterator.next(); if (tag2weight.get(tag) < minWeight) { minWeight = tag2weight.get(tag); classifiedAs = tag; } } } // if (tag2.equals(tag3)) tag1 = tag2; count++; if (classifiedAs.equals(classIdentifier)) countCorrect++; // confusion: confusion[class2id.get(classifiedAs)]++; // System.out.printf("%10s (%4.3f, %10d, %4d)\n", classifiedAs, ((double) // countCorrect / (double) count), count, (System.currentTimeMillis() - ms) / count); } catch (Exception e) { System.err.println(e.getMessage()); } } // System.out.println("Results for class " + classIdentifier); System.out.printf("Class\tAvg. Precision\tCount Test Images\tms per test\n"); System.out.printf( "%s\t%4.5f\t%10d\t%4d\n", classIdentifier, ((double) countCorrect / (double) count), count, (System.currentTimeMillis() - ms) / count); System.out.printf("Confusion\t"); // for (int i = 0; i < classes.length; i++) { // System.out.printf("%s\t", classes[i]); // } // System.out.println(); for (int i = 0; i < classes.length; i++) { System.out.printf("%d\t", confusion[i]); } System.out.println(); // } }
protected void setUp() throws Exception { super.setUp(); // set to all queries ... approach "leave one out" sampleQueries = new int[1000]; for (int i = 0; i < sampleQueries.length; i++) { sampleQueries[i] = i; } indexPath += "-" + System.currentTimeMillis() % (1000 * 60 * 60 * 24 * 7); // Setting up DocumentBuilder: // parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive); parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) { @Override public void addBuilders(ChainedDocumentBuilder builder) { // builder.addBuilder(DocumentBuilderFactory.getCEDDDocumentBuilder()); // // builder.addBuilder(DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder()); builder.addBuilder(DocumentBuilderFactory.getColorLayoutBuilder()); builder.addBuilder(DocumentBuilderFactory.getEdgeHistogramBuilder()); // builder.addBuilder(DocumentBuilderFactory.getFCTHDocumentBuilder()); builder.addBuilder(DocumentBuilderFactory.getJCDDocumentBuilder()); // // builder.addBuilder(DocumentBuilderFactory.getJointHistogramDocumentBuilder()); builder.addBuilder(DocumentBuilderFactory.getOpponentHistogramDocumentBuilder()); builder.addBuilder(DocumentBuilderFactory.getPHOGDocumentBuilder()); // // builder.addBuilder(DocumentBuilderFactory.getColorHistogramDocumentBuilder()); builder.addBuilder(DocumentBuilderFactory.getScalableColorBuilder()); // // builder.addBuilder(DocumentBuilderFactory.getLuminanceLayoutDocumentBuilder()); // // builder.addBuilder(DocumentBuilderFactory.getJpegCoefficientHistogramDocumentBuilder()); // // builder.addBuilder(DocumentBuilderFactory.getColorHistogramDocumentBuilder()); // builder.addBuilder(DocumentBuilderFactory.getGaborDocumentBuilder()); // builder.addBuilder(DocumentBuilderFactory.getTamuraDocumentBuilder()); // builder.addBuilder(DocumentBuilderFactory.getScalableColorBuilder()); // builder.addBuilder(new GenericDocumentBuilder(RankAndOpponent.class, // "jop")); // builder.addBuilder(new // GenericFastDocumentBuilder(FuzzyOpponentHistogram.class, "opHist")); // builder.addBuilder(new SurfDocumentBuilder()); // builder.addBuilder(new MSERDocumentBuilder()); // builder.addBuilder(new SiftDocumentBuilder()); // builder.addBuilder(new GenericDocumentBuilder(SPCEDD.class, // "spcedd")); // builder.addBuilder(new GenericDocumentBuilder(SPFCTH.class, // "spfcth")); // builder.addBuilder(new GenericDocumentBuilder(SPJCD.class, "spjcd")); // builder.addBuilder(new GenericDocumentBuilder(SPACC.class, "spacc")); // builder.addBuilder(new // GenericDocumentBuilder(LocalBinaryPatterns.class, "lbp")); // builder.addBuilder(new // GenericDocumentBuilder(BinaryPatternsPyramid.class, "whog")); // builder.addBuilder(new // GenericDocumentBuilder(LocalBinaryPatternsAndOpponent.class, "jhl")); // builder.addBuilder(new // GenericDocumentBuilder(RotationInvariantLocalBinaryPatterns.class, "rlbp")); // builder.addBuilder(new GenericDocumentBuilder(SPLBP.class, "splbp")); } }; }
public void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException { Pattern p = Pattern.compile("([0-9]+).jpg"); double map = 0; double errorRate = 0d; double precision10 = 0d; double[] pr10cat = new double[10]; double[] pr10cnt = new double[10]; for (int i = 0; i < pr10cat.length; i++) { pr10cat[i] = 0d; pr10cnt[i] = 0d; } long sum = 0, ms = 0; for (int i = 0; i < sampleQueries.length; i++) { int id = sampleQueries[i]; String file = testExtensive + "/" + id + ".jpg"; ms = System.currentTimeMillis(); ImageSearchHits hits = searcher.search(findDoc(reader, id + ".jpg"), reader); sum += (System.currentTimeMillis() - ms); int goodOnes = 0; double avgPrecision = 0d; double precision10temp = 0d; int countResults = 0; for (int j = 0; j < hits.length(); j++) { Document d = hits.doc(j); String hitsId = d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; Matcher matcher = p.matcher(hitsId); if (matcher.find()) hitsId = matcher.group(1); else fail("Did not get the number ..."); int testID = Integer.parseInt(hitsId); if (testID != id) countResults++; if ((testID != id) && ((int) Math.floor(id / 100) == (int) Math.floor(testID / 100))) { goodOnes++; // Only if there is a change in recall avgPrecision += (double) goodOnes / (double) countResults; // System.out.print("x"); if (j <= 10) { precision10temp += 1d; } } else { if (j == 1) { // error rate errorRate++; } } } // end for loop iterating results. // if (avgPrecision<=0) { // System.out.println("avgPrecision = " + avgPrecision); // System.out.println("goodOnes = " + goodOnes); // } assertTrue("Check if average precision is > 0", avgPrecision > 0); assertTrue("Check if goodOnes is > 0", goodOnes > 0); avgPrecision = avgPrecision / goodOnes; precision10 += precision10temp / 10d; // precision @ 10 for each category ... pr10cat[(int) Math.floor(id / 100)] += precision10temp / 10d; pr10cnt[(int) Math.floor(id / 100)] += 1d; map += avgPrecision; } map = map / sampleQueries.length; errorRate = errorRate / sampleQueries.length; precision10 = precision10 / sampleQueries.length; System.out.print(prefix + "\t"); System.out.print(String.format("%.5f", map) + '\t'); System.out.print(String.format("%.5f", precision10) + '\t'); System.out.print(String.format("%.5f", errorRate) + '\t'); // precision@10 per category for (int i = 0; i < pr10cat.length; i++) { double v = 0; if (pr10cnt[i] > 0) v = pr10cat[i] / pr10cnt[i]; // System.out.print(i + ": "); System.out.printf("%.5f\t", v); } System.out.printf("%2.3f\t", (double) sum / (double) sampleQueries.length); System.out.println(); }