// LUCENE-1262
public void testExceptions() throws Throwable {
  Path indexDir = createTempDir("testfieldswriterexceptions");
  Directory fsDir = newFSDirectory(indexDir);
  FaultyFSDirectory dir = new FaultyFSDirectory(fsDir);
  IndexWriterConfig iwc =
      newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(dir, iwc);
  for (int i = 0; i < 2; i++) {
    writer.addDocument(testDoc);
  }
  writer.forceMerge(1);
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  dir.startFailing();

  boolean exc = false;
  for (int i = 0; i < 2; i++) {
    try {
      reader.document(i);
    } catch (IOException ioe) {
      // expected
      exc = true;
    }
    try {
      reader.document(i);
    } catch (IOException ioe) {
      // expected
      exc = true;
    }
  }
  assertTrue(exc);
  reader.close();
  dir.close();
}
/**
 * Puts results into an HTML file.
 *
 * @param prefix prefix for the name of the generated file
 * @param hits the search results to render
 * @param reader the IndexReader the hits were retrieved from
 * @param queryImage path to the query image
 * @return the name of the generated HTML file
 * @throws IOException in case the file cannot be written
 */
public static String saveImageResultsToHtml(
    String prefix, TopDocs hits, IndexReader reader, String queryImage) throws IOException {
  long l = System.currentTimeMillis() / 1000;
  String fileName = "results-" + prefix + "-" + l + ".html";
  BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
  bw.write(
      "<html>\n"
          + "<head><title>Search Results</title></head>\n"
          + "<body bgcolor=\"#FFFFFF\">\n");
  bw.write("<h3>query</h3>\n");
  bw.write(
      "<a href=\"file://" + queryImage + "\"><img src=\"file://" + queryImage + "\"></a><p>\n");
  bw.write("<h3>results</h3>\n");
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    String imageIdentifier =
        reader.document(hits.scoreDocs[i].doc).get("descriptorImageIdentifier");
    bw.write(
        hits.scoreDocs[i].score
            + " - <a href=\"file://"
            + imageIdentifier
            + "\"><img src=\"file://"
            + imageIdentifier
            + "\"></a><p>\n");
  }
  bw.write("</body>\n" + "</html>");
  bw.close();
  return fileName;
}
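// A minimal usage sketch for saveImageResultsToHtml, assuming a LIRE-style
// index whose documents store a "descriptorImageIdentifier" field. The index
// path and query image path below are placeholders, not part of the method above.
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("image-index")));
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10);
String page = saveImageResultsToHtml("cedd", hits, reader, "/path/to/query.jpg");
System.out.println("results written to " + page);
reader.close();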
public void test() throws IOException {
  assertTrue(dir != null);
  assertTrue(fieldInfos != null);
  IndexReader reader = DirectoryReader.open(dir);
  Document doc = reader.document(0);
  assertTrue(doc != null);
  assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);

  Field field = (Field) doc.getField(DocHelper.TEXT_FIELD_2_KEY);
  assertTrue(field != null);
  assertTrue(field.fieldType().storeTermVectors());
  assertFalse(field.fieldType().omitNorms());
  assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

  field = (Field) doc.getField(DocHelper.TEXT_FIELD_3_KEY);
  assertTrue(field != null);
  assertFalse(field.fieldType().storeTermVectors());
  assertTrue(field.fieldType().omitNorms());
  assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

  field = (Field) doc.getField(DocHelper.NO_TF_KEY);
  assertTrue(field != null);
  assertFalse(field.fieldType().storeTermVectors());
  assertFalse(field.fieldType().omitNorms());
  assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS);

  DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY);
  reader.document(0, visitor);
  final List<IndexableField> fields = visitor.getDocument().getFields();
  assertEquals(1, fields.size());
  assertEquals(DocHelper.TEXT_FIELD_3_KEY, fields.get(0).name());
  reader.close();
}
public void testSearch() throws IOException {
  int docNumber = 1;
  MetricSpacesInvertedListIndexing ms = MetricSpacesInvertedListIndexing.getDefaultInstance();
  MetricSpacesInvertedListIndexing.numReferenceObjectsUsed = 10;
  MetricSpacesInvertedListIndexing.numReferenceObjects = 50;
  IndexReader reader = ms.getIndexReader(indexPath);
  TopDocs docs = ms.search(reader.document(docNumber), indexPath);
  // print the results
  BufferedWriter bw = new BufferedWriter(new FileWriter("out.html"));
  bw.write("<html><body>");
  for (int i = 0; i < docs.scoreDocs.length; i++) {
    ScoreDoc scoreDoc = docs.scoreDocs[i];
    bw.write(
        "<img title=\"ID: "
            + scoreDoc.doc
            + ", Score: "
            + scoreDoc.score
            + "\" src=\"file:///"
            + reader.document(scoreDoc.doc).getValues("descriptorImageIdentifier")[0]
            + "\"> ");
  }
  bw.write("</body></html>");
  bw.close();
  showUrl("out.html");
}
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
  // get the first document:
  if (!IndexReader.indexExists(reader.directory()))
    throw new FileNotFoundException("No index found at this specific location.");
  Document doc = reader.document(0);
  ScalableColor sc = null;
  ColorLayout cl = null;
  EdgeHistogram eh = null;
  String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
  if (cls != null && cls.length > 0) {
    cl = new ColorLayout();
    cl.setStringRepresentation(cls[0]);
  }
  String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
  if (scs != null && scs.length > 0) {
    sc = new ScalableColor();
    sc.setStringRepresentation(scs[0]);
  }
  String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
  if (ehs != null && ehs.length > 0) {
    eh = new EdgeHistogram();
    eh.setStringRepresentation(ehs[0]);
  }

  HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
  // find duplicates ...
  boolean hasDeletions = reader.hasDeletions();
  int docs = reader.numDocs();
  int numDuplicates = 0;
  for (int i = 0; i < docs; i++) {
    if (hasDeletions && reader.isDeleted(i)) {
      continue;
    }
    Document d = reader.document(i);
    float distance = getDistance(d, cl, sc, eh);
    if (!duplicates.containsKey(distance)) {
      duplicates.put(distance, new LinkedList<String>());
    } else {
      numDuplicates++;
    }
    duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
  }

  if (numDuplicates == 0) return null;
  LinkedList<List<String>> results = new LinkedList<List<String>>();
  for (float f : duplicates.keySet()) {
    if (duplicates.get(f).size() > 1) {
      results.add(duplicates.get(f));
    }
  }
  return new SimpleImageDuplicates(results);
}
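// Hypothetical caller for findDuplicates, assuming an instance of the
// enclosing searcher class and a Lucene 3.x-style LIRE index at the
// placeholder path "image-index":
IndexReader reader = IndexReader.open(FSDirectory.open(new File("image-index")));
ImageDuplicates duplicates = findDuplicates(reader);
if (duplicates == null) {
  System.out.println("no duplicates found");
}
reader.close();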
public double singleSearch(int docNum)
    throws IOException, InstantiationException, IllegalAccessException {
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));

  // -----------
  Document queryDoc = reader.document(docNum);
  String query = queryDoc.getValues("hash")[0];
  CEDD ceddQuery = new CEDD();
  BytesRef cedd = queryDoc.getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue();
  ceddQuery.setByteArrayRepresentation(cedd.bytes, cedd.offset, cedd.length);

  // -----------
  HashSet<String> gold = new HashSet<String>(numImagesEval);
  ImageSearcher cis = ImageSearcherFactory.createCEDDImageSearcher(100);
  ImageSearchHits hits = cis.search(queryDoc, reader);
  for (int i = 0; i < 10; i++) {
    gold.add(hits.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
  }

  // ------------
  IndexSearcher searcher = new IndexSearcher(reader);
  searcher.setSimilarity(
      new SimilarityBase() {
        @Override
        protected float score(BasicStats basicStats, float freq, float v2) {
          return 1;
        }

        @Override
        public String toString() {
          return null;
        }
      });
  TopDocs topDocs = searcher.search(createQuery(query), 500);
  topDocs = rerank(topDocs, ceddQuery, reader);

  double numMatches = 0;
  for (int i = 0; i < topDocs.scoreDocs.length; i++) {
    ScoreDoc scoreDoc = topDocs.scoreDocs[i];
    String file =
        reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
    if (gold.contains(file)) numMatches++;
  }
  return numMatches;
}
public void tttestGetDistribution() throws IOException {
  BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
  IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
  // get the first document:
  // if (!IndexReader.indexExists(reader.directory()))
  //   throw new FileNotFoundException("No index found at this specific location.");

  CEDD cedd1 = new CEDD();
  FCTH fcth1 = new FCTH();
  CEDD cedd2 = new CEDD();
  FCTH fcth2 = new FCTH();
  JCD jcd1 = new JCD();
  JCD jcd2 = new JCD();
  String[] cls;

  // Needed for check whether the document is deleted.
  Bits liveDocs = MultiFields.getLiveDocs(reader);

  int docs = reader.numDocs();
  for (int i = 0; i < docs; i++) {
    if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
    Document doc = reader.document(i);
    cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
    if (cls != null && cls.length > 0) cedd1.setStringRepresentation(cls[0]);
    cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
    if (cls != null && cls.length > 0) fcth1.setStringRepresentation(cls[0]);
    for (int j = i + 1; j < docs; j++) {
      // note: the check must use j here, not i, to skip deleted inner documents
      if (reader.hasDeletions() && !liveDocs.get(j)) continue; // if it is deleted, just ignore it.
      Document doc2 = reader.document(j);
      cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
      if (cls != null && cls.length > 0) cedd2.setStringRepresentation(cls[0]);
      cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
      if (cls != null && cls.length > 0) fcth2.setStringRepresentation(cls[0]);
      jcd1.init(cedd1, fcth1);
      jcd2.init(cedd2, fcth2);
      bw.write(
          cedd1.getDistance(cedd2)
              + ";"
              + fcth1.getDistance(fcth2)
              + ";"
              + jcd1.getDistance(jcd2)
              + "\n");
    }
    if (i % 100 == 0) System.out.println(i + " entries processed ... ");
  }
  bw.close();
}
/**
 * Words in each record of the input are sorted by document frequency. Two records are put into
 * the same block if their prefixes of length ceil(prefix * length), capped at maxPrefixLength,
 * share at least one token.
 *
 * @param input path to the input file, one record per line
 * @param lines number of lines to block
 * @param prefix prefix parameter
 * @param maxPrefixLength maximum number of tokens used from each record's prefix
 * @param maxDocFreq max document frequency for a token to be considered a rare feature
 * @param indexFolder temporary index folder
 * @param output path of the file the blocks are written to
 * @param report path of the report file (appended to)
 * @throws Exception
 */
public static void prefixBlockingWithLucene(
    String input,
    int lines,
    float prefix,
    int maxPrefixLength,
    int maxDocFreq,
    String indexFolder,
    String output,
    String report)
    throws Exception {
  long startTime = new Date().getTime();
  Common.indexPrefix(input, lines, prefix, maxPrefixLength, indexFolder);

  IndexReader ireader = IndexReader.open(indexFolder);
  IndexSearcher isearcher = new IndexSearcher(ireader);
  TermEnum te = ireader.terms();
  PrintWriter pw = IOFactory.getPrintWriter(output);
  int maxBlockSize = 0;
  int totalBlockSize = 0;
  int blockCount = 0;
  while (te.next()) {
    TopDocs td = isearcher.search(new TermQuery(te.term()), maxDocFreq + 1);
    // discard blocks with only one individual or of too frequent tokens
    if (td.scoreDocs.length <= 1 || td.scoreDocs.length > maxDocFreq) continue;
    if (td.scoreDocs.length > maxBlockSize) maxBlockSize = td.scoreDocs.length;
    totalBlockSize += td.scoreDocs.length;
    blockCount++;
    pw.print(ireader.document(td.scoreDocs[0].doc).get("id"));
    for (int i = 1; i < td.scoreDocs.length; i++) {
      pw.print(" " + ireader.document(td.scoreDocs[i].doc).get("id"));
    }
    pw.println();
    if (blockCount % 1000 == 0)
      System.out.println(new Date().toString() + " : " + blockCount + " blocks");
  }
  pw.close();
  ireader.close();

  long time = new Date().getTime() - startTime;
  pw = IOFactory.getPrintWriter(report, true);
  pw.println(new Date().toString());
  pw.println("#individual: " + lines);
  pw.println("blocking parameter: " + prefix);
  pw.println("time: " + time);
  pw.println("#block: " + blockCount);
  pw.println("max block size: " + maxBlockSize);
  pw.println("avg block size: " + (totalBlockSize + 0.0) / blockCount);
  pw.close();

  Common.deleteFolder(new File(indexFolder));
  System.out.println(prefix + "\t" + lines + "\t" + time); // for speed test
}
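// Hypothetical invocation of prefixBlockingWithLucene; every path and
// parameter value below is an example, not taken from the original code.
prefixBlockingWithLucene(
    "records.txt", // input: one record per line
    100000,        // lines: number of records to block
    0.1f,          // prefix parameter
    5,             // maxPrefixLength
    1000,          // maxDocFreq
    "tmp-index",   // temporary index folder
    "blocks.txt",  // output
    "report.txt"); // report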
public void testSearchRunTime() throws IOException {
  int queryDocID;
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new")));
  int featureIndex = 0;
  // run two searches up front so the timing below is not dominated by warm-up costs
  ImageSearchHits hits = searchers[featureIndex].search(reader.document(0), reader);
  hits = searchers[featureIndex].search(reader.document(1), reader);
  long ms = System.currentTimeMillis();
  for (int i = 0; i < 100; i++) {
    queryDocID = i;
    // select one feature for the large index:
    hits = searchers[featureIndex].search(reader.document(queryDocID), reader);
  }
  ms = System.currentTimeMillis() - ms;
  System.out.println("ms = " + ms / 100); // average time per search
}
public void testIndexedBit() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType onlyStored = new FieldType();
  onlyStored.setStored(true);
  doc.add(new Field("field", "value", onlyStored));
  doc.add(new StringField("field2", "value", Field.Store.YES));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  w.close();
  assertFalse(r.document(0).getField("field").fieldType().indexed());
  assertTrue(r.document(0).getField("field2").fieldType().indexed());
  r.close();
  dir.close();
}
// LUCENE-1219
public void testBinaryFieldOffsetLength() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w =
      new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  byte[] b = new byte[50];
  for (int i = 0; i < 50; i++) b[i] = (byte) (i + 77);

  Document doc = new Document();
  Field f = new StoredField("binary", b, 10, 17);
  byte[] bx = f.binaryValue().bytes;
  assertTrue(bx != null);
  assertEquals(50, bx.length);
  assertEquals(10, f.binaryValue().offset);
  assertEquals(17, f.binaryValue().length);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader ir = DirectoryReader.open(dir);
  Document doc2 = ir.document(0);
  IndexableField f2 = doc2.getField("binary");
  b = f2.binaryValue().bytes;
  assertTrue(b != null);
  assertEquals(17, b.length); // only the 17-byte slice is stored
  assertEquals(87, b[0]);
  ir.close();
  dir.close();
}
// LUCENE-1727: make sure doc fields are stored in order
public void testStoredFieldsOrder() throws Throwable {
  Directory d = newDirectory();
  IndexWriter w =
      new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  Document doc = new Document();

  FieldType customType = new FieldType();
  customType.setStored(true);

  doc.add(newField("zzz", "a b c", customType));
  doc.add(newField("aaa", "a b c", customType));
  doc.add(newField("zzz", "1 2 3", customType));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  Document doc2 = r.document(0);
  Iterator<IndexableField> it = doc2.getFields().iterator();
  assertTrue(it.hasNext());
  Field f = (Field) it.next();
  assertEquals(f.name(), "zzz");
  assertEquals(f.stringValue(), "a b c");

  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals(f.name(), "aaa");
  assertEquals(f.stringValue(), "a b c");

  assertTrue(it.hasNext());
  f = (Field) it.next();
  assertEquals(f.name(), "zzz");
  assertEquals(f.stringValue(), "1 2 3");
  assertFalse(it.hasNext());
  r.close();
  w.close();
  d.close();
}
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
  Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
  final Fields vectors = ir.getTermVectors(docNum);
  for (String fieldName : fieldNames) {
    final Terms vector;
    if (vectors != null) {
      vector = vectors.terms(fieldName);
    } else {
      vector = null;
    }

    if (vector == null) {
      // field does not store term vector info; fall back to the stored field content
      Document d = ir.document(docNum);
      IndexableField[] fields = d.getFields(fieldName);
      for (IndexableField field : fields) {
        final String stringValue = field.stringValue();
        if (stringValue != null) {
          addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
        }
      }
    } else {
      addTermFrequencies(field2termFreqMap, vector, fieldName);
    }
  }
  return createQueue(field2termFreqMap);
}
private void dumpDocuments() throws IOException {
  outputBanner("Documents");

  int totalDocs = mIndexReader.numDocs();
  outputLn();
  outputLn("There are " + totalDocs + " documents in this index.");
  mConsole.debug("Total number of documents: " + totalDocs);
  for (int i = 0; i < totalDocs; i++) {
    Document doc = null;
    try {
      doc = mIndexReader.document(i, null);
    } catch (IllegalArgumentException e) {
      if ("attempt to access a deleted document".equals(e.getMessage())) {
        mConsole.warn("encountered exception while dumping document " + i + ": " + e.getMessage());
      } else {
        throw e;
      }
    }
    dumpDocument(i, doc);
    if ((i + 1) % 100 == 0) {
      mConsole.debug("Dumped " + (i + 1) + " documents");
    }
  }
}
private static Map<String, List<String>> generate_result(Directory directory) {
  Map<String, List<String>> result_map = new HashMap<String, List<String>>();
  IndexReader reader = null;
  try {
    reader = IndexReader.open(directory);
    TermEnum termEnum = reader.terms();
    while (termEnum.next()) {
      String termEnumString = termEnum.term().toString();
      if (termEnumString.startsWith("content:")) {
        String term = termEnumString.substring(termEnumString.lastIndexOf(":") + 1);
        TermDocs termDocs = reader.termDocs(termEnum.term());
        while (termDocs.next()) {
          Document doc = reader.document(termDocs.doc());
          String relative_path = doc.get("relative_path");
          // create the list on first access, then always record the term
          if (!result_map.containsKey(relative_path)) {
            result_map.put(relative_path, new ArrayList<String>());
          }
          result_map.get(relative_path).add(term + termDocs.freq());
        }
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
  return result_map;
}
private void remove(Class entity, Serializable id) {
  log.trace("remove from Lucene index: " + entity + "#" + id);
  DocumentBuilder builder = workspace.getDocumentBuilder(entity);
  Term term = builder.getTerm(id);
  IndexReader reader = workspace.getIndexReader(entity);
  TermDocs termDocs = null;
  try {
    // TODO is there a faster way?
    // TODO include TermDocs into the workspace?
    termDocs = reader.termDocs(term);
    String entityName = entity.getName();
    while (termDocs.next()) {
      int docIndex = termDocs.doc();
      if (entityName.equals(reader.document(docIndex).get(DocumentBuilder.CLASS_FIELDNAME))) {
        // remove only the one of the right class
        // loop all to remove all the matches (defensive code)
        reader.deleteDocument(docIndex);
      }
    }
  } catch (Exception e) {
    throw new HibernateException("Unable to remove from Lucene index: " + entity + "#" + id, e);
  } finally {
    if (termDocs != null)
      try {
        termDocs.close();
      } catch (IOException e) {
        log.warn("Unable to close termDocs properly", e);
      }
  }
}
/** @return the indexes */
public List<Index> getIndexes() {
  List<Index> indexes = new ArrayList<Index>();
  int numDocs = reader.numDocs();
  for (int i = 0; i < numDocs; i++) {
    try {
      Document document = reader.document(i);
      List<Fieldable> f = document.getFields();
      Index index = new Index();
      for (Fieldable fieldable : f) {
        Field field = (Field) fieldable;
        // invoke the matching setter for each stored field via reflection
        Method m = Index.class.getDeclaredMethod("set" + field.name(), new Class[] {String.class});
        m.invoke(index, new Object[] {field.stringValue()});
      }
      indexes.add(index);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  return indexes;
}
public void doTest(int[] docs) throws Exception {
  Directory dir = makeIndex();
  IndexReader reader = IndexReader.open(dir, true);
  for (int i = 0; i < docs.length; i++) {
    Document d = reader.document(docs[i], SELECTOR);
    d.get(MAGIC_FIELD);
    List<Fieldable> fields = d.getFields();
    for (Iterator<Fieldable> fi = fields.iterator(); fi.hasNext(); ) {
      Fieldable f = null;
      try {
        f = fi.next();
        String fname = f.name();
        String fval = f.stringValue();
        assertNotNull(docs[i] + " FIELD: " + fname, fval);
        String[] vals = fval.split("#");
        if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) {
          fail("FIELD:" + fname + ",VAL:" + fval);
        }
      } catch (Exception e) {
        throw new Exception(docs[i] + " WTF: " + f.name(), e);
      }
    }
  }
  reader.close();
  dir.close();
}
@Override
public boolean reload(String collectionName, int docNum) {
  if (collectionName == null) return false;

  CrescentCollectionHandler collectionHandler =
      SpringApplicationContext.getBean("crescentCollectionHandler", CrescentCollectionHandler.class);
  CrescentCollection collection =
      collectionHandler.getCrescentCollections().getCrescentCollection(collectionName);

  if (collection == null) {
    logger.debug("no collection info found for => {}", collectionName);
    return false;
  }

  List<String> fieldName = new ArrayList<String>();
  List<String> flag = new ArrayList<String>();
  List<String> norm = new ArrayList<String>();
  List<String> value = new ArrayList<String>();

  try {
    Directory directory = FSDirectory.open(new File(collection.getIndexingDirectory()));
    IndexReader reader = IndexReader.open(directory);

    Document document = null;
    try {
      document = reader.document(docNum);
    } catch (IllegalArgumentException e) {
      e.printStackTrace();
      return false;
    }

    String fName = null;
    for (Fieldable field : document.getFields()) {
      fName = field.name();
      fieldName.add(fName);
      flag.add(fieldFlag(field));
      if (reader.hasNorms(fName)) {
        norm.add(String.valueOf(Similarity.decodeNorm(reader.norms(fName)[docNum])));
      } else {
        norm.add("---");
      }
      value.add(field.stringValue());
    }
  } catch (IOException e) {
    e.printStackTrace();
    return false;
  }

  result.put("collection", collectionName);
  result.put("docNum", docNum);
  result.put("fieldName", fieldName);
  result.put("flag", flag);
  result.put("norm", norm);
  result.put("value", value);

  return true;
}
/**
 * Looks up an index document by its docId.
 *
 * @param reader the IndexReader to read from
 * @param docID the document id
 * @param fieldsToLoad the fields to load into the returned document
 * @return the document, or null if it could not be read
 */
public static Document findDocumentByDocId(
    IndexReader reader, int docID, Set<String> fieldsToLoad) {
  try {
    return reader.document(docID, fieldsToLoad);
  } catch (IOException e) {
    return null;
  }
}
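// Minimal usage sketch for findDocumentByDocId; the index path and the
// "title" field are placeholders, not taken from the surrounding code.
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("/tmp/idx")));
Set<String> fieldsToLoad = new HashSet<String>();
fieldsToLoad.add("title");
Document doc = findDocumentByDocId(reader, 0, fieldsToLoad);
if (doc != null) {
  System.out.println(doc.get("title"));
}
reader.close();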
public void deleteFieldFromIndex(String fieldName, int docId, Analyzer analyzer)
    throws IOException, ConfigurationException {
  Document doc = reader.document(docId);
  doc.removeFields(fieldName);
  Field uri = doc.getField("URI");
  Term term = new Term("URI", uri.stringValue());
  // re-index the stripped document in place, replacing the old version;
  // note: reader.document(...) only returns stored fields, so any
  // non-stored fields of the original document are lost by this round-trip
  writer.updateDocument(term, doc, analyzer);
}
@Override
protected Integer readFromDocument(IndexReader reader, int docId) throws IOException {
  // This implementation reads the length of the field ...
  Document doc = reader.document(docId, fieldSelector);
  String valueString = doc.get(fieldName);
  String value = stringFactory.create(valueString);
  return value != null ? value.length() : 0;
}
protected Diff<Document, Diff<Fieldable, DocumentDiff>> compare(
    IndexReader reader1, IndexReader reader2, String keyFieldName)
    throws IOException, ParseException {
  Diff<Document, Diff<Fieldable, DocumentDiff>> result =
      new Diff<Document, Diff<Fieldable, DocumentDiff>>();

  // first pass: everything in reader1 is either added, changed or unchanged
  for (int docId = 0; docId < reader1.numDocs(); docId++) {
    if (!reader1.isDeleted(docId)) {
      Document doc1 = reader1.document(docId);
      Field keyField = doc1.getField(keyFieldName);
      if (keyField == null) {
        throw new IllegalArgumentException(
            "Key field '" + keyFieldName + "' should be defined in all docs in the index");
      }
      Document doc2 = findByKey(reader2, keyField);
      if (doc2 == null) {
        result.addAdded(doc1);
      } else {
        Diff<Fieldable, DocumentDiff> diff = CompareUtils.diff(keyField.stringValue(), doc1, doc2);
        if (!diff.isEquals()) {
          result.addDiff(diff);
        }
      }
    }
  }

  // second pass: anything only in reader2 was removed
  for (int docId = 0; docId < reader2.numDocs(); docId++) {
    if (!reader2.isDeleted(docId)) {
      Document doc2 = reader2.document(docId);
      Field keyField = doc2.getField(keyFieldName);
      if (keyField == null) {
        throw new IllegalArgumentException(
            "Key field '" + keyFieldName + "' should be defined in all docs in the index");
      }
      Document doc1 = findByKey(reader1, keyField);
      if (doc1 == null) {
        result.addRemoved(doc2);
      }
    }
  }
  return result;
}
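// Sketch of how compare(...) might be driven from within the comparator
// class; the index paths and the "id" key field are placeholders.
IndexReader oldReader = IndexReader.open(FSDirectory.open(new File("index-old")));
IndexReader newReader = IndexReader.open(FSDirectory.open(new File("index-new")));
Diff<Document, Diff<Fieldable, DocumentDiff>> diff = compare(oldReader, newReader, "id");
oldReader.close();
newReader.close();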
/**
 * @param reader the IndexReader to scan
 * @param lireFeature the features to compute distances for
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
@SuppressWarnings("unchecked")
private float[] findSimilar(IndexReader reader, LireFeature[] lireFeature) throws IOException {
  float[] maxDistance = new float[lireFeature.length];
  float[] overallMaxDistance = new float[lireFeature.length];

  for (int i = 0; i < overallMaxDistance.length; i++) {
    overallMaxDistance[i] = -1f;
    maxDistance[i] = -1f;
  }

  parDocs = new TreeSet[lireFeature.length];
  for (int i = 0; i < parDocs.length; i++) {
    parDocs[i] = new TreeSet<SimpleResult>();
  }

  // Needed for check whether the document is deleted.
  Bits liveDocs = MultiFields.getLiveDocs(reader);

  // clear result set ...
  int docs = reader.numDocs();
  for (int i = 0; i < docs; i++) {
    if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.

    Document d = reader.document(i);
    float[] distance = getDistance(d, lireFeature);
    // calculate the overall max distance to normalize score afterwards
    for (int j = 0; j < distance.length; j++) {
      float f = distance[j];
      if (overallMaxDistance[j] < f) {
        overallMaxDistance[j] = f;
      }
      // if it is the first document:
      if (maxDistance[j] < 0) {
        maxDistance[j] = f;
      }
      // if the array is not full yet:
      if (this.parDocs[j].size() < maxHits) {
        this.parDocs[j].add(new SimpleResult(f, d));
        if (f > maxDistance[j]) {
          maxDistance[j] = f;
        }
      } else if (f < maxDistance[j]) {
        // if it is nearer to the sample than at least one of the current set:
        // remove the last one ...
        this.parDocs[j].remove(this.parDocs[j].last());
        // add the new one ...
        this.parDocs[j].add(new SimpleResult(f, d));
        // and set our new distance border ...
        maxDistance[j] = this.parDocs[j].last().getDistance();
      }
    }
  }
  return maxDistance;
}
private Document findDoc(IndexReader reader, String file) throws IOException {
  for (int i = 0; i < reader.numDocs(); i++) {
    Document document = reader.document(i);
    String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
    if (s.endsWith(File.separator + file)) {
      return document;
    }
  }
  return null;
}
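// findDoc scans the whole index because it matches identifiers by path
// *suffix*. If the full identifier is known and the field is indexed as a
// single term, an exact lookup is cheaper (sketch; the identifier value
// below is a placeholder):
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs td =
    searcher.search(
        new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, "/data/img/001.jpg")), 1);
Document exact = td.scoreDocs.length > 0 ? searcher.doc(td.scoreDocs[0].doc) : null;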
// LUCENE-1262
public void testExceptions() throws Throwable {
  File indexDir = _TestUtil.getTempDir("testfieldswriterexceptions");
  try {
    Directory dir = new FaultyFSDirectory(indexDir);
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                .setOpenMode(OpenMode.CREATE));
    for (int i = 0; i < 2; i++) {
      writer.addDocument(testDoc);
    }
    writer.forceMerge(1);
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    FaultyIndexInput.doFail = true;

    boolean exc = false;
    for (int i = 0; i < 2; i++) {
      try {
        reader.document(i);
      } catch (IOException ioe) {
        // expected
        exc = true;
      }
      try {
        reader.document(i);
      } catch (IOException ioe) {
        // expected
        exc = true;
      }
    }
    assertTrue(exc);
    reader.close();
    dir.close();
  } finally {
    _TestUtil.rmDir(indexDir);
  }
}
public void testSkipToFirsttimeHit() throws IOException {
  final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
  dq.add(tq("dek", "albino"));
  dq.add(tq("dek", "DOES_NOT_EXIST"));
  QueryUtils.check(dq, s);

  final Weight dw = dq.weight(s);
  final Scorer ds = dw.scorer(s.getIndexReader(), true, false);
  assertTrue("firsttime skipTo found no match", ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id"));
}
/**
 * There used to be a bug where images with the same score, but from different documents in the
 * index, were not included in the result list. This is the regression test for it.
 */
public void testDuplicatesInIndex() throws IOException {
  indexFiles("src\\test\\resources\\images", "index-large-new", 0, true);
  indexFiles("src\\test\\resources\\images", "index-large-new", 0, false);
  indexFiles("src\\test\\resources\\images", "index-large-new", 0, false);

  ImageSearcher s = searchers[0];
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new")));
  Document query = reader.document(0);
  ImageSearchHits hits = s.search(query, reader);
  FileUtils.saveImageResultsToPng(
      "duplicate_", hits, query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
}
public void testOutputSearchResults()
    throws IOException, InstantiationException, IllegalAccessException {
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
  int docNum = 0; // doc to search for.
  // -----------
  Document queryDoc = reader.document(docNum);
  String query = queryDoc.getValues("hash")[0];
  CEDD ceddQuery = new CEDD();
  BytesRef cedd = queryDoc.getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue();
  ceddQuery.setByteArrayRepresentation(cedd.bytes, cedd.offset, cedd.length);

  IndexSearcher searcher = new IndexSearcher(reader);
  TopDocs topDocs = searcher.search(createQuery(query), numImagesEval);
  FileUtils.saveImageResultsToPng(
      "result_lsh",
      topDocs,
      queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0],
      reader);
}
@Override
public void collect(int doc) throws IOException {
  String id = fieldData.stringValue(doc);
  // the _source is the query
  Document document = reader.document(doc, SourceFieldSelector.INSTANCE);
  byte[] source = document.getBinaryValue(SourceFieldMapper.NAME);
  try {
    queries.put(id, percolator.parseQuery(id, source, 0, source.length));
  } catch (Exception e) {
    logger.warn("failed to add query [{}]", e, id);
  }
}