public void testRandomIWReader() throws Throwable { Directory dir = newDirectory(); // TODO: verify equals using IW.getReader DocsAndWriter dw = indexRandomIWReader(5, 3, 100, dir); DirectoryReader reader = dw.writer.getReader(); dw.writer.commit(); verifyEquals(random(), reader, dir, "id"); reader.close(); dw.writer.close(); dir.close(); }
/**
 * Runs three rounds in which 2 to 6 {@code IndexingThread}s share one writer, then
 * asserts that the resulting index holds exactly one document.
 */
public void testUpdateSameDoc() throws Exception {
  final Directory directory = newDirectory();
  final LineFileDocs lineDocs = new LineFileDocs(random());

  for (int round = 0; round < 3; round++) {
    // A fresh writer per round, flushing after every two buffered docs.
    final IndexWriter writer =
        new IndexWriter(
            directory,
            newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2));
    final int updatesPerThread = atLeast(20);
    final IndexingThread[] workers = new IndexingThread[TestUtil.nextInt(random(), 2, 6)];

    // Each worker is started as soon as it has been created.
    for (int slot = 0; slot < workers.length; slot++) {
      workers[slot] = new IndexingThread(lineDocs, writer, updatesPerThread);
      workers[slot].start();
    }
    for (IndexingThread worker : workers) {
      worker.join();
    }
    writer.close();
  }

  final IndexReader finalReader = DirectoryReader.open(directory);
  assertEquals(1, finalReader.numDocs());
  finalReader.close();
  lineDocs.close();
  directory.close();
}
/**
 * Opens a reader on each directory and compares them with
 * {@link #verifyEquals(DirectoryReader, DirectoryReader, String)}.
 *
 * <p>Both readers are closed even when opening the second reader or the comparison
 * itself throws, so a failing comparison does not leave readers open on the directories.
 *
 * @param dir1 directory holding the first index
 * @param dir2 directory holding the second index
 * @param idField name of the field used to match documents between the two indexes
 * @throws Throwable if opening a reader fails or the comparison fails
 */
public void verifyEquals(Directory dir1, Directory dir2, String idField) throws Throwable {
  DirectoryReader r1 = DirectoryReader.open(dir1);
  try {
    DirectoryReader r2 = DirectoryReader.open(dir2);
    try {
      verifyEquals(r1, r2, idField);
    } finally {
      r2.close();
    }
  } finally {
    r1.close();
  }
}
private static void printDocs(DirectoryReader r) throws Throwable { for (AtomicReaderContext ctx : r.leaves()) { // TODO: improve this AtomicReader sub = ctx.reader(); Bits liveDocs = sub.getLiveDocs(); System.out.println(" " + ((SegmentReader) sub).getSegmentInfo()); for (int docID = 0; docID < sub.maxDoc(); docID++) { StoredDocument doc = sub.document(docID); if (liveDocs == null || liveDocs.get(docID)) { System.out.println(" docID=" + docID + " id:" + doc.get("id")); } else { System.out.println(" DEL docID=" + docID + " id:" + doc.get("id")); } } } }
@Override public void run() { try { DirectoryReader open = null; for (int i = 0; i < num; i++) { Document doc = new Document(); // docs.nextDoc(); BytesRef br = new BytesRef("test"); doc.add(newStringField("id", br, Field.Store.NO)); writer.updateDocument(new Term("id", br), doc); if (random().nextInt(3) == 0) { if (open == null) { open = DirectoryReader.open(writer, true); } DirectoryReader reader = DirectoryReader.openIfChanged(open); if (reader != null) { open.close(); open = reader; } assertEquals( "iter: " + i + " numDocs: " + open.numDocs() + " del: " + open.numDeletedDocs() + " max: " + open.maxDoc(), 1, open.numDocs()); } } if (open != null) { open.close(); } } catch (Exception e) { throw new RuntimeException(e); } }
/**
 * Command-line entry point. Opens the index named by {@code -index <path>} and then
 * processes the remaining arguments from left to right:
 *
 * <ul>
 *   <li>{@code -list-edocid <docid>}: prints the {@code externalId} of one document</li>
 *   <li>{@code -list-docids}: prints the {@code externalId} of every docid below
 *       {@code numDocs()}</li>
 *   <li>{@code -list-fields}: prints the number of fields and their names</li>
 *   <li>{@code -list-postings <a> <b>} / {@code -list-postings-sample <a> <b>}: delegates to
 *       {@code listPostings} with a limit of {@code Integer.MAX_VALUE} or 5</li>
 *   <li>{@code -list-stats}: prints corpus statistics for the url, keywords, title, body
 *       and inlink fields</li>
 *   <li>{@code -list-terms <a>}, {@code -list-termvector <a>},
 *       {@code -list-termvector-field <a> <b>}: delegate to the matching helper</li>
 * </ul>
 *
 * <p>An option that is missing its operands prints the usage text and stops processing.
 * Unknown arguments produce a warning and are skipped. If no {@code -index} option with a
 * path is present, the usage text is printed and the JVM exits with status 1.
 *
 * <p>The reader is closed in a {@code finally} block, so it is released even when an option
 * handler throws (for example {@link NumberFormatException} from a malformed docid).
 *
 * @param args command-line arguments
 * @throws IOException if the index cannot be opened or read
 */
public static void main(String[] args) throws IOException {
  IndexReader reader = null;

  // Opening the index first simplifies the processing of the rest of the
  // command line arguments.
  for (int i = 0; i < args.length; i++) {
    if ("-index".equals(args[i]) && (i + 1) < args.length) {
      // DirectoryReader.open signals failure by throwing, never by returning null,
      // so no null check is needed on its result.
      reader = DirectoryReader.open(FSDirectory.open(new File(args[i + 1])));
      break;
    }
  }

  if (reader == null) {
    System.err.println(usage);
    System.exit(1);
  }

  try {
    // Process the command line arguments sequentially.
    for (int i = 0; i < args.length; i++) {
      if ("-index".equals(args[i])) {
        // Handled in the previous loop, so just skip the argument.
        i++;
      } else if ("-list-edocid".equals(args[i])) {
        System.out.println("-list-edocid:");
        if ((i + 1) >= args.length) {
          System.out.println(usage);
          break;
        }
        Document d = reader.document(Integer.parseInt(args[i + 1]));
        System.out.println(
            "Internal docid --> External docid: " + args[i + 1] + " --> " + d.get("externalId"));
        i += 1;
      } else if ("-list-docids".equals(args[i])) {
        System.out.println("-list-docids:");
        // NOTE(review): this walks docids [0, numDocs()). If the index can contain
        // deletions, docids run up to maxDoc() instead - confirm indexes inspected
        // here never have deleted documents.
        for (int j = 0; j < reader.numDocs(); j++) {
          Document d = reader.document(j);
          System.out.println("Internal --> external docid: " + j + " --> " + d.get("externalId"));
        }
      } else if ("-list-fields".equals(args[i])) {
        Fields fields = MultiFields.getFields(reader);
        System.out.print("\nNumber of fields: ");
        if (fields == null) {
          System.out.println("0");
        } else {
          System.out.println(fields.size());
          for (String fieldName : fields) {
            System.out.println("\t" + fieldName);
          }
        }
      } else if ("-list-postings".equals(args[i])) {
        if ((i + 2) >= args.length) {
          System.out.println(usage);
          break;
        }
        listPostings(reader, args[i + 1], args[i + 2], Integer.MAX_VALUE);
        i += 2;
      } else if ("-list-postings-sample".equals(args[i])) {
        if ((i + 2) >= args.length) {
          System.out.println(usage);
          break;
        }
        listPostings(reader, args[i + 1], args[i + 2], 5);
        i += 2;
      } else if ("-list-stats".equals(args[i])) {
        System.out.println("Corpus statistics:");
        System.out.println("\tnumdocs\t\t" + reader.numDocs());
        // The labels carry their own tab padding so the columns line up.
        printFieldStatistics(reader, "\turl:\t", "url");
        printFieldStatistics(reader, "\tkeywords:", "keywords");
        printFieldStatistics(reader, "\ttitle:\t", "title");
        printFieldStatistics(reader, "\tbody:\t", "body");
        printFieldStatistics(reader, "\tinlink:\t", "inlink");
      } else if ("-list-terms".equals(args[i])) {
        if ((i + 1) >= args.length) {
          System.out.println(usage);
          break;
        }
        listTermDictionary(reader, args[i + 1]);
        i += 1;
      } else if ("-list-termvector".equals(args[i])) {
        if ((i + 1) >= args.length) {
          System.out.println(usage);
          break;
        }
        listTermVectors(reader, args[i + 1]);
        i += 1;
      } else if ("-list-termvector-field".equals(args[i])) {
        if ((i + 2) >= args.length) {
          System.out.println(usage);
          break;
        }
        listTermVectorField(reader, args[i + 1], args[i + 2]);
        i += 2;
      } else {
        System.err.println("\nWarning: Unknown argument " + args[i] + " ignored.");
      }
    }
  } finally {
    // Close the index and exit gracefully, even if an option handler failed.
    reader.close();
  }
}

/**
 * Prints one line of per-field corpus statistics: the document count, the sum of total
 * term frequencies, and their ratio computed in {@code float} arithmetic.
 */
private static void printFieldStatistics(IndexReader reader, String label, String field)
    throws IOException {
  final int docCount = reader.getDocCount(field);
  final long sumTotalTermFreq = reader.getSumTotalTermFreq(field);
  System.out.println(
      label
          + "\tnumdocs=" + docCount
          + "\tsumTotalTF=" + sumTotalTermFreq
          + "\tavglen=" + sumTotalTermFreq / (float) docCount);
}
public void verifyEquals(DirectoryReader r1, DirectoryReader r2, String idField) throws Throwable { if (VERBOSE) { System.out.println("\nr1 docs:"); printDocs(r1); System.out.println("\nr2 docs:"); printDocs(r2); } if (r1.numDocs() != r2.numDocs()) { assert false : "r1.numDocs()=" + r1.numDocs() + " vs r2.numDocs()=" + r2.numDocs(); } boolean hasDeletes = !(r1.maxDoc() == r2.maxDoc() && r1.numDocs() == r1.maxDoc()); int[] r2r1 = new int[r2.maxDoc()]; // r2 id to r1 id mapping // create mapping from id2 space to id2 based on idField final Fields f1 = MultiFields.getFields(r1); if (f1 == null) { // make sure r2 is empty assertNull(MultiFields.getFields(r2)); return; } final Terms terms1 = f1.terms(idField); if (terms1 == null) { assertTrue( MultiFields.getFields(r2) == null || MultiFields.getFields(r2).terms(idField) == null); return; } final TermsEnum termsEnum = terms1.iterator(null); final Bits liveDocs1 = MultiFields.getLiveDocs(r1); final Bits liveDocs2 = MultiFields.getLiveDocs(r2); Fields fields = MultiFields.getFields(r2); if (fields == null) { // make sure r1 is in fact empty (eg has only all // deleted docs): Bits liveDocs = MultiFields.getLiveDocs(r1); DocsEnum docs = null; while (termsEnum.next() != null) { docs = TestUtil.docs(random(), termsEnum, liveDocs, docs, DocsEnum.FLAG_NONE); while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { fail("r1 is not empty but r2 is"); } } return; } Terms terms2 = fields.terms(idField); TermsEnum termsEnum2 = terms2.iterator(null); DocsEnum termDocs1 = null; DocsEnum termDocs2 = null; while (true) { BytesRef term = termsEnum.next(); // System.out.println("TEST: match id term=" + term); if (term == null) { break; } termDocs1 = TestUtil.docs(random(), termsEnum, liveDocs1, termDocs1, DocsEnum.FLAG_NONE); if (termsEnum2.seekExact(term)) { termDocs2 = TestUtil.docs(random(), termsEnum2, liveDocs2, termDocs2, DocsEnum.FLAG_NONE); } else { termDocs2 = null; } if (termDocs1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { // 
This doc is deleted and wasn't replaced assertTrue(termDocs2 == null || termDocs2.nextDoc() == DocIdSetIterator.NO_MORE_DOCS); continue; } int id1 = termDocs1.docID(); assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocs1.nextDoc()); assertTrue(termDocs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); int id2 = termDocs2.docID(); assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocs2.nextDoc()); r2r1[id2] = id1; // verify stored fields are equivalent try { verifyEquals(r1.document(id1), r2.document(id2)); } catch (Throwable t) { System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); System.out.println(" d1=" + r1.document(id1)); System.out.println(" d2=" + r2.document(id2)); throw t; } try { // verify term vectors are equivalent verifyEquals(r1.getTermVectors(id1), r2.getTermVectors(id2)); } catch (Throwable e) { System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); Fields tv1 = r1.getTermVectors(id1); System.out.println(" d1=" + tv1); if (tv1 != null) { DocsAndPositionsEnum dpEnum = null; DocsEnum dEnum = null; for (String field : tv1) { System.out.println(" " + field + ":"); Terms terms3 = tv1.terms(field); assertNotNull(terms3); TermsEnum termsEnum3 = terms3.iterator(null); BytesRef term2; while ((term2 = termsEnum3.next()) != null) { System.out.println( " " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq()); dpEnum = termsEnum3.docsAndPositions(null, dpEnum); if (dpEnum != null) { assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); final int freq = dpEnum.freq(); System.out.println(" doc=" + dpEnum.docID() + " freq=" + freq); for (int posUpto = 0; posUpto < freq; posUpto++) { System.out.println(" pos=" + dpEnum.nextPosition()); } } else { dEnum = TestUtil.docs(random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS); assertNotNull(dEnum); assertTrue(dEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); final int freq = dEnum.freq(); System.out.println(" doc=" + dEnum.docID() + " freq=" + 
freq); } } } } Fields tv2 = r2.getTermVectors(id2); System.out.println(" d2=" + tv2); if (tv2 != null) { DocsAndPositionsEnum dpEnum = null; DocsEnum dEnum = null; for (String field : tv2) { System.out.println(" " + field + ":"); Terms terms3 = tv2.terms(field); assertNotNull(terms3); TermsEnum termsEnum3 = terms3.iterator(null); BytesRef term2; while ((term2 = termsEnum3.next()) != null) { System.out.println( " " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq()); dpEnum = termsEnum3.docsAndPositions(null, dpEnum); if (dpEnum != null) { assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); final int freq = dpEnum.freq(); System.out.println(" doc=" + dpEnum.docID() + " freq=" + freq); for (int posUpto = 0; posUpto < freq; posUpto++) { System.out.println(" pos=" + dpEnum.nextPosition()); } } else { dEnum = TestUtil.docs(random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS); assertNotNull(dEnum); assertTrue(dEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); final int freq = dEnum.freq(); System.out.println(" doc=" + dEnum.docID() + " freq=" + freq); } } } } throw e; } } // System.out.println("TEST: done match id"); // Verify postings // System.out.println("TEST: create te1"); final Fields fields1 = MultiFields.getFields(r1); final Iterator<String> fields1Enum = fields1.iterator(); final Fields fields2 = MultiFields.getFields(r2); final Iterator<String> fields2Enum = fields2.iterator(); String field1 = null, field2 = null; TermsEnum termsEnum1 = null; termsEnum2 = null; DocsEnum docs1 = null, docs2 = null; // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.numDocs()]; long[] info2 = new long[r2.numDocs()]; for (; ; ) { BytesRef term1 = null, term2 = null; // iterate until we get some docs int len1; for (; ; ) { len1 = 0; if (termsEnum1 == null) { if (!fields1Enum.hasNext()) { break; } field1 = fields1Enum.next(); Terms terms = fields1.terms(field1); if (terms == null) { continue; } termsEnum1 = 
terms.iterator(null); } term1 = termsEnum1.next(); if (term1 == null) { // no more terms in this field termsEnum1 = null; continue; } // System.out.println("TEST: term1=" + term1); docs1 = TestUtil.docs(random(), termsEnum1, liveDocs1, docs1, DocsEnum.FLAG_FREQS); while (docs1.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int d = docs1.docID(); int f = docs1.freq(); info1[len1] = (((long) d) << 32) | f; len1++; } if (len1 > 0) break; } // iterate until we get some docs int len2; for (; ; ) { len2 = 0; if (termsEnum2 == null) { if (!fields2Enum.hasNext()) { break; } field2 = fields2Enum.next(); Terms terms = fields2.terms(field2); if (terms == null) { continue; } termsEnum2 = terms.iterator(null); } term2 = termsEnum2.next(); if (term2 == null) { // no more terms in this field termsEnum2 = null; continue; } // System.out.println("TEST: term1=" + term1); docs2 = TestUtil.docs(random(), termsEnum2, liveDocs2, docs2, DocsEnum.FLAG_FREQS); while (docs2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int d = r2r1[docs2.docID()]; int f = docs2.freq(); info2[len2] = (((long) d) << 32) | f; len2++; } if (len2 > 0) break; } assertEquals(len1, len2); if (len1 == 0) break; // no more terms assertEquals(field1, field2); assertTrue(term1.bytesEquals(term2)); if (!hasDeletes) assertEquals(termsEnum1.docFreq(), termsEnum2.docFreq()); assertEquals("len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes, term1, term2); // sort info2 to get it into ascending docid Arrays.sort(info2, 0, len2); // now compare for (int i = 0; i < len1; i++) { assertEquals( "i=" + i + " len=" + len1 + " d1=" + (info1[i] >>> 32) + " f1=" + (info1[i] & Integer.MAX_VALUE) + " d2=" + (info2[i] >>> 32) + " f2=" + (info2[i] & Integer.MAX_VALUE) + " field=" + field1 + " term=" + term1.utf8ToString(), info1[i], info2[i]); } } }