public void test() throws IOException {
  assertNotNull(dir);
  assertNotNull(fieldInfos);
  IndexReader reader = DirectoryReader.open(dir);
  Document doc = reader.document(0);
  assertNotNull(doc);
  assertNotNull(doc.getField(DocHelper.TEXT_FIELD_1_KEY));

  Field field = (Field) doc.getField(DocHelper.TEXT_FIELD_2_KEY);
  assertNotNull(field);
  assertTrue(field.fieldType().storeTermVectors());
  assertFalse(field.fieldType().omitNorms());
  assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, field.fieldType().indexOptions());

  field = (Field) doc.getField(DocHelper.TEXT_FIELD_3_KEY);
  assertNotNull(field);
  assertFalse(field.fieldType().storeTermVectors());
  assertTrue(field.fieldType().omitNorms());
  assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, field.fieldType().indexOptions());

  field = (Field) doc.getField(DocHelper.NO_TF_KEY);
  assertNotNull(field);
  assertFalse(field.fieldType().storeTermVectors());
  assertFalse(field.fieldType().omitNorms());
  assertEquals(IndexOptions.DOCS, field.fieldType().indexOptions());

  DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY);
  reader.document(0, visitor);
  final List<IndexableField> fields = visitor.getDocument().getFields();
  assertEquals(1, fields.size());
  assertEquals(DocHelper.TEXT_FIELD_3_KEY, fields.get(0).name());
  reader.close();
}
@Test
public void testWithPolyFieldsAndDocumentBoost() {
  SolrCore core = h.getCore();
  IndexSchema schema = core.getLatestSchema();
  assertFalse(schema.getField("store").omitNorms());
  assertTrue(schema.getField("store_0_coordinate").omitNorms());
  assertTrue(schema.getField("store_1_coordinate").omitNorms());
  assertFalse(schema.getField("amount").omitNorms());
  assertTrue(
      schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").omitNorms());
  assertTrue(
      schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").omitNorms());

  SolrInputDocument doc = new SolrInputDocument();
  doc.setDocumentBoost(3.0f);
  doc.addField("store", "40.7143,-74.006");
  doc.addField("amount", "10.5");
  Document out = DocumentBuilder.toDocument(doc, schema);
  assertNotNull(out.get("store"));
  assertNotNull(out.getField("store_0_coordinate"));
  // NOTE: because the subtypes have omitNorms=true, they must have boost=1f;
  // otherwise adding the doc to Lucene will fail.
  assertEquals(1f, out.getField("store_0_coordinate").boost(), 0f);
  assertEquals(1f, out.getField("store_1_coordinate").boost(), 0f);
  assertEquals(
      1f, out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").boost(), 0f);
  assertEquals(
      1f, out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").boost(), 0f);
}
private void testLeftOpenRange(int precisionStep) throws Exception {
  String field = "field" + precisionStep;
  int count = 3000;
  long upper = (count - 1) * distance + (distance / 3) + startOffset;

  // null lower bound => left-open range; upper bound inclusive
  LegacyNumericRangeQuery<Long> q =
      LegacyNumericRangeQuery.newLongRange(field, precisionStep, null, upper, true, true);
  TopDocs topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
  ScoreDoc[] sd = topDocs.scoreDocs;
  assertNotNull(sd);
  assertEquals("Score doc count", count, sd.length);
  Document doc = searcher.doc(sd[0].doc);
  assertEquals("First doc", startOffset, doc.getField(field).numericValue().longValue());
  doc = searcher.doc(sd[sd.length - 1].doc);
  assertEquals("Last doc", (count - 1) * distance + startOffset,
      doc.getField(field).numericValue().longValue());

  // marking the absent lower bound as exclusive must not change the result
  q = LegacyNumericRangeQuery.newLongRange(field, precisionStep, null, upper, false, true);
  topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
  sd = topDocs.scoreDocs;
  assertNotNull(sd);
  assertEquals("Score doc count", count, sd.length);
  doc = searcher.doc(sd[0].doc);
  assertEquals("First doc", startOffset, doc.getField(field).numericValue().longValue());
  doc = searcher.doc(sd[sd.length - 1].doc);
  assertEquals("Last doc", (count - 1) * distance + startOffset,
      doc.getField(field).numericValue().longValue());
}
private void updateDoc(JSONObject object, Document doc) {
  String[] names = JSONObject.getNames(object);
  if (names == null) {
    return; // JSONObject.getNames returns null for an empty object
  }
  for (String name : names) {
    IndexableField field = checkFieldType(name, object, doc.getField(name));
    // only add the field if the document does not already contain it
    if (doc.getField(name) == null) {
      doc.add(field);
    }
  }
}
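// Usage sketch (an assumption, not from the source): checkFieldType is taken to map a
// JSON value to an appropriate IndexableField, so a hypothetical payload would add a
// "title" field only if the existing document lacks one:
//
//   JSONObject json = new JSONObject("{\"title\":\"Lucene in Action\"}");
//   updateDoc(json, existingDoc);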
@Test
public void testCopyFieldWithFieldBoost() {
  SolrCore core = h.getCore();
  IndexSchema schema = core.getLatestSchema();
  assertFalse(schema.getField("title").omitNorms());
  assertTrue(schema.getField("title_stringNoNorms").omitNorms());

  SolrInputDocument doc = new SolrInputDocument();
  doc.addField("title", "mytitle", 3.0f);
  Document out = DocumentBuilder.toDocument(doc, schema);
  assertNotNull(out.get("title_stringNoNorms"));
  assertEquals(
      "title_stringNoNorms has omitNorms=true, so its boost must be reset to 1.0",
      1.0f, out.getField("title_stringNoNorms").boost(), 0f);
  assertEquals("title keeps its boost of 3", 3.0f, out.getField("title").boost(), 0f);
}
public void testRAMDirectory() throws IOException {
  Directory dir = newFSDirectory(indexDir);
  MockDirectoryWrapper ramDir = new MockDirectoryWrapper(random, new RAMDirectory(dir));

  // close the underlying directory
  dir.close();

  // check size
  assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());

  // open reader to test document count
  IndexReader reader = IndexReader.open(ramDir, true);
  assertEquals(docsToAdd, reader.numDocs());

  // open searcher to check that all docs are there
  IndexSearcher searcher = newSearcher(reader);

  // fetch all documents
  for (int i = 0; i < docsToAdd; i++) {
    Document doc = searcher.doc(i);
    assertNotNull(doc.getField("content"));
  }

  // cleanup
  reader.close();
  searcher.close();
}
public static String highlightField(
    Highlighter highlighter, Analyzer analyzer, Document doc, String field) {
  String docContent = doc.get(field);
  try {
    String fragment = highlighter.getBestFragment(analyzer, field, docContent);
    if (fragment == null) {
      // no query term matched: fall back to the first 50 characters
      fragment = docContent.length() >= 50 ? docContent.substring(0, 50) : docContent;
    }
    doc.getField(field).setValue(fragment);
    return fragment;
  } catch (InvalidTokenOffsetsException | IOException e) {
    e.printStackTrace();
  }
  return docContent;
}
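// Usage sketch (a minimal example assuming the standard Lucene highlighter API): the
// Highlighter handed to highlightField is typically built from the query that produced
// the hit, so the matched terms are the ones that get emphasized.
//
//   Query query = new QueryParser("contents", analyzer).parse("lucene");
//   Highlighter highlighter = new Highlighter(
//       new SimpleHTMLFormatter("<b>", "</b>"), new QueryScorer(query));
//   String fragment = highlightField(highlighter, analyzer, doc, "contents");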
// LUCENE-1219
public void testBinaryFieldOffsetLength() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(
      dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  byte[] b = new byte[50];
  for (int i = 0; i < 50; i++) {
    b[i] = (byte) (i + 77);
  }
  Document doc = new Document();
  // store only the 17-byte slice starting at offset 10
  Field f = new StoredField("binary", b, 10, 17);
  byte[] bx = f.binaryValue().bytes;
  assertNotNull(bx);
  assertEquals(50, bx.length);
  assertEquals(10, f.binaryValue().offset);
  assertEquals(17, f.binaryValue().length);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader ir = DirectoryReader.open(dir);
  Document doc2 = ir.document(0);
  IndexableField f2 = doc2.getField("binary");
  b = f2.binaryValue().bytes;
  assertNotNull(b);
  assertEquals(17, b.length); // only the slice was stored
  assertEquals(87, b[0]); // first byte of the slice: 10 + 77
  ir.close();
  dir.close();
}
public String tokens(String field) {
  try {
    Field f = doc.getField(field);
    if (f == null) {
      fail("No such field " + field);
    }
    if (!f.isTokenized()) {
      String val = value(field);
      Token t = new Token(val, 0, val.length());
      return t.getPositionIncrement() + " [" + t.termText() + "]";
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts == null && f.stringValue() != null) {
      ts = analyzer.tokenStream(field, f.stringValue());
    }
    if (ts == null && f.readerValue() != null) {
      ts = analyzer.tokenStream(field, f.readerValue());
    }
    if (ts == null) {
      fail("No token stream for field " + field);
    }
    Token t = null;
    StringBuilder sb = new StringBuilder();
    while ((t = ts.next()) != null) {
      sb.append(t.getPositionIncrement()).append(" [").append(t.termText()).append("] ");
    }
    return sb.toString().trim();
  } catch (Exception e) {
    e.printStackTrace();
    fail(e.getMessage());
    return null;
  }
}
public void deleteFieldFromIndex(String fieldName, int docId, Analyzer analyzer)
    throws IOException, ConfigurationException {
  Document doc = reader.document(docId);
  doc.removeFields(fieldName);
  Field uri = doc.getField("URI");
  Term term = new Term("URI", uri.stringValue());
  // re-index the stripped document in place, keyed by its URI
  writer.updateDocument(term, doc, analyzer);
}
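// Usage sketch (hypothetical call; the field name and analyzer are illustrative). Note
// that updateDocument only buffers the change: the surrounding code must commit the
// writer before the stripped document becomes visible to new readers.
//
//   deleteFieldFromIndex("abstract", 42, new StandardAnalyzer());
//   writer.commit();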
private List<String> getHitIds(Query query) throws IOException {
  List<String> matchIds = new ArrayList<String>();
  LinkedHashSet<String> matchIdsSet = new LinkedHashSet<String>();
  if (query != null) {
    if (r != null) {
      if (!r.isCurrent()) {
        // wait for any pending writer to release the index lock before reopening
        while (r.isLocked(dir)) {
          // busy-wait
        }
        r = IndexReader.open(dir);
        searcher = new IndexSearcher(r);
      }
    } else {
      r = IndexReader.open(dir);
      searcher = new IndexSearcher(r);
    }
    DocumentCollector s = new DocumentCollector(searcher);
    searcher.search(query, s);
    List hits = s.getStudies();
    for (int i = 0; i < hits.size(); i++) {
      Document d = (Document) hits.get(i);
      String authorityStr = d.getField("authority").stringValue();
      String studyIdStr = d.getField("studyId").stringValue();
      String fileName = authorityStr + File.separator + studyIdStr;
      matchIdsSet.add(fileName);
    }
    searcher.close(); // close once, after the hits have been consumed
  }
  matchIds.addAll(matchIdsSet);
  return matchIds;
}
protected Diff<Document, Diff<Fieldable, DocumentDiff>> compare(
    IndexReader reader1, IndexReader reader2, String keyFieldName)
    throws IOException, ParseException {
  Diff<Document, Diff<Fieldable, DocumentDiff>> result =
      new Diff<Document, Diff<Fieldable, DocumentDiff>>();

  // Pass 1: every live doc in reader1 is either added (missing from reader2) or diffed.
  // Iterate up to maxDoc(), not numDocs(): doc IDs can exceed numDocs() when the index
  // has deletions.
  for (int docId = 0; docId < reader1.maxDoc(); docId++) {
    if (!reader1.isDeleted(docId)) {
      Document doc1 = reader1.document(docId);
      Field keyField = doc1.getField(keyFieldName);
      if (keyField == null) {
        throw new IllegalArgumentException(
            "Key field '" + keyFieldName + "' should be defined in all docs in the index");
      }
      Document doc2 = findByKey(reader2, keyField);
      if (doc2 == null) {
        result.addAdded(doc1);
      } else {
        Diff<Fieldable, DocumentDiff> diff =
            CompareUtils.diff(keyField.stringValue(), doc1, doc2);
        if (!diff.isEquals()) {
          result.addDiff(diff);
        }
      }
    }
  }

  // Pass 2: live docs present only in reader2 are recorded as removed.
  for (int docId = 0; docId < reader2.maxDoc(); docId++) {
    if (!reader2.isDeleted(docId)) {
      Document doc2 = reader2.document(docId);
      Field keyField = doc2.getField(keyFieldName);
      if (keyField == null) {
        throw new IllegalArgumentException(
            "Key field '" + keyFieldName + "' should be defined in all docs in the index");
      }
      Document doc1 = findByKey(reader1, keyField);
      if (doc1 == null) {
        result.addRemoved(doc2);
      }
    }
  }
  return result;
}
@Override
public void modifyIndex(final IndexWriter writer, final IndexSearcher searcher)
    throws ModifyKnowledgeBaseException {
  for (final Map.Entry<String, HashMap<String, String>> entry : this.attributes.entrySet()) {
    final String key = entry.getKey();
    final HashMap<String, String> hash = entry.getValue();
    final QueryParser qp = new QueryParser(this.docPrimaryKey, new DoserIDAnalyzer());
    try {
      final TopDocs top = searcher.search(qp.parse(QueryParserBase.escape(key)), 1);
      final ScoreDoc[] scores = top.scoreDocs;
      if (scores.length > 0) {
        // Bug fix: create a new Document and copy the fields over, since a retrieved
        // document cannot simply be re-indexed as-is.
        final Document doc = new Document();
        final Document currentDoc = searcher.getIndexReader().document(scores[0].doc);
        final List<IndexableField> fields = currentDoc.getFields();
        for (final IndexableField field : fields) {
          if (field.stringValue() != null) {
            if (field.name().equalsIgnoreCase(docPrimaryKey)) {
              doc.add(new StringField(field.name(), field.stringValue(), Field.Store.YES));
            } else {
              doc.add(new TextField(field.name(), field.stringValue(), Field.Store.YES));
            }
          }
        }
        final List<Document> docListToAdd = new LinkedList<Document>();
        docListToAdd.add(doc);
        for (final Map.Entry<String, String> subentry : hash.entrySet()) {
          final IndexableField field = doc.getField(subentry.getKey());
          if (field == null) {
            throw new ModifyKnowledgeBaseException("Update field not found", null);
          }
          if (this.action.equals(KBModifications.OVERRIDEFIELD)) {
            doc.removeFields(subentry.getKey());
            String[] newEntries = generateSeperatedFieldStrings(subentry.getValue());
            for (int i = 0; i < newEntries.length; i++) {
              doc.add(new TextField(subentry.getKey(), newEntries[i], Field.Store.YES));
            }
          } else if (this.action.equals(KBModifications.UPDATERELATEDLABELS)) {
            doc.removeFields(subentry.getKey());
            doc.add(updateOccurrences(subentry.getValue(), field, "surroundinglabels"));
          } else if (this.action.equals(KBModifications.UPDATEOCCURRENCES)) {
            doc.removeFields(subentry.getKey());
            doc.add(updateOccurrences(subentry.getValue(), field, "occurrences"));
          }
        }
        writer.updateDocuments(new Term(this.docPrimaryKey, key), docListToAdd);
      } else {
        throw new ModifyKnowledgeBaseException("Document not found", null);
      }
    } catch (final IOException e) {
      throw new ModifyKnowledgeBaseException("IOException in IndexSearcher", e);
    } catch (final ParseException e) {
      throw new ModifyKnowledgeBaseException("QueryParser exception", e);
    }
  }
}
private void mockDocument(int docId, double price, double discount, boolean isCloseout)
    throws IOException {
  Document doc = PowerMockito.mock(Document.class);
  when(searcher.doc(eq(docId), any(Set.class))).thenReturn(doc);
  when(searcher.getSchema()).thenReturn(schema);

  IndexableField priceField = mock(IndexableField.class);
  when(doc.getField(FIELD_PRICE)).thenReturn(priceField);
  when(priceField.numericValue()).thenReturn(price);

  IndexableField discountField = mock(IndexableField.class);
  when(doc.getField(FIELD_DISCOUNT)).thenReturn(discountField);
  when(discountField.numericValue()).thenReturn(discount);

  IndexableField closeoutField = mock(IndexableField.class);
  when(doc.getField(FIELD_CLOSEOUT)).thenReturn(closeoutField);
  when(closeoutField.stringValue()).thenReturn(isCloseout ? "T" : "F");
}
@Override
public Object get(String name, Document document) {
  final IndexableField field = document.getField(name);
  if (field != null) {
    // numericValue() returns a Number; convert explicitly rather than casting,
    // which would not compile
    return new Date(field.numericValue().longValue());
  } else {
    return null;
  }
}
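// Write-side counterpart (a sketch under the assumption that dates are indexed as epoch
// millis): for the getter above to find a numericValue(), the date must have been stored
// as a numeric field, e.g.
//
//   document.add(new StoredField(name, date.getTime()));
//
// StoredField(String, long) keeps the value retrievable as a Number at read time.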
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
  // get the first document:
  if (!IndexReader.indexExists(reader.directory())) {
    throw new FileNotFoundException("No index found at this specific location.");
  }
  Document doc = reader.document(0);

  ScalableColor sc = null;
  ColorLayout cl = null;
  EdgeHistogram eh = null;

  String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
  if (cls != null && cls.length > 0) {
    cl = new ColorLayout();
    cl.setStringRepresentation(cls[0]);
  }
  String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
  if (scs != null && scs.length > 0) {
    sc = new ScalableColor();
    sc.setStringRepresentation(scs[0]);
  }
  String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
  if (ehs != null && ehs.length > 0) {
    eh = new EdgeHistogram();
    eh.setStringRepresentation(ehs[0]);
  }

  HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

  // find duplicates: iterate up to maxDoc(), since doc IDs can exceed numDocs()
  // when the index has deletions
  boolean hasDeletions = reader.hasDeletions();
  int docs = reader.maxDoc();
  int numDuplicates = 0;
  for (int i = 0; i < docs; i++) {
    if (hasDeletions && reader.isDeleted(i)) {
      continue;
    }
    Document d = reader.document(i);
    float distance = getDistance(d, cl, sc, eh);
    if (!duplicates.containsKey(distance)) {
      duplicates.put(distance, new LinkedList<String>());
    } else {
      numDuplicates++;
    }
    duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
  }
  if (numDuplicates == 0) {
    return null;
  }
  LinkedList<List<String>> results = new LinkedList<List<String>>();
  for (float f : duplicates.keySet()) {
    if (duplicates.get(f).size() > 1) {
      results.add(duplicates.get(f));
    }
  }
  return new SimpleImageDuplicates(results);
}
@Override
public String getShardIdentifier(
    Class<?> entityType, Serializable id, String idAsString, Document document) {
  if (entityType.equals(Animal.class)) {
    final String typeValue = document.getField("type").stringValue();
    addShard(typeValue);
    return typeValue;
  }
  throw new RuntimeException("Animal expected but found " + entityType);
}
@Test
public void testWriteFields() {
  String[] fields = new String[] {"s", "i"};
  PdxLuceneSerializer mapper = new PdxLuceneSerializer(fields);

  PdxInstance i = Mockito.mock(PdxInstance.class);
  Mockito.when(i.hasField("s")).thenReturn(true);
  Mockito.when(i.hasField("i")).thenReturn(true);
  Mockito.when(i.getField("s")).thenReturn("a");
  Mockito.when(i.getField("i")).thenReturn(5);

  Document doc = new Document();
  mapper.toDocument(i, doc);

  assertEquals(2, doc.getFields().size());
  assertEquals("a", doc.getField("s").stringValue());
  assertEquals(5, doc.getField("i").numericValue());
}
@Test
public void baseUIMAAnalyzerIntegrationTest() throws Exception {
  Directory dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));

  // add the first doc
  Document doc = new Document();
  String dummyTitle = "this is a dummy title ";
  doc.add(new TextField("title", dummyTitle, Field.Store.YES));
  String dummyContent = "there is some content written here";
  doc.add(new TextField("contents", dummyContent, Field.Store.YES));
  writer.addDocument(doc);
  writer.commit();

  // try the search over the first doc
  DirectoryReader directoryReader = DirectoryReader.open(dir);
  IndexSearcher indexSearcher = newSearcher(directoryReader);
  TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
  assertTrue(result.totalHits > 0);
  Document d = indexSearcher.doc(result.scoreDocs[0].doc);
  assertNotNull(d);
  assertNotNull(d.getField("title"));
  assertEquals(dummyTitle, d.getField("title").stringValue());
  assertNotNull(d.getField("contents"));
  assertEquals(dummyContent, d.getField("contents").stringValue());

  // add a second doc
  doc = new Document();
  String dogmasTitle = "dogmas";
  doc.add(new TextField("title", dogmasTitle, Field.Store.YES));
  String dogmasContents = "white men can't jump";
  doc.add(new TextField("contents", dogmasContents, Field.Store.YES));
  writer.addDocument(doc);
  writer.commit();

  directoryReader.close();
  directoryReader = DirectoryReader.open(dir);
  indexSearcher = newSearcher(directoryReader);
  result = indexSearcher.search(new MatchAllDocsQuery(), 2);
  Document d1 = indexSearcher.doc(result.scoreDocs[1].doc);
  assertNotNull(d1);
  assertNotNull(d1.getField("title"));
  assertEquals(dogmasTitle, d1.getField("title").stringValue());
  assertNotNull(d1.getField("contents"));
  assertEquals(dogmasContents, d1.getField("contents").stringValue());

  // do a matchalldocs query to retrieve both docs
  result = indexSearcher.search(new MatchAllDocsQuery(), 2);
  assertEquals(2, result.totalHits);

  writer.close();
  indexSearcher.getIndexReader().close();
  dir.close();
}
@Test
public void testMultiField() throws Exception {
  SolrCore core = h.getCore();

  // a poly field expands into one dynamic subfield per dimension
  SolrInputDocument doc = new SolrInputDocument();
  doc.addField("home", "2.2,3.3", 1.0f);
  Document out = DocumentBuilder.toDocument(doc, core.getLatestSchema());

  // contains the stored value and term vector, if there is one
  assertNotNull(out.get("home"));
  assertNotNull(out.getField("home_0" + FieldType.POLY_FIELD_SEPARATOR + "double"));
  assertNotNull(out.getField("home_1" + FieldType.POLY_FIELD_SEPARATOR + "double"));
}
public void testReadSkip() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf =
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  FieldType ft = new FieldType();
  ft.setStored(true);
  ft.freeze();

  final String string = _TestUtil.randomSimpleString(random(), 50);
  final byte[] bytes = string.getBytes("UTF-8");
  final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
  final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
  final float f = random().nextFloat();
  final double d = random().nextDouble();

  List<Field> fields = Arrays.asList(
      new Field("bytes", bytes, ft),
      new Field("string", string, ft),
      new LongField("long", l, Store.YES),
      new IntField("int", i, Store.YES),
      new FloatField("float", f, Store.YES),
      new DoubleField("double", d, Store.YES));

  for (int k = 0; k < 100; ++k) {
    Document doc = new Document();
    for (Field fld : fields) {
      doc.add(fld);
    }
    iw.w.addDocument(doc);
  }
  iw.commit();

  final DirectoryReader reader = DirectoryReader.open(dir);
  final int docID = random().nextInt(100);
  for (Field fld : fields) {
    String fldName = fld.name();
    final Document sDoc = reader.document(docID, Collections.singleton(fldName));
    final IndexableField sField = sDoc.getField(fldName);
    if (Field.class.equals(fld.getClass())) {
      assertEquals(fld.binaryValue(), sField.binaryValue());
      assertEquals(fld.stringValue(), sField.stringValue());
    } else {
      assertEquals(fld.numericValue(), sField.numericValue());
    }
  }
  reader.close();
  iw.close();
  dir.close();
}
private boolean verifyIndex(Directory directory, int startAt) throws IOException {
  boolean fail = false;
  IndexReader reader = DirectoryReader.open(directory);

  int max = reader.maxDoc();
  for (int i = 0; i < max; i++) {
    Document temp = reader.document(i);
    // compare the index doc number to the value that it should be
    if (!temp.getField("count").stringValue().equals((i + startAt) + "")) {
      fail = true;
      System.out.println("Document " + (i + startAt) + " is returning document "
          + temp.getField("count").stringValue());
    }
  }
  reader.close();
  return fail; // true if any document was out of order
}
/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in a separate directory. Documents added since then still need a ranked
 * list of reference objects, so we (i) find all documents missing the field "ro-order" and
 * (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
  IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
  // iterate up to maxDoc(), since doc IDs can exceed numDocs() when the index has deletions
  int maxDoc = reader.maxDoc();
  boolean hasDeletions = reader.hasDeletions();
  int countUpdated = 0;

  IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
  ImageSearcher searcher =
      new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
  Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
  perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
  PerFieldAnalyzerWrapper aWrapper =
      new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

  IndexWriter iw = new IndexWriter(
      FSDirectory.open(new File(indexPath)),
      new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
  StringBuilder sb = new StringBuilder(256);
  // needed to check whether a document is deleted
  Bits liveDocs = MultiFields.getLiveDocs(reader);
  for (int i = 0; i < maxDoc; i++) {
    if (hasDeletions && !liveDocs.get(i)) {
      continue; // if it is deleted, just ignore it
    }
    Document document = reader.document(i);
    if (document.getField("ro-order") == null) { // if the field is not here we create it
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);
      countUpdated++;
    }
    // progress report
    progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
  }
  iw.commit();
  iw.close();
}
private void assertTopDocs(TopDocs topDocs, String... ids) throws IOException {
  // fails when the query returns fewer hits than there are expected IDs
  assertTrue(ids.length <= topDocs.totalHits, "Query returned fewer hits than expected");
  Set<String> foundIDs = new HashSet<String>();
  for (ScoreDoc doc : topDocs.scoreDocs) {
    Document foundDocument = indexSearcher.doc(doc.doc);
    foundIDs.add(foundDocument.getField("id").stringValue());
  }
  for (String id : ids) {
    assertTrue(foundIDs.contains(id), "ID [" + id + "] was not found in query results");
  }
}
/**
 * Test for constant score + boolean query + filter; the other tests only use the constant
 * score mode.
 */
private void testRange(int precisionStep) throws Exception {
  String field = "field" + precisionStep;
  int count = 3000;
  long lower = (distance * 3 / 2) + startOffset,
      upper = lower + count * distance + (distance / 3);
  LegacyNumericRangeQuery<Long> q =
      LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
  for (byte i = 0; i < 2; i++) {
    TopDocs topDocs;
    String type;
    switch (i) {
      case 0:
        type = " (constant score filter rewrite)";
        q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
        topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
        break;
      case 1:
        type = " (constant score boolean rewrite)";
        q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
        topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
        break;
      default:
        return;
    }
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count" + type, count, sd.length);
    Document doc = searcher.doc(sd[0].doc);
    assertEquals("First doc" + type, 2 * distance + startOffset,
        doc.getField(field).numericValue().longValue());
    doc = searcher.doc(sd[sd.length - 1].doc);
    assertEquals("Last doc" + type, (1 + count) * distance + startOffset,
        doc.getField(field).numericValue().longValue());
  }
}
private String fieldsToString(Document doc, String[] fields) {
  StringBuilder buffer = new StringBuilder();
  for (int i = 0; i < fields.length; i++) {
    Field field = doc.getField(fields[i]);
    if (field == null) {
      continue;
    }
    if (buffer.length() > 0) {
      buffer.append(", ");
    }
    buffer.append(field.stringValue());
  }
  return buffer.toString();
}
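// Usage sketch: concatenates the stored values of the requested fields, silently skipping
// any field the document lacks (the field names below are illustrative).
//
//   String summary = fieldsToString(doc, new String[] {"title", "author"});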
protected NamedList serializeTopDocs(QueryCommandResult result) throws IOException {
  NamedList<Object> queryResult = new NamedList<>();
  queryResult.add("matches", result.getMatches());
  queryResult.add("totalHits", result.getTopDocs().totalHits);
  // debug: assert !Float.isNaN(result.getTopDocs().getMaxScore()) ==
  // rb.getGroupingSpec().isNeedScore();
  if (!Float.isNaN(result.getTopDocs().getMaxScore())) {
    queryResult.add("maxScore", result.getTopDocs().getMaxScore());
  }
  List<NamedList> documents = new ArrayList<>();
  queryResult.add("documents", documents);

  final IndexSchema schema = rb.req.getSearcher().getSchema();
  SchemaField uniqueField = schema.getUniqueKeyField();
  for (ScoreDoc scoreDoc : result.getTopDocs().scoreDocs) {
    NamedList<Object> document = new NamedList<>();
    documents.add(document);

    Document doc = retrieveDocument(uniqueField, scoreDoc.doc);
    document.add("id", uniqueField.getType().toExternal(doc.getField(uniqueField.getName())));
    if (!Float.isNaN(scoreDoc.score)) {
      document.add("score", scoreDoc.score);
    }
    if (!FieldDoc.class.isInstance(scoreDoc)) {
      continue; // thus don't add sortValues below
    }

    FieldDoc fieldDoc = (FieldDoc) scoreDoc;
    Object[] convertedSortValues = new Object[fieldDoc.fields.length];
    for (int j = 0; j < fieldDoc.fields.length; j++) {
      Object sortValue = fieldDoc.fields[j];
      Sort groupSort = rb.getGroupingSpec().getGroupSort();
      SchemaField field = groupSort.getSort()[j].getField() != null
          ? schema.getFieldOrNull(groupSort.getSort()[j].getField())
          : null;
      if (field != null) {
        FieldType fieldType = field.getType();
        if (sortValue != null) {
          sortValue = fieldType.marshalSortValue(sortValue);
        }
      }
      convertedSortValues[j] = sortValue;
    }
    document.add("sortValues", convertedSortValues);
  }
  return queryResult;
}
@Test
public void testRealisticKeys() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  RandomIndexWriter iw =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
  LineFileDocs lineFileDocs = new LineFileDocs(random());
  int num = Math.min(1000, atLeast(100));
  Map<String, Integer> mappings = new HashMap<>();
  for (int i = 0; i < num; i++) {
    Document document = lineFileDocs.nextDoc();
    String title = document.getField("title").stringValue();
    int weight = Math.abs(random().nextInt());
    Integer prevWeight = mappings.get(title);
    if (prevWeight == null || prevWeight < weight) {
      mappings.put(title, weight);
    }
    Document doc = new Document();
    doc.add(new SuggestField("suggest_field", title, weight));
    iw.addDocument(doc);
    if (rarely()) {
      iw.commit();
    }
  }

  DirectoryReader reader = iw.getReader();
  SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
  for (Map.Entry<String, Integer> entry : mappings.entrySet()) {
    String title = entry.getKey();
    PrefixCompletionQuery query =
        new PrefixCompletionQuery(analyzer, new Term("suggest_field", title));
    TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size());
    assertTrue(suggest.totalHits > 0);
    boolean matched = false;
    for (ScoreDoc scoreDoc : suggest.scoreDocs) {
      matched = Float.compare(scoreDoc.score, (float) entry.getValue()) == 0;
      if (matched) {
        break;
      }
    }
    assertTrue("at least one of the entries should have the score", matched);
  }
  reader.close();
  iw.close();
}
public void testLsaFilter() throws IOException {
  // index images
  // indexFiles();

  // search
  System.out.println("---< searching >-------------------------");
  IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
  Document document = reader.document(0);
  ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(100);
  ImageSearchHits hits = searcher.search(document, reader);

  // rerank
  System.out.println("---< filtering >-------------------------");
  LsaFilter filter = new LsaFilter(CEDD.class, DocumentBuilder.FIELD_NAME_CEDD);
  hits = filter.filter(hits, document);

  // output
  FileUtils.saveImageResultsToHtml(
      "filtertest", hits,
      document.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
}
@Override
public float extract(Document doc, Terms terms, RerankerContext context) {
  final String str = doc.getField(StatusField.TEXT.name).stringValue();
  final String matchStr = "http://";
  int lastIndex = 0;
  int count = 0;
  while (lastIndex != -1) {
    lastIndex = str.indexOf(matchStr, lastIndex);
    if (lastIndex != -1) {
      count++;
      lastIndex += matchStr.length();
    }
  }
  return (float) count;
}
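// Behavior sketch (illustrative input): the loop counts non-overlapping occurrences of
// the literal prefix "http://", so a text of "see http://a and http://b" scores 2.0f;
// note that https:// links are not counted by this matcher.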