/**
 * Loads the document for {@code docid} and augments it with facet values.
 *
 * <p>When this reader is composed of sub-readers, the call is routed to the sub-reader selected
 * by {@code readerIndex(...)}, with the id rebased against that sub-reader's start offset.
 * Otherwise the document is loaded from the superclass and, for every facet handler that reports
 * values for the document, the field named after the handler is rebuilt as the de-duplicated
 * union of the values already on the document and the handler's values. Rebuilt fields are added
 * with {@code Field.Store.NO} and {@code Field.Index.NOT_ANALYZED}.
 *
 * @param docid id of the document to load
 * @return the loaded document, with facet fields merged in where handlers supplied values
 * @throws IOException propagated from the underlying reader
 */
@Override
public Document document(int docid) throws IOException {
  if (_subReaders != null) {
    int readerIndex = readerIndex(docid, _starts, _subReaders.length);
    BoboIndexReader subReader = _subReaders[readerIndex];
    return subReader.document(docid - _starts[readerIndex]);
  }

  Document doc = super.document(docid);
  for (FacetHandler<?> facetHandler : _facetHandlerMap.values()) {
    String[] vals = facetHandler.getFieldValues(this, docid);
    if (vals == null) {
      continue;
    }

    String name = facetHandler.getName();
    Set<String> merged = new HashSet<String>();
    String[] stored = doc.getValues(name);
    // Tolerate a null array in case the Lucene version in use reports "no values" that way.
    if (stored != null) {
      merged.addAll(Arrays.asList(stored));
    }
    merged.addAll(Arrays.asList(vals));

    // removeField(name) drops only the first field carrying this name; since every stored value
    // is re-added below, all existing occurrences must be removed to avoid duplicate fields.
    doc.removeFields(name);
    for (String val : merged) {
      doc.add(new Field(name, val, Field.Store.NO, Field.Index.NOT_ANALYZED));
    }
  }
  return doc;
}
@Override public void analyze(Document doc, InputStream in) throws IOException { if (in.read() != 'B') { throw new IOException("Not BZIP2 format"); } if (in.read() != 'Z') { throw new IOException("Not BZIP2 format"); } BufferedInputStream gzis = new BufferedInputStream(new CBZip2InputStream(in)); String path = doc.get("path"); if (path != null && (path.endsWith(".bz2") || path.endsWith(".BZ2") || path.endsWith(".bz"))) { String newname = path.substring(0, path.lastIndexOf('.')); // System.err.println("BZIPPED OF = " + newname); fa = AnalyzerGuru.getAnalyzer(gzis, newname); if (fa instanceof BZip2Analyzer) { fa = null; } else { if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) { this.g = Genre.XREFABLE; } else { this.g = Genre.DATA; } fa.analyze(doc, gzis); if (doc.get("t") != null) { doc.removeField("t"); if (g == Genre.XREFABLE) { doc.add(new Field("t", g.typeName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); } } } } }
/**
 * Populating from an otherwise valid document whose stored {@code FK_PATHOGEN_ID} field has been
 * removed is expected to raise {@link FailedPopulateException}.
 */
@Test(expected = FailedPopulateException.class)
public void populateShouldFailWithDocumentMissingMandatoryField()
    throws IndexFailureException, InitializationException {
  Populator<HostPathogen> populator = new HostPathogenPopulator<HostPathogen>();

  Document incomplete = makeValidDocument();
  incomplete.removeField(BasePopulator.stored(LuceneFields.FK_PATHOGEN_ID));

  // The result is irrelevant: the call itself must throw.
  populator.populate(incomplete);
}
/**
 * Populating from an otherwise valid document whose stored {@code RUST_STATE} field has been
 * removed must still produce a non-null {@link HostPathogen}.
 */
@Test
public void populateShouldSucceedWithDocumentMissingNonMandatoryField()
    throws IndexFailureException, InitializationException {
  Populator<HostPathogen> populator = new HostPathogenPopulator<HostPathogen>();

  Document partial = makeValidDocument();
  partial.removeField(BasePopulator.stored(LuceneFields.RUST_STATE));

  HostPathogen populated = populator.populate(partial);
  Assert.assertNotNull(populated);
}
/**
 * Detaches the source field from {@code doc} and returns its native value.
 *
 * <p>The field is looked up under the index name reported by the mapper's source mapper. When
 * present, its value is converted through the source mapper and the field is removed from the
 * document.
 *
 * @param doc document to take the source field from; modified when the field exists
 * @param documentMapper mapper supplying the source field's index name and value conversion
 * @return the source bytes, or {@code null} when the document has no source field
 */
private byte[] extractSource(Document doc, DocumentMapper documentMapper) {
  String sourceFieldName = documentMapper.sourceMapper().names().indexName();

  Fieldable sourceField = doc.getFieldable(sourceFieldName);
  if (sourceField == null) {
    return null;
  }

  byte[] source = documentMapper.sourceMapper().nativeValue(sourceField);
  doc.removeField(sourceFieldName);
  return source;
}
/** * Reads the snapshots information from the given {@link Directory}. This method can be used if * the snapshots information is needed, however you cannot instantiate the deletion policy * (because e.g., some other process keeps a lock on the snapshots directory). */ public static Map<String, String> readSnapshotsInfo(Directory dir) throws IOException { IndexReader r = DirectoryReader.open(dir); Map<String, String> snapshots = new HashMap<String, String>(); try { int numDocs = r.numDocs(); // index is allowed to have exactly one document or 0. if (numDocs == 1) { Document doc = r.document(r.maxDoc() - 1); if (doc.getField(SNAPSHOTS_ID) == null) { throw new IllegalStateException("directory is not a valid snapshots store!"); } doc.removeField(SNAPSHOTS_ID); for (IndexableField f : doc) { snapshots.put(f.name(), f.stringValue()); } } else if (numDocs != 0) { throw new IllegalStateException( "should be at most 1 document in the snapshots directory: " + numDocs); } } finally { r.close(); } return snapshots; }