/**
 * Close the index.
 *
 * <p>Tells the listener that indexing has ended and closing has started, adds the listener's
 * processed-token count to the index structure, writes the index metadata, closes the
 * underlying searcher, and then tells the listener that closing is finished.
 *
 * @throws IOException
 * @throws CorruptIndexException
 */
public void close() throws CorruptIndexException, IOException {
    // Signal to the listener that we're done indexing and closing the index (which might take a
    // while)
    getListener().indexEnd();
    getListener().closeStart();

    // Add the tokens counted by the listener to the index structure, then write the metadata
    // before the searcher itself is closed.
    // NOTE(review): if either of the next two calls throws, searcher.close() below is skipped
    // and the closeEnd/indexerClosed notifications are not sent - confirm whether a
    // try/finally is wanted here.
    searcher.getIndexStructure().addToTokenCount(getListener().getTokensProcessed());
    searcher.getIndexStructure().writeMetadata();
    searcher.close();

    // Signal that we're completely done now
    getListener().closeEnd();
    getListener().indexerClosed();
}
/**
 * Report how many more documents may still be indexed.
 *
 * <p>A negative {@code maxNumberOfDocsToIndex} is returned unchanged. Otherwise the result is
 * the configured maximum minus the number of documents the writer currently reports, clamped so
 * it never drops below zero.
 *
 * @return the number of documents left to process, or the negative configured maximum as-is
 */
public synchronized int docsToDoLeft() {
    try {
        if (maxNumberOfDocsToIndex >= 0) {
            int indexedSoFar = searcher.getWriter().numDocs();
            int remaining = maxNumberOfDocsToIndex - indexedSoFar;
            return remaining > 0 ? remaining : 0;
        }
        // Negative limit: hand it back untouched
        return maxNumberOfDocsToIndex;
    } catch (Exception e) {
        // Any failure while querying the writer is rethrown unchecked, cause preserved
        throw new RuntimeException(e);
    }
}
/**
 * Index a single document read from a Reader.
 *
 * <p>The DocIndexer is obtained from {@code createDocIndexer}. The listener is notified before
 * and after indexing. A warning is printed to {@code System.err} if the writer's document count
 * did not change, and another if the listener's processed-token count did not change.
 *
 * @param documentName some (preferably unique) name for this document (for example, the file
 *     name or path)
 * @param reader where to index from
 * @throws Exception
 */
private void indexReader(String documentName, Reader reader) throws Exception {
    getListener().fileStarted(documentName);

    // Snapshot the counters so we can tell afterwards whether anything was actually indexed
    final int docCountBefore = searcher.getWriter().numDocs();
    final long tokenCountBefore = getListener().getTokensProcessed();

    createDocIndexer(documentName, reader).index();
    getListener().fileDone(documentName);

    final boolean noDocsAdded = searcher.getWriter().numDocs() == docCountBefore;
    if (noDocsAdded) {
        System.err.println("*** Warning, couldn't index " + documentName + "; wrong format?");
    }

    final boolean noTokensAdded = getListener().getTokensProcessed() == tokenCountBefore;
    if (noTokensAdded) {
        System.err.println("*** Warning, no words indexed in " + documentName + "; wrong format?");
    }
}
@Override public Response handle() throws BlsException { Collection<String> indices = searchMan.getAllAvailableIndices(user.getUserId()); DataObjectMapAttribute doIndices = new DataObjectMapAttribute("index", "name"); // DataObjectList doIndices = new DataObjectList("index"); for (String indexName : indices) { DataObjectMapElement doIndex = new DataObjectMapElement(); Searcher searcher = searchMan.getSearcher(indexName); IndexStructure struct = searcher.getIndexStructure(); doIndex.put("displayName", struct.getDisplayName()); doIndex.put("status", searchMan.getIndexStatus(indexName)); String documentFormat = struct.getDocumentFormat(); if (documentFormat != null && documentFormat.length() > 0) doIndex.put("documentFormat", documentFormat); doIndex.put("timeModified", struct.getTimeModified()); if (struct.getTokenCount() > 0) doIndex.put("tokenCount", struct.getTokenCount()); doIndices.put(indexName, doIndex); } DataObjectMapElement doUser = new DataObjectMapElement(); doUser.put("loggedIn", user.isLoggedIn()); if (user.isLoggedIn()) doUser.put("id", user.getUserId()); doUser.put( "canCreateIndex", user.isLoggedIn() ? searchMan.canCreateIndex(user.getUserId()) : false); DataObjectMapElement response = new DataObjectMapElement(); response.put("blacklabBuildTime", Searcher.getBlackLabBuildTime()); response.put("indices", doIndices); response.put("user", doUser); response.put("helpPageUrl", servlet.getServletContext().getContextPath() + "/help"); if (debugMode) { response.put("cacheStatus", searchMan.getCacheStatusDataObject()); } Response responseObj = new Response(response); responseObj.setCacheAllowed(false); // You can create/delete indices, don't cache the list return responseObj; }
/**
 * Construct Indexer
 *
 * <p>Opens the index for writing and, when no DocIndexer class is supplied, tries to determine
 * it from the document format recorded in the index structure. If that fails, the index opened
 * here is rolled back and closed again before the exception is thrown, so a failed construction
 * does not leave the index open.
 *
 * @param directory the main BlackLab index directory
 * @param create if true, creates a new index; otherwise, appends to existing index
 * @param docIndexerClass how to index the files, or null to autodetect
 * @param indexTemplateFile JSON file to use as template for index structure / metadata (if
 *     creating new index)
 * @throws DocumentFormatException if no DocIndexer was specified and autodetection failed
 * @throws IOException
 */
public Indexer(
        File directory,
        boolean create,
        Class<? extends DocIndexer> docIndexerClass,
        File indexTemplateFile)
        throws DocumentFormatException, IOException {
    this.docIndexerClass = docIndexerClass;

    searcher = Searcher.openForWriting(directory, create, indexTemplateFile);
    if (!create) {
        searcher.getIndexStructure().setModified();
    }

    if (this.docIndexerClass == null) {
        // No DocIndexer supplied; try to detect it from the index
        // metadata.
        String formatId = searcher.getIndexStructure().getDocumentFormat();
        if (formatId != null && formatId.length() > 0) {
            setDocIndexer(DocumentFormats.getIndexerClass(formatId));
        } else {
            // The constructor is about to fail, so no caller will ever get a reference they
            // could close. Release the index we just opened: roll back first so that the
            // subsequent close does not commit anything.
            try {
                searcher.rollback();
                searcher.close();
            } catch (Exception ignored) {
                // Best-effort cleanup; the format-detection failure below is the error
                // the caller needs to see.
            }
            throw new DocumentFormatException("Cannot detect document format for index!");
        }
    }

    // Field type for tokenized metadata fields: stored, indexed, with full term vectors
    metadataFieldTypeTokenized = new FieldType();
    metadataFieldTypeTokenized.setStored(true);
    metadataFieldTypeTokenized.setIndexed(true);
    metadataFieldTypeTokenized.setTokenized(true);
    metadataFieldTypeTokenized.setOmitNorms(true); // @@@ <-- depending on setting?
    metadataFieldTypeTokenized.setStoreTermVectors(true);
    metadataFieldTypeTokenized.setStoreTermVectorPositions(true);
    metadataFieldTypeTokenized.setStoreTermVectorOffsets(true);
    metadataFieldTypeTokenized.freeze();

    // Untokenized variant: same settings, copied before tokenization is switched off
    metadataFieldTypeUntokenized = new FieldType(metadataFieldTypeTokenized);
    metadataFieldTypeUntokenized.setTokenized(false);
    metadataFieldTypeUntokenized.freeze();
}
/**
 * Give access to the IndexWriter held by the underlying searcher.
 *
 * <p>Useful if e.g. you want to access FSDirectory.
 *
 * @return the IndexWriter
 */
protected IndexWriter getWriter() {
    IndexWriter writer = searcher.getWriter();
    return writer;
}
/**
 * Report where our index lives, as reported by the underlying searcher.
 *
 * @return the index directory
 */
public File getIndexLocation() {
    File indexDirectory = searcher.getIndexDirectory();
    return indexDirectory;
}
/**
 * Look up the content store for a field via the underlying searcher.
 *
 * @param fieldName the field to get the content store for
 * @return whatever the searcher returns for this field
 */
public ContentStore getContentStore(String fieldName) {
    ContentStore store = searcher.getContentStore(fieldName);
    return store;
}
/**
 * Store a token sequence in the forward index belonging to a field.
 *
 * @param fieldName what forward index to add this to
 * @param tokens the tokens to add
 * @param posIncr position increment associated with each token
 * @return the id assigned to the content
 * @throws RuntimeException if the searcher has no forward index for the given field
 */
public int addToForwardIndex(String fieldName, List<String> tokens, List<Integer> posIncr) {
    ForwardIndex target = searcher.getForwardIndex(fieldName);
    if (target != null) {
        return target.addDocument(tokens, posIncr);
    }
    throw new RuntimeException("No forward index for field " + fieldName);
}
/**
 * Updates the specified Document in the index.
 *
 * <p>The replacement document is analyzed with the searcher's analyzer, the same one
 * {@link #add(Document)} passes to the writer, so updated documents are analyzed the same way
 * as newly added ones. The listener is notified once the writer call has returned.
 *
 * @param term how to find the document to update
 * @param document the updated document
 * @throws CorruptIndexException
 * @throws IOException
 */
public void update(Term term, Document document) throws CorruptIndexException, IOException {
    // Pass the analyzer explicitly instead of relying on the writer's default, mirroring add()
    searcher.getWriter().updateDocument(term, document, searcher.getAnalyzer());
    getListener().luceneDocumentAdded();
}
/**
 * Hand a Lucene document to the index writer, using the searcher's analyzer, and notify the
 * listener afterwards.
 *
 * @param document the document to add
 * @throws CorruptIndexException
 * @throws IOException
 */
public void add(Document document) throws CorruptIndexException, IOException {
    IndexWriter writer = searcher.getWriter();
    writer.addDocument(document, searcher.getAnalyzer());

    getListener().luceneDocumentAdded();
}
/**
 * Call this to roll back any changes made to the index this session. Calling close() will
 * automatically commit any changes. If you call this method, then call close(), no changes will
 * be committed.
 *
 * <p>The listener is notified immediately before and immediately after the searcher performs
 * the rollback.
 */
public void rollback() {
    getListener().rollbackStart();
    // The actual rollback is delegated to the searcher
    searcher.rollback();
    getListener().rollbackEnd();
}
/**
 * Set the collator to use for sorting (passed to ForwardIndex to keep a sorted list of terms).
 * Defaults to English collator.
 *
 * <p>This method only forwards its argument to {@code Searcher.setDefaultCollator()}.
 *
 * @param collator the collator
 * @deprecated use Searcher.setDefaultCollator()
 */
@Deprecated
public static void setCollator(Collator collator) {
    // Kept only for backward compatibility; delegates to the replacement method
    Searcher.setDefaultCollator(collator);
}