Exemple #1
0
  /**
   * Finishes indexing and closes the index.
   *
   * <p>Notifies the listener that indexing has ended and that closing has started, adds the number
   * of tokens the listener reports as processed to the index structure's token count, writes the
   * index metadata and closes the searcher. The searcher is closed even when updating the metadata
   * fails, so a failure does not leave the index open. The listener is only told that closing has
   * finished when every step succeeded.
   *
   * @throws IOException if an I/O error occurs while finishing or closing the index
   * @throws CorruptIndexException if the underlying index reports corruption
   */
  public void close() throws CorruptIndexException, IOException {

    // Signal to the listener that we're done indexing and closing the index (which might take a
    // while)
    getListener().indexEnd();
    getListener().closeStart();

    try {
      // Record the tokens processed in this session before the metadata is written.
      searcher.getIndexStructure().addToTokenCount(getListener().getTokensProcessed());
      searcher.getIndexStructure().writeMetadata();
    } finally {
      // Always release the searcher; otherwise a metadata failure would leave the index open.
      searcher.close();
    }

    // Signal that we're completely done now
    getListener().closeEnd();
    getListener().indexerClosed();
  }
Exemple #2
0
 /**
  * Reports how many more documents may still be indexed.
  *
  * <p>A negative {@code maxNumberOfDocsToIndex} is returned unchanged (presumably it means "no
  * limit" -- confirm with callers). Otherwise the result is the limit minus the number of
  * documents the index writer currently reports, clamped so it is never below zero.
  *
  * @return the number of documents still to process, or the negative limit itself
  * @throws RuntimeException wrapping any exception raised while computing the value
  */
 public synchronized int docsToDoLeft() {
   try {
     if (maxNumberOfDocsToIndex < 0) {
       return maxNumberOfDocsToIndex;
     }
     int remaining = maxNumberOfDocsToIndex - searcher.getWriter().numDocs();
     return remaining > 0 ? remaining : 0;
   } catch (Exception e) {
     throw new RuntimeException(e);
   }
 }
Exemple #3
0
  /**
   * Indexes one document read from a Reader, using the DocIndexer produced by {@code
   * createDocIndexer}.
   *
   * <p>The listener is notified before and after indexing. Afterwards the writer's document count
   * and the listener's token count are compared with their values from before indexing; a warning
   * is printed to standard error for each of the two that did not change.
   *
   * @param documentName some (preferably unique) name for this document (for example, the file name
   *     or path)
   * @param reader where to read the document from
   * @throws Exception if creating the DocIndexer or indexing the document fails
   */
  private void indexReader(String documentName, Reader reader) throws Exception {
    getListener().fileStarted(documentName);

    // Snapshot both counters so we can tell afterwards whether anything was actually indexed.
    final int docCountAtStart = searcher.getWriter().numDocs();
    final long tokenCountAtStart = getListener().getTokensProcessed();

    createDocIndexer(documentName, reader).index();
    getListener().fileDone(documentName);

    final boolean noDocsAdded = searcher.getWriter().numDocs() == docCountAtStart;
    if (noDocsAdded) {
      System.err.println("*** Warning, couldn't index " + documentName + "; wrong format?");
    }

    final boolean noTokensAdded = getListener().getTokensProcessed() == tokenCountAtStart;
    if (noTokensAdded) {
      System.err.println("*** Warning, no words indexed in " + documentName + "; wrong format?");
    }
  }
  /**
   * Builds the response that lists the indices available to the current user, together with
   * information about that user and the server.
   *
   * <p>For every available index the display name, status and modification time are reported; the
   * document format and token count are only included when they are non-empty / greater than zero.
   * The response is marked as non-cacheable.
   *
   * @return the response object wrapping the assembled data
   * @throws BlsException if one of the search manager calls fails
   */
  @Override
  public Response handle() throws BlsException {
    Collection<String> available = searchMan.getAllAvailableIndices(user.getUserId());
    DataObjectMapAttribute indexMap = new DataObjectMapAttribute("index", "name");
    for (String indexName : available) {
      IndexStructure structure = searchMan.getSearcher(indexName).getIndexStructure();

      DataObjectMapElement indexInfo = new DataObjectMapElement();
      indexInfo.put("displayName", structure.getDisplayName());
      indexInfo.put("status", searchMan.getIndexStatus(indexName));
      String format = structure.getDocumentFormat();
      if (format != null && !format.isEmpty()) {
        indexInfo.put("documentFormat", format);
      }
      indexInfo.put("timeModified", structure.getTimeModified());
      if (structure.getTokenCount() > 0) {
        indexInfo.put("tokenCount", structure.getTokenCount());
      }
      indexMap.put(indexName, indexInfo);
    }

    // Information about the requesting user; the id is only exposed when logged in.
    DataObjectMapElement userInfo = new DataObjectMapElement();
    userInfo.put("loggedIn", user.isLoggedIn());
    if (user.isLoggedIn()) {
      userInfo.put("id", user.getUserId());
    }
    userInfo.put(
        "canCreateIndex", user.isLoggedIn() && searchMan.canCreateIndex(user.getUserId()));

    DataObjectMapElement payload = new DataObjectMapElement();
    payload.put("blacklabBuildTime", Searcher.getBlackLabBuildTime());
    payload.put("indices", indexMap);
    payload.put("user", userInfo);
    payload.put("helpPageUrl", servlet.getServletContext().getContextPath() + "/help");
    if (debugMode) {
      payload.put("cacheStatus", searchMan.getCacheStatusDataObject());
    }

    // You can create/delete indices, so the list must not be cached.
    Response result = new Response(payload);
    result.setCacheAllowed(false);
    return result;
  }
Exemple #5
0
  /**
   * Construct Indexer
   *
   * <p>Opens the index for writing and, when no DocIndexer class is supplied, looks up the
   * document format stored in the index metadata to select one. If no format can be determined,
   * the just-opened index is rolled back and closed again before the exception is thrown, so a
   * failed construction does not leave the index open. Finally the two field types used for
   * metadata fields (tokenized and untokenized) are set up and frozen.
   *
   * @param directory the main BlackLab index directory
   * @param create if true, creates a new index; otherwise, appends to existing index
   * @param docIndexerClass how to index the files, or null to autodetect
   * @param indexTemplateFile JSON file to use as template for index structure / metadata (if
   *     creating new index)
   * @throws DocumentFormatException if no DocIndexer was specified and autodetection failed
   * @throws IOException if opening the index (or closing it again after a failed autodetection)
   *     fails
   */
  public Indexer(
      File directory,
      boolean create,
      Class<? extends DocIndexer> docIndexerClass,
      File indexTemplateFile)
      throws DocumentFormatException, IOException {
    this.docIndexerClass = docIndexerClass;

    searcher = Searcher.openForWriting(directory, create, indexTemplateFile);
    if (!create) {
      searcher.getIndexStructure().setModified();
    }

    if (this.docIndexerClass == null) {
      // No DocIndexer supplied; try to detect it from the index
      // metadata.
      String formatId = searcher.getIndexStructure().getDocumentFormat();
      if (formatId == null || formatId.length() == 0) {
        // The caller never receives this Indexer, so nobody else could close the searcher:
        // discard any pending changes and release the index before reporting the failure.
        searcher.rollback();
        searcher.close();
        throw new DocumentFormatException("Cannot detect document format for index!");
      }
      setDocIndexer(DocumentFormats.getIndexerClass(formatId));
    }

    // Field type for tokenized metadata fields: stored, indexed, with term vectors including
    // positions and offsets.
    metadataFieldTypeTokenized = new FieldType();
    metadataFieldTypeTokenized.setStored(true);
    metadataFieldTypeTokenized.setIndexed(true);
    metadataFieldTypeTokenized.setTokenized(true);
    metadataFieldTypeTokenized.setOmitNorms(true); // @@@ <-- depending on setting?
    metadataFieldTypeTokenized.setStoreTermVectors(true);
    metadataFieldTypeTokenized.setStoreTermVectorPositions(true);
    metadataFieldTypeTokenized.setStoreTermVectorOffsets(true);
    metadataFieldTypeTokenized.freeze();

    // Same settings as above, except that the field value is not tokenized.
    metadataFieldTypeUntokenized = new FieldType(metadataFieldTypeTokenized);
    metadataFieldTypeUntokenized.setTokenized(false);
    metadataFieldTypeUntokenized.freeze();
  }
Exemple #6
0
 /**
  * Returns the IndexWriter we're using, as provided by the underlying searcher.
  *
  * <p>Useful if e.g. you want to access FSDirectory.
  *
  * @return the IndexWriter obtained from the searcher
  */
 protected IndexWriter getWriter() {
   return searcher.getWriter();
 }
Exemple #7
0
 /**
  * Returns our index directory, as reported by the underlying searcher.
  *
  * @return the index directory
  */
 public File getIndexLocation() {
   return searcher.getIndexDirectory();
 }
Exemple #8
0
 /**
  * Returns the content store for the given field, as provided by the underlying searcher.
  *
  * @param fieldName name of the field whose content store is requested
  * @return whatever the searcher returns for that field (NOTE(review): possibly null when the
  *     field has no content store -- confirm against Searcher)
  */
 public ContentStore getContentStore(String fieldName) {
   return searcher.getContentStore(fieldName);
 }
Exemple #9
0
  /**
   * Adds a list of tokens to the forward index of the given field.
   *
   * @param fieldName what forward index to add this to
   * @param tokens the tokens to add
   * @param posIncr position increment associated with each token
   * @return the id assigned to the content
   * @throws RuntimeException if the searcher has no forward index for {@code fieldName}
   */
  public int addToForwardIndex(String fieldName, List<String> tokens, List<Integer> posIncr) {
    ForwardIndex target = searcher.getForwardIndex(fieldName);
    if (target != null) {
      return target.addDocument(tokens, posIncr);
    }
    throw new RuntimeException("No forward index for field " + fieldName);
  }
Exemple #10
0
 /**
  * Updates the specified Document in the index.
  *
  * <p>Delegates to the index writer's {@code updateDocument} and then notifies the listener
  * through {@code luceneDocumentAdded()}, the same notification that is sent for added documents.
  *
  * @param term how to find the document to update
  * @param document the updated document
  * @throws CorruptIndexException if the underlying index reports corruption
  * @throws IOException if an I/O error occurs while updating the index
  */
 public void update(Term term, Document document) throws CorruptIndexException, IOException {
   searcher.getWriter().updateDocument(term, document);
   getListener().luceneDocumentAdded();
 }
Exemple #11
0
 /**
  * Add a Lucene document to the index
  *
  * <p>The document is added through the index writer using the searcher's analyzer, after which
  * the listener is notified through {@code luceneDocumentAdded()}.
  *
  * @param document the document to add
  * @throws CorruptIndexException if the underlying index reports corruption
  * @throws IOException if an I/O error occurs while adding the document
  */
 public void add(Document document) throws CorruptIndexException, IOException {
   searcher.getWriter().addDocument(document, searcher.getAnalyzer());
   getListener().luceneDocumentAdded();
 }
Exemple #12
0
 /**
  * Call this to roll back any changes made to the index this session. Calling close() will
  * automatically commit any changes. If you call this method, then call close(), no changes will
  * be committed.
  *
  * <p>The listener is notified immediately before and immediately after the searcher performs
  * the rollback.
  */
 public void rollback() {
   getListener().rollbackStart();
   searcher.rollback();
   getListener().rollbackEnd();
 }
Exemple #13
0
 /**
  * Set the collator to use for sorting (passed to ForwardIndex to keep a sorted list of terms).
  * Defaults to English collator.
  *
  * <p>This method only forwards to {@link Searcher#setDefaultCollator(Collator)}.
  *
  * @param collator the collator
  * @deprecated use {@link Searcher#setDefaultCollator(Collator)} instead
  */
 @Deprecated
 public static void setCollator(Collator collator) {
   Searcher.setDefaultCollator(collator);
 }