示例#1
0
  /**
   * Pushes the outcome of a batch of file crawls to the target index.
   *
   * <p>For every non-null crawl the target status of its file item selects the action:
   *
   * <ul>
   *   <li>{@code TARGET_UPDATE}: the crawl's document iterator is drained into a buffer, which is
   *       sent to {@code targetClient} each time it holds at least {@code documentBufferSize}
   *       documents. A crawl whose iterator is empty is marked {@code NOTHING_TO_INDEX}; a
   *       document lacking content for the schema's unique field marks the file item {@code
   *       INDEX_ERROR} and is not sent.
   *   <li>{@code TARGET_DELETE}: the file item's URI is queued for deletion.
   * </ul>
   *
   * <p>Once the remaining buffered documents have been sent, file items still in {@code TO_INDEX}
   * or {@code NOT_INDEXED} state are marked {@code INDEXED}. Finally the queued URIs are deleted
   * from the target, provided a target field mapped to the crawler's URI field is found.
   *
   * @param crawls the crawls to propagate; {@code null} is a no-op and {@code null} entries are
   *     skipped
   * @param documentBufferSize number of buffered documents that triggers an intermediate update;
   *     also used as the initial capacity of the buffer
   * @throws SearchLibException if an {@link IOException} is raised while processing (it is
   *     wrapped as the cause), or as declared by the invoked operations
   */
  public void updateCrawlTarget(final List<CrawlFile> crawls, final int documentBufferSize)
      throws SearchLibException {
    try {
      if (crawls == null) {
        return;
      }
      List<String> documentsToDelete = new ArrayList<String>(crawls.size());
      List<IndexDocument> documentsToUpdate = new ArrayList<IndexDocument>(documentBufferSize);
      String uniqueField = targetClient.getSchema().getUniqueField();
      for (CrawlFile crawl : crawls) {
        if (crawl == null) {
          continue;
        }
        FileItem currentFileItem = crawl.getFileItem();

        FileIndexDocumentIterator indexDocumentIterator = crawl.getTargetIndexDocumentIterator();

        TargetStatus targetStatus = currentFileItem.getIndexStatus().targetStatus;
        if (targetStatus == TargetStatus.TARGET_UPDATE) {
          if (!indexDocumentIterator.hasNext()) {
            currentFileItem.setIndexStatus(IndexStatus.NOTHING_TO_INDEX);
            continue;
          }
          while (indexDocumentIterator.hasNext()) {
            IndexDocument indexDocument = indexDocumentIterator.next();
            // Surface any error the iterator recorded while producing this document.
            indexDocumentIterator.throwError();
            if (indexDocument == null) {
              continue;
            }
            if (uniqueField != null && !indexDocument.hasContent(uniqueField)) {
              // Without a value for the unique field the document is not sent to the target.
              currentFileItem.setIndexStatus(IndexStatus.INDEX_ERROR);
            } else {
              documentsToUpdate.add(indexDocument);
            }
            // Flush as soon as the buffer reaches the requested size.
            if (documentsToUpdate.size() >= documentBufferSize) {
              targetClient.updateDocuments(documentsToUpdate);
              documentsToUpdate.clear();
            }
          }
        } else if (targetStatus == TargetStatus.TARGET_DELETE) {
          documentsToDelete.add(currentFileItem.getUri());
        }
      } // crawl loop

      // Send whatever is left in the buffer.
      if (documentsToUpdate.size() > 0) {
        targetClient.updateDocuments(documentsToUpdate);
      }
      for (CrawlFile crawl : crawls) {
        // Null entries were skipped in the first pass; skip them here too instead of failing with
        // a NullPointerException after the target has already been updated.
        if (crawl == null) {
          continue;
        }
        FileItem currentFileItem = crawl.getFileItem();
        IndexStatus indexStatus = currentFileItem.getIndexStatus();
        if (indexStatus == IndexStatus.TO_INDEX || indexStatus == IndexStatus.NOT_INDEXED) {
          currentFileItem.setIndexStatus(IndexStatus.INDEXED);
        }
      }
      if (documentsToDelete.size() > 0) {
        String targetField =
            findIndexedFieldOfTargetIndex(
                targetClient.getFileCrawlerFieldMap(), FileItemFieldEnum.INSTANCE.uri.getName());
        // Deletion is only attempted when the lookup yields a target field.
        if (targetField != null) {
          targetClient.deleteDocuments(targetField, documentsToDelete);
        }
      }
    } catch (IOException e) {
      throw new SearchLibException(e);
    }
  }
示例#2
0
 /**
  * Fills in the default user and/or group on a document that has no content for the
  * corresponding "allow" field.
  *
  * <p>Nothing is changed when this component is not enabled or when {@code isJoin_noLock()}
  * returns {@code true}. A default is only applied when both the default value and the name of
  * its target field are non-empty. The read lock {@code rwl.r} is held for the whole call.
  *
  * @param schema not used by this implementation
  * @param document the document to complete
  * @throws SearchLibException as declared by the overridden method
  */
 @Override
 public void update(Schema schema, IndexDocument document) throws SearchLibException {
   rwl.r.lock();
   try {
     if (!enabled || isJoin_noLock()) {
       return;
     }

     final boolean userDefaultConfigured =
         !(StringUtils.isEmpty(defaultUser) || StringUtils.isEmpty(userAllowField));
     if (userDefaultConfigured && !document.hasContent(userAllowField)) {
       document.add(userAllowField, defaultUser, null);
     }

     final boolean groupDefaultConfigured =
         !(StringUtils.isEmpty(defaultGroup) || StringUtils.isEmpty(groupAllowField));
     if (groupDefaultConfigured && !document.hasContent(groupAllowField)) {
       document.add(groupAllowField, defaultGroup, null);
     }
   } finally {
     rwl.r.unlock();
   }
 }
示例#3
0
  /**
   * Copies this item's file-crawler fields into the given index document, after letting the
   * superclass populate its own fields.
   *
   * <p>Scalar fields that are {@code null} are skipped. The URI and the list-valued fields
   * (sub-directory and the user/group allow/deny lists) are always written.
   *
   * @param indexDocument the document receiving the field values
   */
  @Override
  public void populate(IndexDocument indexDocument) {
    super.populate(indexDocument);

    // Location of the file.
    if (repository != null) {
      indexDocument.setString(FileItemFieldEnum.INSTANCE.repository.getName(), repository);
    }
    indexDocument.setString(FileItemFieldEnum.INSTANCE.uri.getName(), getUri());
    if (directory != null) {
      indexDocument.setString(FileItemFieldEnum.INSTANCE.directory.getName(), directory);
    }
    if (host != null) {
      indexDocument.setString(FileItemFieldEnum.INSTANCE.host.getName(), host);
    }
    indexDocument.setStringList(
        FileItemFieldEnum.INSTANCE.subDirectory.getName(), getSubDirectory());

    // Crawl metadata; dates and numbers are stored as formatted strings.
    if (crawlDate != null) {
      indexDocument.setString(
          FileItemFieldEnum.INSTANCE.crawlDate.getName(), dateFormat.format(crawlDate));
    }
    if (lang != null) {
      indexDocument.setString(FileItemFieldEnum.INSTANCE.lang.getName(), lang);
    }
    if (langMethod != null) {
      indexDocument.setString(FileItemFieldEnum.INSTANCE.langMethod.getName(), langMethod);
    }
    if (parser != null) {
      indexDocument.setString(FileItemFieldEnum.INSTANCE.parser.getName(), parser);
    }
    if (time != null) {
      indexDocument.setString(
          FileItemFieldEnum.INSTANCE.time.getName(), contentLengthFormat.format(time));
    }

    // Access-control lists.
    indexDocument.setStringList(FileItemFieldEnum.INSTANCE.userAllow.getName(), getUserAllow());
    indexDocument.setStringList(FileItemFieldEnum.INSTANCE.userDeny.getName(), getUserDeny());
    indexDocument.setStringList(FileItemFieldEnum.INSTANCE.groupAllow.getName(), getGroupAllow());
    indexDocument.setStringList(FileItemFieldEnum.INSTANCE.groupDeny.getName(), getGroupDeny());
  }