public void updateCrawlTarget(final List<CrawlFile> crawls, final int documentBufferSize) throws SearchLibException { try { if (crawls == null) return; List<String> documentsToDelete = new ArrayList<String>(crawls.size()); List<IndexDocument> documentsToUpdate = new ArrayList<IndexDocument>(documentBufferSize); String uniqueField = targetClient.getSchema().getUniqueField(); for (CrawlFile crawl : crawls) { if (crawl == null) continue; FileItem currentFileItem = crawl.getFileItem(); FileIndexDocumentIterator indexDocumentIterator = crawl.getTargetIndexDocumentIterator(); TargetStatus targetStatus = currentFileItem.getIndexStatus().targetStatus; if (targetStatus == TargetStatus.TARGET_UPDATE) { if (!indexDocumentIterator.hasNext()) { currentFileItem.setIndexStatus(IndexStatus.NOTHING_TO_INDEX); continue; } while (indexDocumentIterator.hasNext()) { IndexDocument indexDocument = indexDocumentIterator.next(); indexDocumentIterator.throwError(); if (indexDocument == null) continue; if (uniqueField != null && !indexDocument.hasContent(uniqueField)) { currentFileItem.setIndexStatus(IndexStatus.INDEX_ERROR); } else { documentsToUpdate.add(indexDocument); } if (documentsToUpdate.size() >= documentBufferSize) { targetClient.updateDocuments(documentsToUpdate); documentsToUpdate.clear(); } } } else if (targetStatus == TargetStatus.TARGET_DELETE) documentsToDelete.add(currentFileItem.getUri()); } // crawl loop if (documentsToUpdate.size() > 0) targetClient.updateDocuments(documentsToUpdate); for (CrawlFile crawl : crawls) { FileItem currentFileItem = crawl.getFileItem(); IndexStatus indexStatus = currentFileItem.getIndexStatus(); if (indexStatus == IndexStatus.TO_INDEX || indexStatus == IndexStatus.NOT_INDEXED) currentFileItem.setIndexStatus(IndexStatus.INDEXED); } if (documentsToDelete.size() > 0) { String targetField = findIndexedFieldOfTargetIndex( targetClient.getFileCrawlerFieldMap(), FileItemFieldEnum.INSTANCE.uri.getName()); if (targetField != null) 
targetClient.deleteDocuments(targetField, documentsToDelete); } } catch (IOException e) { throw new SearchLibException(e); } }
/**
 * Fills in the default user and group on a document that does not carry them yet.
 *
 * <p>Runs while holding the read lock {@code rwl.r}. Nothing is done when this component is not
 * enabled or when {@code isJoin_noLock()} reports true. Otherwise, if both {@code defaultUser}
 * and {@code userAllowField} are non-empty and the document has no content in
 * {@code userAllowField}, the default user is added to that field; the same rule is then applied
 * to {@code defaultGroup} and {@code groupAllowField}.
 *
 * @param schema not used by this implementation
 * @param document the document to complete
 * @throws SearchLibException declared by the overridden method
 */
@Override
public void update(Schema schema, IndexDocument document) throws SearchLibException {
  rwl.r.lock();
  try {
    if (!enabled || isJoin_noLock()) {
      return;
    }

    // Default user: only when configured and the document has no value of its own.
    if (!(StringUtils.isEmpty(defaultUser) || StringUtils.isEmpty(userAllowField))
        && !document.hasContent(userAllowField)) {
      document.add(userAllowField, defaultUser, null);
    }

    // Default group: same rule as for the user.
    if (!(StringUtils.isEmpty(defaultGroup) || StringUtils.isEmpty(groupAllowField))
        && !document.hasContent(groupAllowField)) {
      document.add(groupAllowField, defaultGroup, null);
    }
  } finally {
    rwl.r.unlock();
  }
}
/**
 * Copies this item's fields into the given index document.
 *
 * <p>The superclass populates its own fields first. The URI, the sub-directory list and the four
 * access-control lists are always written; every other field is written only when its value is
 * non-null. Field names come from {@code FileItemFieldEnum.INSTANCE}.
 *
 * @param indexDocument the document to fill
 */
@Override
public void populate(IndexDocument indexDocument) {
  super.populate(indexDocument);

  // Location of the file.
  if (repository != null) {
    indexDocument.setString(FileItemFieldEnum.INSTANCE.repository.getName(), repository);
  }
  indexDocument.setString(FileItemFieldEnum.INSTANCE.uri.getName(), getUri());
  if (directory != null) {
    indexDocument.setString(FileItemFieldEnum.INSTANCE.directory.getName(), directory);
  }
  if (host != null) {
    indexDocument.setString(FileItemFieldEnum.INSTANCE.host.getName(), host);
  }
  indexDocument.setStringList(
      FileItemFieldEnum.INSTANCE.subDirectory.getName(), getSubDirectory());

  // Crawl metadata.
  if (crawlDate != null) {
    indexDocument.setString(
        FileItemFieldEnum.INSTANCE.crawlDate.getName(), dateFormat.format(crawlDate));
  }
  if (lang != null) {
    indexDocument.setString(FileItemFieldEnum.INSTANCE.lang.getName(), lang);
  }
  if (langMethod != null) {
    indexDocument.setString(FileItemFieldEnum.INSTANCE.langMethod.getName(), langMethod);
  }
  if (parser != null) {
    indexDocument.setString(FileItemFieldEnum.INSTANCE.parser.getName(), parser);
  }
  if (time != null) {
    // The time value is rendered with contentLengthFormat, exactly as before.
    indexDocument.setString(
        FileItemFieldEnum.INSTANCE.time.getName(), contentLengthFormat.format(time));
  }

  // Access-control lists.
  indexDocument.setStringList(FileItemFieldEnum.INSTANCE.userAllow.getName(), getUserAllow());
  indexDocument.setStringList(FileItemFieldEnum.INSTANCE.userDeny.getName(), getUserDeny());
  indexDocument.setStringList(FileItemFieldEnum.INSTANCE.groupAllow.getName(), getGroupAllow());
  indexDocument.setStringList(FileItemFieldEnum.INSTANCE.groupDeny.getName(), getGroupDeny());
}