public void store(final DictionaryFile<? extends DictionaryItem> dictFile, final File file) { final FessConfig fessConfig = ComponentUtil.getFessConfig(); getDictionaryFile(dictFile.getId()) .ifPresent( currentFile -> { if (currentFile.getTimestamp().getTime() > dictFile.getTimestamp().getTime()) { throw new DictionaryException(dictFile.getPath() + " was updated."); } // TODO use stream try (CurlResponse response = Curl.post(fessConfig.getElasticsearchUrl() + "/_configsync/file") .param("path", dictFile.getPath()) .body(FileUtil.readUTF8(file)) .execute()) { final Map<String, Object> contentMap = response.getContentAsMap(); if (!Constants.TRUE.equalsIgnoreCase(contentMap.get("acknowledged").toString())) { throw new DictionaryException("Failed to update " + dictFile.getPath()); } } catch (final IOException e) { throw new DictionaryException("Failed to update " + dictFile.getPath(), e); } }) .orElse( () -> { throw new DictionaryException(dictFile.getPath() + " does not exist."); }); }
/**
 * Returns the start position, filling the {@code start} field from the configured
 * paging start value the first time it is found unset.
 *
 * @return the value of {@code start} after any defaulting
 */
@Override
public int getStartPosition() {
    if (start != null) {
        return start;
    }
    // Not set yet: take the configured default and remember it.
    start = ComponentUtil.getFessConfig().getPagingSearchPageStartAsInteger();
    return start;
}
/**
 * Deletes documents of this data config that were not produced by the current session.
 * <p>
 * Nothing is done when the {@code DELETE_OLD_DOCS} init parameter equals
 * {@code Constants.FALSE}, or when the session id init parameter is blank (a warning is
 * logged in that case). A document is selected for deletion when its config id field
 * matches this config, its expires field is either missing or not later than "now", and
 * its segment field differs from the current session id. Any exception raised while
 * refreshing the index or deleting is logged and not rethrown.
 */
private void deleteOldDocs() {
    final boolean deletionDisabled = Constants.FALSE.equals(initParamMap.get(DELETE_OLD_DOCS));
    if (deletionDisabled) {
        return;
    }

    final String sessionId = initParamMap.get(Constants.SESSION_ID);
    if (StringUtil.isBlank(sessionId)) {
        logger.warn("Invalid sessionId at " + dataConfig);
        return;
    }

    final FessConfig fessConfig = ComponentUtil.getFessConfig();

    // Build the individual clauses first, then combine them.
    final QueryBuilder sameConfig = QueryBuilders.termQuery(fessConfig.getIndexFieldConfigId(), dataConfig.getConfigId());
    final QueryBuilder expiredOrNoExpiry = QueryBuilders.boolQuery()
            .should(QueryBuilders.rangeQuery(fessConfig.getIndexFieldExpires()).lte("now"))
            .should(QueryBuilders.missingQuery(fessConfig.getIndexFieldExpires()));
    final QueryBuilder currentSession = QueryBuilders.termQuery(fessConfig.getIndexFieldSegment(), sessionId);
    final QueryBuilder queryBuilder = QueryBuilders.boolQuery()
            .must(sameConfig)
            .must(expiredOrNoExpiry)
            .mustNot(currentSession);

    try {
        final FessEsClient esClient = ComponentUtil.getFessEsClient();
        final String updateIndex = fessConfig.getIndexDocumentUpdateIndex();
        // Refresh before deleting so the query runs against the latest indexed state.
        esClient.admin().indices().prepareRefresh(updateIndex).execute().actionGet();
        final int deletedCount = esClient.deleteByQuery(updateIndex, fessConfig.getIndexDocumentType(), queryBuilder);
        logger.info("Deleted {} old docs.", deletedCount);
    } catch (final Exception e) {
        logger.error("Could not delete old docs at " + dataConfig, e);
    }
}
/**
 * Populates {@code defaultRoleList} from the configured default search roles.
 * <p>
 * The configured value is split on commas and every non-blank element is appended to the
 * list in its original order.
 */
@PostConstruct
public void init() {
    final String[] roleNames = ComponentUtil.getFessConfig().getSearchDefaultRoles().split(",");
    for (final String roleName : roleNames) {
        if (StringUtil.isNotBlank(roleName)) {
            defaultRoleList.add(roleName);
        }
    }
}
/**
 * Returns the page size, resolving and clamping the {@code num} field.
 * <p>
 * An unset {@code num} is first filled from the configured default page size. A value
 * that exceeds the configured maximum page size, or that is zero or negative, is then
 * replaced by the configured maximum. The resolved value is stored back into {@code num}.
 *
 * @return the value of {@code num} after defaulting and clamping
 */
@Override
public int getPageSize() {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();
    if (num == null) {
        num = fessConfig.getPagingSearchPageSizeAsInteger();
    }

    final boolean withinLimits = num <= fessConfig.getPagingSearchPageMaxSizeAsInteger().intValue() && num > 0;
    if (!withinLimits) {
        num = fessConfig.getPagingSearchPageMaxSizeAsInteger();
    }
    return num;
}
/**
 * Fills in unset paging fields from the configuration and caps the page size.
 * <p>
 * An unset {@code start} receives the configured paging start value. An unset {@code num}
 * receives the configured default page size; a {@code num} that is already set is lowered
 * to the configured maximum page size when it exceeds that maximum.
 */
public void initialize() {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();

    if (start == null) {
        start = fessConfig.getPagingSearchPageStartAsInteger();
    }

    if (num == null) {
        num = fessConfig.getPagingSearchPageSizeAsInteger();
        return;
    }

    // num was supplied: only the upper bound is enforced here.
    if (num > fessConfig.getPagingSearchPageMaxSizeAsInteger().intValue()) {
        num = fessConfig.getPagingSearchPageMaxSizeAsInteger();
    }
}
/**
 * Opens the content of the given dictionary file as a stream.
 * <p>
 * The content is requested from the {@code /_configsync/file} endpoint below the
 * configured Elasticsearch URL, passing the dictionary file's path as the {@code path}
 * parameter.
 *
 * @param dictFile the dictionary file whose content is requested
 * @return the response content as an input stream
 * @throws DictionaryException if the request fails with an {@link IOException}
 */
public InputStream getContentInputStream(final DictionaryFile<? extends DictionaryItem> dictFile) {
    final String endpoint = ComponentUtil.getFessConfig().getElasticsearchUrl() + "/_configsync/file";
    final String path = dictFile.getPath();
    try {
        // NOTE(review): the response object is never closed here; presumably the returned
        // stream depends on it staying open - confirm who is responsible for cleanup.
        return Curl.get(endpoint).param("path", path).execute().getContentAsStream();
    } catch (final IOException e) {
        throw new DictionaryException("Failed to access " + path, e);
    }
}
/**
 * Lists the dictionary files that one of the registered creators can handle.
 * <p>
 * The {@code path} and {@code @timestamp} fields of every entry are requested from the
 * {@code /_configsync/file} endpoint below the configured Elasticsearch URL. Each entry
 * of the response's {@code file} list is offered to the creators in {@code creatorList}
 * in order, and the first non-null result is used. Entries that no creator accepts, or
 * that raise an exception while being processed (logged as a warning), are left out.
 *
 * @return the dictionary files that could be created, in response order
 * @throws DictionaryException if the request fails with an {@link IOException}
 */
public DictionaryFile<? extends DictionaryItem>[] getDictionaryFiles() {
    final String endpoint = ComponentUtil.getFessConfig().getElasticsearchUrl() + "/_configsync/file";
    try (CurlResponse response = Curl.get(endpoint).param("fields", "path,@timestamp").execute()) {
        @SuppressWarnings("unchecked")
        final List<Map<String, Object>> entries = (List<Map<String, Object>>) response.getContentAsMap().get("file");
        return entries.stream().map(entry -> {
            // Stays null unless a creator accepts the entry.
            DictionaryFile<? extends DictionaryItem> created = null;
            try {
                final String path = entry.get("path").toString();
                final SimpleDateFormat timestampFormat = new SimpleDateFormat(Constants.DATE_FORMAT_ISO_8601_EXTEND_UTC);
                final Date timestamp = timestampFormat.parse(entry.get("@timestamp").toString());
                for (final DictionaryCreator creator : creatorList) {
                    created = creator.create(path, timestamp);
                    if (created != null) {
                        break;
                    }
                }
            } catch (final Exception e) {
                logger.warn("Failed to load " + entry, e);
            }
            return created;
        }).filter(dictionaryFile -> dictionaryFile != null).toArray(size -> new DictionaryFile<?>[size]);
    } catch (final IOException e) {
        throw new DictionaryException("Failed to access dictionaries", e);
    }
}
protected void doCrawl(final String sessionId, final List<DataConfig> configList) { final int multiprocessCrawlingCount = ComponentUtil.getFessConfig().getCrawlingThreadCount(); final long startTime = System.currentTimeMillis(); final IndexUpdateCallback indexUpdateCallback = ComponentUtil.getComponent(IndexUpdateCallback.class); final List<String> sessionIdList = new ArrayList<>(); final Map<String, String> initParamMap = new HashMap<>(); dataCrawlingThreadList.clear(); final List<String> dataCrawlingThreadStatusList = new ArrayList<>(); for (final DataConfig dataConfig : configList) { final String sid = crawlingConfigHelper.store(sessionId, dataConfig); sessionIdList.add(sid); initParamMap.put(Constants.SESSION_ID, sessionId); initParamMap.put(Constants.CRAWLING_INFO_ID, sid); final DataCrawlingThread dataCrawlingThread = new DataCrawlingThread(dataConfig, indexUpdateCallback, initParamMap); dataCrawlingThread.setPriority(crawlerPriority); dataCrawlingThread.setName(sid); dataCrawlingThread.setDaemon(true); dataCrawlingThreadList.add(dataCrawlingThread); dataCrawlingThreadStatusList.add(Constants.READY); } final SystemHelper systemHelper = ComponentUtil.getSystemHelper(); int startedCrawlerNum = 0; int activeCrawlerNum = 0; while (startedCrawlerNum < dataCrawlingThreadList.size()) { // Force to stop crawl if (systemHelper.isForceStop()) { for (final DataCrawlingThread crawlerThread : dataCrawlingThreadList) { crawlerThread.stopCrawling(); } break; } if (activeCrawlerNum < multiprocessCrawlingCount) { // start crawling dataCrawlingThreadList.get(startedCrawlerNum).start(); dataCrawlingThreadStatusList.set(startedCrawlerNum, Constants.RUNNING); startedCrawlerNum++; activeCrawlerNum++; try { Thread.sleep(crawlingExecutionInterval); } catch (final InterruptedException e) { if (logger.isDebugEnabled()) { logger.debug("Interrupted.", e); } } continue; } // check status for (int i = 0; i < startedCrawlerNum; i++) { if (!dataCrawlingThreadList.get(i).isRunning() && 
dataCrawlingThreadStatusList.get(i).equals(Constants.RUNNING)) { dataCrawlingThreadList.get(i).awaitTermination(); dataCrawlingThreadStatusList.set(i, Constants.DONE); activeCrawlerNum--; } } try { Thread.sleep(crawlingExecutionInterval); } catch (final InterruptedException e) { if (logger.isDebugEnabled()) { logger.debug("Interrupted.", e); } } } boolean finishedAll = false; while (!finishedAll) { finishedAll = true; for (int i = 0; i < dataCrawlingThreadList.size(); i++) { dataCrawlingThreadList.get(i).awaitTermination(crawlingExecutionInterval); if (!dataCrawlingThreadList.get(i).isRunning() && dataCrawlingThreadStatusList.get(i).equals(Constants.RUNNING)) { dataCrawlingThreadStatusList.set(i, Constants.DONE); } if (!dataCrawlingThreadStatusList.get(i).equals(Constants.DONE)) { finishedAll = false; } } } dataCrawlingThreadList.clear(); dataCrawlingThreadStatusList.clear(); // put cralwing info final CrawlingInfoHelper crawlingInfoHelper = ComponentUtil.getCrawlingInfoHelper(); final long execTime = System.currentTimeMillis() - startTime; crawlingInfoHelper.putToInfoMap(Constants.DATA_CRAWLING_EXEC_TIME, Long.toString(execTime)); if (logger.isInfoEnabled()) { logger.info("[EXEC TIME] crawling time: " + execTime + "ms"); } crawlingInfoHelper.putToInfoMap( Constants.DATA_INDEX_EXEC_TIME, Long.toString(indexUpdateCallback.getExecuteTime())); crawlingInfoHelper.putToInfoMap( Constants.DATA_INDEX_SIZE, Long.toString(indexUpdateCallback.getDocumentSize())); for (final String sid : sessionIdList) { // remove config crawlingConfigHelper.remove(sid); } }