public LexiconPostingChecker(String id, File dir, int revision, int findDocNo) throws IOException { this.id = id; this.dir = dir; this.rev = revision; this.findDocNo = findDocNo; System.out.println("Check dir = " + dir.getAbsolutePath()); indexInput = new BufferedFileInput( IndexFileNames.getRevisionDir(dir, revision), IndexFileNames.getSearchLexiconFileName(id)); }
/**
 * Ad-hoc check: loads the document delete set of revision 45 from a hard-coded local
 * segment directory and prints, for document numbers 0 through 99, the number itself
 * and a {@code ">> n"} line for every number that is set in the delete set.
 *
 * @throws IOException declared by the method; presumably raised while loading the
 *     delete set (confirm against the project's {@code BitSet})
 */
public void test2() throws IOException {
    final int revision = 45;
    final File segmentDir =
            new File("/Users/swsong/search/fastcat_basic/collection/news/data/1");
    System.out.println(segmentDir);

    File revisionDir = IndexFileNames.getRevisionDir(segmentDir, revision);
    BitSet deleteSet = new BitSet(revisionDir, IndexFileNames.docDeleteSet);

    int docNo = 0;
    while (docNo < 100) {
        System.out.println(docNo);
        if (deleteSet.isSet(docNo)) {
            System.out.println(">> " + docNo);
        }
        docNo++;
    }
}
public void list(PrintStream output) throws IOException { int indexSize = indexInput.readInt(); output.println("Memory indexsize = " + indexSize); StringBuilder findWord = new StringBuilder(); for (int k = 0; k < indexSize; k++) { String string = new String(indexInput.readUString()); long inputOffset = indexInput.readLong(); // output.println("word="+string+" ,"+ inputOffset); BufferedFileInput postingInput = new BufferedFileInput( IndexFileNames.getRevisionDir(dir, rev), IndexFileNames.getSearchPostingFileName(id)); BufferedFileInput clone = postingInput.clone(); IndexFieldOption indexFieldOption = new IndexFieldOption(clone.readInt()); // output.println("offset:" + inputOffset); clone.seek(inputOffset); int len = 0, postingCount = 0, lastDocNo = 0; // List<Integer> postingList = new ArrayList<Integer>(); try { len = clone.readVInt(); postingCount = clone.readInt(); lastDocNo = clone.readInt(); int postingRemain = postingCount; int prevId = -1; boolean isStorePosition = indexFieldOption.isStorePosition(); for (int i = 0; postingRemain > 0; i++) { int docId = -1; int readed = clone.readVInt(); if (prevId >= 0) { docId = readed + prevId + 1; } else { docId = readed; } if (findDocNo == docId) { findWord.append("\"").append(string).append("\", "); } int tf = clone.readVInt(); if (tf > 0 && isStorePosition) { int prevPosition = -1; for (int j = 0; j < tf; j++) { int position = 0; if (prevPosition >= 0) { position = clone.readVInt() + prevPosition + 1; } else { position = clone.readVInt(); } prevPosition = position; } } // postingList.add(docId); postingRemain--; prevId = docId; } } catch (IOException ex) { ex.printStackTrace(); } postingInput.close(); // output.println("poosting len : " + len + " postingCount : " // + postingCount + " / lastDocNo : " + lastDocNo); // output.println("postingList:"+postingList); if (findDocNo != -1) { // output.println("findDocNo:"+findDocNo+" // "+(postingList.contains(findDocNo)?"CONTAINS":"NOT")); } } output.println("docNo [" + 
findDocNo + "] has word " + findWord); }
@Override protected boolean done(RevisionInfo revisionInfo, IndexStatus indexStatus) throws IRException, IndexingStopException { int insertCount = revisionInfo.getInsertCount(); int deleteCount = revisionInfo.getDeleteCount(); FilePaths indexFilePaths = collectionContext.indexFilePaths(); try { if ((insertCount > 0 || deleteCount > 0) && !stopRequested) { if (insertCount > 0) { revisionInfo.setRefWithRevision(); } else { // 추가문서가 없고 삭제문서만 존재할 경우 logger.debug("추가문서없이 삭제문서만 존재합니다.!!"); if (workingSegmentInfo != null && !workingSegmentInfo.equals(workingSegmentInfo)) { // 기존색인문서수가 limit을 넘으면서 삭제문서만 색인될 경우 세그먼트가 바뀌는 현상이 나타날수 있다. // 색인후 문서가 0건이고 delete문서가 존재하면 이전 세그먼트의 다음 리비전으로 변경해주는 작업필요. // 세그먼트가 다르면, 즉 증가했으면 다시 원래의 세그먼트로 돌리고, rev를 증가시킨다. File segmentDir = indexFilePaths.file(workingSegmentInfo.getId()); // FileUtils.deleteDirectory(segmentDir); FileUtils.forceDelete(segmentDir); logger.debug("# 추가문서가 없으므로, segment를 삭제합니다. {}", segmentDir.getAbsolutePath()); workingSegmentInfo = workingSegmentInfo.copy(); int revision = workingSegmentInfo.getRevision(); workingSegmentInfo.getRevisionInfo().setInsertCount(0); workingSegmentInfo.getRevisionInfo().setUpdateCount(0); workingSegmentInfo.getRevisionInfo().setDeleteCount(deleteCount); // 이전 리비전의 delete.set.#을 현 리비전으로 복사해온다. // 원래 primarykeyindexeswriter에서 append일 경우 복사를 하나, 여기서는 추가문서가 0이므로 String segmentId = workingSegmentInfo.getId(); segmentDir = indexFilePaths.file(workingSegmentInfo.getId()); File revisionDir = IndexFileNames.getRevisionDir(segmentDir, revision); File prevRevisionDir = IndexFileNames.getRevisionDir(segmentDir, revision - 1); String deleteFileName = IndexFileNames.getSuffixFileName(IndexFileNames.docDeleteSet, segmentId); FileUtils.copyFile( new File(prevRevisionDir, deleteFileName), new File(revisionDir, deleteFileName)); } /* * else 세그먼트가 증가하지 않고 리비전이 증가한 경우. 
*/ } File segmentDir = indexFilePaths.file(workingSegmentInfo.getId()); collectionHandler.updateCollection( collectionContext, workingSegmentInfo, segmentDir, deleteIdSet); // status.xml 업데이트 collectionContext.updateCollectionStatus( IndexingType.ADD, revisionInfo, startTime, System.currentTimeMillis()); collectionContext.indexStatus().setAddIndexStatus(indexStatus); return true; } else { // 추가,삭제 문서 모두 없을때. // 리비전 디렉토리 삭제. File segmentDir = indexFilePaths.file(workingSegmentInfo.getId()); File revisionDir = IndexFileNames.getRevisionDir(segmentDir, revisionInfo.getId()); if (workingSegmentInfo != null && !workingSegmentInfo.equals(workingSegmentInfo)) { // 세그먼트 증가시 segment디렉토리 삭제. FileUtils.deleteDirectory(segmentDir); logger.info("delete segment dir ={}", segmentDir.getAbsolutePath()); } else { // 리비전 증가시 revision디렉토리 삭제. FileUtils.deleteDirectory(revisionDir); logger.info("delete revision dir ={}", revisionDir.getAbsolutePath()); } if (!stopRequested) { logger.info( "[{}] Indexing Canceled due to no documents.", collectionContext.collectionId()); throw new IndexingStopException( collectionContext.collectionId() + " Indexing Canceled due to no documents."); } else { logger.info( "[{}] Indexing Canceled due to Stop Requested!", collectionContext.collectionId()); throw new IndexingStopException( collectionContext.collectionId() + " Indexing Canceled due to Stop Requested"); } } } catch (IOException e) { throw new IRException(e); } }