public LexiconPostingChecker(String id, File dir, int revision, int findDocNo)
      throws IOException {
    this.id = id;
    this.dir = dir;
    this.rev = revision;
    this.findDocNo = findDocNo;

    System.out.println("Check dir = " + dir.getAbsolutePath());

    indexInput =
        new BufferedFileInput(
            IndexFileNames.getRevisionDir(dir, revision),
            IndexFileNames.getSearchLexiconFileName(id));
  }
Exemplo n.º 2
0
 /**
  * Manual probe of a document delete set: loads the delete set of revision 45 from a hard-coded
  * segment directory and prints every document number in {@code [0, 100)}, followed by a
  * {@code ">> n"} line for each number the set reports as set.
  *
  * @throws IOException if the delete-set file cannot be read
  */
 public void test2() throws IOException {
   final File segmentDir = new File("/Users/swsong/search/fastcat_basic/collection/news/data/1");
   final int revision = 45;
   System.out.println(segmentDir);

   File revisionDir = IndexFileNames.getRevisionDir(segmentDir, revision);
   BitSet deleteSet = new BitSet(revisionDir, IndexFileNames.docDeleteSet);

   int docNo = 0;
   while (docNo < 100) {
     System.out.println(docNo);
     boolean deleted = deleteSet.isSet(docNo);
     if (deleted) {
       System.out.println(">> " + docNo);
     }
     docNo++;
   }
 }
  /**
   * Walks every lexicon entry and reports which words have {@code findDocNo} in their posting
   * list.
   *
   * <p>The entry count is read from {@code indexInput} and printed. For each entry the word and
   * its offset into the posting file are read; the posting file is opened, the posting list at
   * that offset is decoded, and the word is collected when one of the decoded document numbers
   * equals {@code findDocNo}. A final summary line with the collected words is printed.
   *
   * <p>The posting file is closed in a {@code finally} block, so it is released even when
   * reading the field option, seeking, or decoding fails.
   *
   * @param output stream that receives the report lines
   * @throws IOException if reading the lexicon fails, or if opening, seeking in, or closing the
   *     posting file fails
   */
  public void list(PrintStream output) throws IOException {
    int indexSize = indexInput.readInt();
    output.println("Memory indexsize = " + indexSize);

    StringBuilder findWord = new StringBuilder();

    for (int k = 0; k < indexSize; k++) {
      String word = new String(indexInput.readUString());
      long inputOffset = indexInput.readLong();

      BufferedFileInput postingInput =
          new BufferedFileInput(
              IndexFileNames.getRevisionDir(dir, rev), IndexFileNames.getSearchPostingFileName(id));
      try {
        // NOTE(review): the clone is never closed on its own, as in the original code;
        // presumably it shares the underlying file with postingInput - confirm.
        BufferedFileInput clone = postingInput.clone();

        // The int read here, before seeking, is what configures the field option.
        IndexFieldOption indexFieldOption = new IndexFieldOption(clone.readInt());

        clone.seek(inputOffset);

        try {
          // Header of the posting list. The first and third values are not needed for the
          // lookup but must still be consumed to keep the stream position correct.
          clone.readVInt(); // previously held in a local named "len"
          int postingCount = clone.readInt();
          clone.readInt(); // previously held in a local named "lastDocNo"

          boolean isStorePosition = indexFieldOption.isStorePosition();
          int prevId = -1;

          for (int remaining = postingCount; remaining > 0; remaining--) {
            // The first value is used as-is; every later value is a gap from the previous
            // document number, stored minus one.
            int delta = clone.readVInt();
            int docId = prevId >= 0 ? delta + prevId + 1 : delta;

            if (findDocNo == docId) {
              findWord.append("\"").append(word).append("\", ");
            }

            int tf = clone.readVInt();
            if (isStorePosition) {
              // Position values are irrelevant here; read them only to advance the stream.
              for (int j = 0; j < tf; j++) {
                clone.readVInt();
              }
            }

            prevId = docId;
          }
        } catch (IOException ex) {
          // Best effort: a decoding failure for one word is reported and the scan moves on to
          // the next lexicon entry.
          ex.printStackTrace();
        }
      } finally {
        postingInput.close();
      }
    }

    output.println("docNo [" + findDocNo + "] has word " + findWord);
  }
  /**
   * Finishes an indexing run by either publishing the result or discarding it.
   *
   * <p>When the revision has inserted or deleted documents and no stop was requested, the
   * collection is updated with the working segment, the collection status is updated, and
   * {@code true} is returned. Otherwise a directory of the working segment is deleted and an
   * {@link IndexingStopException} is thrown, with a message that depends on whether a stop was
   * requested.
   *
   * @param revisionInfo revision whose insert/delete counts decide the outcome
   * @param indexStatus status stored as the add-index status on success
   * @return {@code true} when the collection was updated; the other path always throws
   * @throws IRException wrapping any {@link IOException} raised by the file operations
   * @throws IndexingStopException when there is nothing to index or a stop was requested
   */
  @Override
  protected boolean done(RevisionInfo revisionInfo, IndexStatus indexStatus)
      throws IRException, IndexingStopException {

    int insertCount = revisionInfo.getInsertCount();
    int deleteCount = revisionInfo.getDeleteCount();
    FilePaths indexFilePaths = collectionContext.indexFilePaths();
    try {
      if ((insertCount > 0 || deleteCount > 0) && !stopRequested) {
        if (insertCount > 0) {
          revisionInfo.setRefWithRevision();
        } else {
          // No added documents; only deleted documents exist.
          logger.debug("추가문서없이 삭제문서만 존재합니다.!!");
          // NOTE(review): workingSegmentInfo is compared with itself, so this condition is
          // always false and the whole branch below is unreachable. Presumably the comparison
          // was meant to be against the previous segment info - confirm and fix.
          if (workingSegmentInfo != null && !workingSegmentInfo.equals(workingSegmentInfo)) {
            // When the existing indexed document count exceeds the limit and only deletions are
            // indexed, the segment may change.
            // If there are 0 documents after indexing and delete documents exist, this has to be
            // changed to the next revision of the previous segment.
            // If the segment differs, i.e. it was incremented, go back to the original segment
            // and increase rev.
            File segmentDir = indexFilePaths.file(workingSegmentInfo.getId());
            // FileUtils.deleteDirectory(segmentDir);
            FileUtils.forceDelete(segmentDir);

            logger.debug("# 추가문서가 없으므로, segment를 삭제합니다. {}", segmentDir.getAbsolutePath());
            // NOTE(review): this copies the working segment itself; given the comments above,
            // the previous segment was presumably intended - confirm.
            workingSegmentInfo = workingSegmentInfo.copy();
            int revision = workingSegmentInfo.getRevision();
            workingSegmentInfo.getRevisionInfo().setInsertCount(0);
            workingSegmentInfo.getRevisionInfo().setUpdateCount(0);
            workingSegmentInfo.getRevisionInfo().setDeleteCount(deleteCount);

            // Copy the previous revision's delete.set.# into the current revision.
            // Normally primarykeyindexeswriter does this copy on append, but here the number of
            // added documents is 0, so it is done here.
            String segmentId = workingSegmentInfo.getId();
            segmentDir = indexFilePaths.file(workingSegmentInfo.getId());
            File revisionDir = IndexFileNames.getRevisionDir(segmentDir, revision);
            File prevRevisionDir = IndexFileNames.getRevisionDir(segmentDir, revision - 1);
            String deleteFileName =
                IndexFileNames.getSuffixFileName(IndexFileNames.docDeleteSet, segmentId);
            FileUtils.copyFile(
                new File(prevRevisionDir, deleteFileName), new File(revisionDir, deleteFileName));
          }
          /*
           * else: the segment did not increase and only the revision increased.
           */
        }

        File segmentDir = indexFilePaths.file(workingSegmentInfo.getId());
        collectionHandler.updateCollection(
            collectionContext, workingSegmentInfo, segmentDir, deleteIdSet);

        // Update status.xml.
        collectionContext.updateCollectionStatus(
            IndexingType.ADD, revisionInfo, startTime, System.currentTimeMillis());
        collectionContext.indexStatus().setAddIndexStatus(indexStatus);

        return true;
      } else {
        // Neither added nor deleted documents exist (or a stop was requested).
        // Delete the revision directory.
        // NOTE(review): workingSegmentInfo is dereferenced here before the null check below, so
        // that null check cannot protect this line.
        File segmentDir = indexFilePaths.file(workingSegmentInfo.getId());
        File revisionDir = IndexFileNames.getRevisionDir(segmentDir, revisionInfo.getId());
        // NOTE(review): self-comparison again; always false, so only the revision directory is
        // ever deleted and the segment-directory branch is unreachable.
        if (workingSegmentInfo != null && !workingSegmentInfo.equals(workingSegmentInfo)) {
          // When the segment was incremented, delete the segment directory.
          FileUtils.deleteDirectory(segmentDir);
          logger.info("delete segment dir ={}", segmentDir.getAbsolutePath());
        } else {
          // When only the revision was incremented, delete the revision directory.
          FileUtils.deleteDirectory(revisionDir);
          logger.info("delete revision dir ={}", revisionDir.getAbsolutePath());
        }

        if (!stopRequested) {
          logger.info(
              "[{}] Indexing Canceled due to no documents.", collectionContext.collectionId());
          throw new IndexingStopException(
              collectionContext.collectionId() + " Indexing Canceled due to no documents.");
        } else {
          logger.info(
              "[{}] Indexing Canceled due to Stop Requested!", collectionContext.collectionId());
          throw new IndexingStopException(
              collectionContext.collectionId() + " Indexing Canceled due to Stop Requested");
        }
      }

    } catch (IOException e) {
      // Surface file-system failures through the indexer's own exception type.
      throw new IRException(e);
    }
  }