コード例 #1
0
  /**
   * Advances this filter by one token.
   *
   * <p>Tokens still waiting in {@code morphQueue} are emitted first, on top of the attribute state
   * saved in {@code currentState}. Otherwise tokens are pulled from {@code input}: a token whose
   * type is not {@code KOREAN_TYPE} is returned untouched, while a Korean token is handed to
   * {@code analysisKorean} and the first queued result, if any, is emitted. A Korean token that
   * leaves the queue empty is skipped and the next input token is tried.
   *
   * @return {@code true} if a token was produced, {@code false} once {@code input} is exhausted
   * @throws IOException if pulling the next token from {@code input} fails
   * @throws RuntimeException wrapping any {@code MorphException} raised by the analysis
   */
  public boolean incrementToken() throws IOException {
    final boolean pending = !morphQueue.isEmpty();
    if (pending) {
      // Start from the saved state, then overlay the next queued token on it.
      restoreState(currentState);
      setAttributesFromQueue(false);
      return true;
    }

    while (input.incrementToken()) {
      if (!KOREAN_TYPE.equals(typeAtt.type())) {
        // Anything that is not Korean passes through unchanged.
        return true;
      }

      try {
        analysisKorean(termAtt.toString());
      } catch (MorphException morphFailure) {
        throw new RuntimeException(morphFailure);
      }

      if (morphQueue.isEmpty()) {
        // Nothing was queued for this token; move on to the next input token.
        continue;
      }
      setAttributesFromQueue(true);
      return true;
    }

    return false;
  }
コード例 #2
0
  /**
   * Removes the head of {@code morphQueue} and copies it into the token attributes.
   *
   * <p>If this is the first token of a group and further tokens remain queued, the attribute state
   * is captured into {@code currentState} (with the term emptied beforehand) before the attributes
   * are overwritten.
   *
   * @param isFirst {@code true} to leave the incoming position increment as it is; {@code false}
   *     to apply the position increment stored in the queued token
   */
  private void setAttributesFromQueue(boolean isFirst) {
    final KoreanToken head = morphQueue.removeFirst();

    if (isFirst) {
      if (!morphQueue.isEmpty()) {
        // More tokens follow (e.g. after decompounding): snapshot the state for them.
        // The term is cleared first so the snapshot does not carry the term text around.
        termAtt.setEmpty();
        currentState = captureState();
      }
    }

    termAtt.setEmpty();
    termAtt.append(head.getTerm());

    final int startOffset = head.getOffset();
    final int endOffset = startOffset + head.getLength();
    offsetAtt.setOffset(startOffset, endOffset);

    morphAtt.setToken(head);

    // Only follow-up tokens get their own increment; the first keeps the incoming one.
    if (!isFirst) {
      posIncrAtt.setPositionIncrement(head.getPosInc());
    }

    // TODO: How to handle PositionLengthAttribute correctly?
  }
コード例 #3
0
  /**
   * Runs morphological analysis on one Korean term and queues the resulting tokens.
   *
   * <p>The term is first passed through {@code trimHangul}. If the analyzer returns no outputs,
   * nothing is queued. Otherwise the tokens collected by {@code extractKeyword} (preceded by a
   * token for the trimmed term itself when {@code hasOrigin} is set) each receive the full list of
   * analysis outputs and are appended to {@code morphQueue} in insertion order.
   *
   * @param input the term text to analyze
   * @throws MorphException propagated from the analysis helpers called here
   */
  private void analysisKorean(String input) throws MorphException {
    final String trimmed = trimHangul(input);

    final List<AnalysisOutput> analysed = morph.analyze(trimmed);
    if (analysed.isEmpty()) {
      return;
    }

    final int startOffset = offsetAtt.startOffset();

    // Insertion-ordered so the tokens are queued in the order they were added.
    final Map<String, KoreanToken> tokensByKey = new LinkedHashMap<String, KoreanToken>();
    if (hasOrigin) {
      tokensByKey.put("0:" + trimmed, new KoreanToken(trimmed, startOffset));
    }

    // Note: an earlier, commented-out variant only made this call when the first output's score
    // reached AnalysisOutput.SCORE_COMPOUNDS and otherwise re-analysed the input with a
    // word-spacing analyzer (wsAnal). Keywords are now always extracted.
    extractKeyword(analysed, startOffset, tokensByKey, 0);

    final Collection<KoreanToken> tokens = tokensByKey.values();
    for (KoreanToken token : tokens) {
      token.setOutputs(analysed);
    }

    morphQueue.addAll(tokens);
  }
コード例 #4
0
 /**
  * Resets this filter: delegates to {@code super.reset()}, then drops the saved attribute state
  * and any tokens still waiting in {@code morphQueue}.
  *
  * @throws IOException if {@code super.reset()} fails
  */
 @Override
 public void reset() throws IOException {
   super.reset();
   // Discard everything left over from the previous run.
   currentState = null;
   morphQueue.clear();
 }