Java EditDistAligner 예제들

프로그래밍 언어: Java

클래스/타입: EditDistAligner

hotexamples.com에서의 예제들: 2

Java EditDistAligner - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Java의 EditDistAligner에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

align(2)

예제 #1

파일 보기

파일: RecursiveAlignmentTool.java 프로젝트: mervindsouza/RETAS

  // STAGE 1 : apply word level alignment for each segment
  /**
   * Builds the word-level alignment one anchor-delimited segment at a time.
   *
   * <p>Segment end positions are taken pairwise from {@code anchorsRef} and {@code anchorsCand};
   * one additional final segment runs to the token counts reported by {@code refIndex} and
   * {@code ocrIndex}. Each segment starts where the previous one ended. A segment is handed to
   * the {@link EditDistAligner} only when its dynamic-programming table is non-empty and has at
   * most {@code MAX_DYNAMIC_TABLE_SIZE} cells; otherwise every reference token and then every
   * candidate token of the segment is appended to {@code alignment} paired with {@code null}.
   */
  private void wordAlignSubsequences() {
    final String[] referenceWords = refIndex.getTokens();
    final String[] candidateWords = ocrIndex.getTokens();
    final EditDistAligner wordAligner = new EditDistAligner();
    final int anchorCount = anchorsRef.size();

    int refFrom = 0;
    int candFrom = 0;

    for (int segment = 0; segment <= anchorCount; segment++) {
      // The pass after the last anchor covers whatever is left of both token arrays.
      final boolean tailSegment = (segment == anchorCount);
      final int refTo =
          tailSegment ? refIndex.getNumOfTokens() : anchorsRef.get(segment).getPosInt();
      final int candTo =
          tailSegment ? ocrIndex.getNumOfTokens() : anchorsCand.get(segment).getPosInt();

      // Widen before multiplying so the cell count cannot overflow an int.
      final long tableCells = (long) (refTo - refFrom) * (long) (candTo - candFrom);
      final boolean skipDynamicProgramming =
          tableCells == 0 || tableCells > MAX_DYNAMIC_TABLE_SIZE;

      if (skipDynamicProgramming) {
        // Empty or oversized table: emit the tokens unaligned, reference side first.
        int pos = refFrom;
        while (pos < refTo) {
          alignment.add(new AlignedSequence(referenceWords[pos], null));
          pos++;
        }
        pos = candFrom;
        while (pos < candTo) {
          alignment.add(new AlignedSequence(null, candidateWords[pos]));
          pos++;
        }
      } else {
        alignment.addAll(
            wordAligner.align(referenceWords, candidateWords, refFrom, refTo, candFrom, candTo));
      }

      // The next segment begins at this segment's end positions.
      refFrom = refTo;
      candFrom = candTo;
    }
  }

예제 #2

파일 보기

파일: RecursiveAlignmentTool.java 프로젝트: mervindsouza/RETAS

  // STAGE 2: based on word level alignment, align all the characters
  // Michael Z    7/2013   Modified to avoid ArrayIndexOutOfBoundsException when there
  // are long stretches without an anchor word. To avoid that, we only build the "accumulator"
  // array when we need it for the alignment and we're assured all the words will fit in the array.
  /**
   * Expands the word-level {@code alignment} into a character-level alignment and stores the
   * result back into {@code alignment}.
   *
   * <p>Entries whose reference and candidate words are equal act as anchors. Words seen since
   * the previous anchor (entries with one side {@code null}, or with differing words) are only
   * counted while scanning. When the next anchor is reached, those pending words are expanded
   * into single-character strings, each word followed by a space. If both sides have pending
   * characters and the product of the two character counts is below
   * {@code MAX_DYNAMIC_TABLE_SIZE}, the characters are passed to the {@link EditDistAligner};
   * otherwise every pending character is emitted paired with {@code null}. The anchor word itself
   * is then emitted character by character as identical pairs, followed by a space pair.
   *
   * <p>One extra pass with empty reference and candidate words flushes anything still pending
   * after the last real entry. An empty {@code alignment} is left untouched.
   */
  private void charAlignSubsequences() {
    // Nothing to expand. Without this guard the sentinel pass below would add a single space
    // pair and the two trailing removals at the end would then index an empty list and throw.
    if (alignment.isEmpty()) {
      return;
    }

    final int EXPECTED_WORD_LENGTH_IN_CHARACTERS = 5; // for memory pre-allocation

    ArrayList<AlignedSequence> alignment2 =
        new ArrayList<AlignedSequence>(alignment.size() * EXPECTED_WORD_LENGTH_IN_CHARACTERS);
    EditDistAligner aligner = new EditDistAligner();

    // Scratch buffers for the characters handed to the aligner. They are only written when
    // dynamic alignment is used, in which case both character counts are non-zero and their
    // product is below MAX_DYNAMIC_TABLE_SIZE, so each count fits in these arrays.
    String[] candAccu = new String[(int) MAX_DYNAMIC_TABLE_SIZE];
    String[] refAccu = new String[(int) MAX_DYNAMIC_TABLE_SIZE];
    // Number of pending characters (including one space per word) since the last anchor.
    int candAccuSize = 0;
    int refAccuSize = 0;
    // Index in `alignment` of the first pending non-null word on each side; -1 means none yet.
    int refStartAnchorIdx = -1;
    int candStartAnchorIdx = -1;

    String cand, ref;
    // Runs one index past the end: the extra pass is the sentinel that flushes pending words.
    for (int i = 0; i <= alignment.size(); i++) {

      if (alignment.size() == i) { // dont forget the last text segment
        // Two empty strings are equal, so this pass always falls through to the flush below.
        cand = "";
        ref = "";
      } else {
        AlignedSequence cur = alignment.get(i);
        cand = cur.m_candidate;
        ref = cur.m_reference;
      }

      // save the index of the first "anchor" so we
      // know where to start the alignment below
      if (cand != null && candStartAnchorIdx == -1) {
        candStartAnchorIdx = i;
      }
      if (ref != null && refStartAnchorIdx == -1) {
        refStartAnchorIdx = i;
      }

      // if either is null, skip the alignment step, we only align
      // when the reference word is the same as the candidate word.
      if (ref == null || cand == null) {
        // only one of the words can be null at a time.
        if (cand != null) {
          candAccuSize += cand.length() + 1; // +1 for the space after the word
        } else { // ref != null
          refAccuSize += ref.length() + 1;
        }
        continue;
      }

      // if we got here, we have two words
      if (!cand.equals(ref)) {
        // words don't match, increment the character accumulators
        // and skip trying to align until the words match
        refAccuSize += ref.length() + 1; // +1 for the space after the word
        candAccuSize += cand.length() + 1;
        continue;
      }

      // we only do an alignment if it's small enough to do efficiently
      // and there is something pending on both sides
      boolean blnUseDynamicAlignment =
          ((long) refAccuSize * (long) candAccuSize < MAX_DYNAMIC_TABLE_SIZE)
              && refAccuSize != 0
              && candAccuSize != 0;

      // build the array to pass to the aligner or just null align
      int tmpAccuSize = 0;
      for (int kk = refStartAnchorIdx; kk < i; kk++) {
        if (alignment.get(kk).m_reference == null) {
          continue; // only process non-null words
        }
        String tmpWord = alignment.get(kk).m_reference + " "; // add space at end of word
        for (int charIdx = 0; charIdx < tmpWord.length(); charIdx++) {
          if (blnUseDynamicAlignment) {
            refAccu[tmpAccuSize++] = Character.toString(tmpWord.charAt(charIdx));
          } else {
            alignment2.add(new AlignedSequence(tmpWord.charAt(charIdx), null));
          }
        }
      }

      // same expansion for the candidate side, restarting the write position
      tmpAccuSize = 0;
      for (int kk = candStartAnchorIdx; kk < i; kk++) {
        if (alignment.get(kk).m_candidate == null) {
          continue;
        }
        String tmpWord = alignment.get(kk).m_candidate + " "; // add space at end of word
        for (int charIdx = 0; charIdx < tmpWord.length(); charIdx++) {
          if (blnUseDynamicAlignment) {
            candAccu[tmpAccuSize++] = Character.toString(tmpWord.charAt(charIdx));
          } else {
            alignment2.add(new AlignedSequence(null, tmpWord.charAt(charIdx)));
          }
        }
      }

      // add the aligner's output to the resulting sequence if the buffers were filled
      if (blnUseDynamicAlignment) {
        ArrayList<AlignedSequence> sq =
            aligner.align(refAccu, candAccu, 0, refAccuSize, 0, candAccuSize);
        alignment2.addAll(sq);
      }

      // we know that current words align
      for (int j = 0; j < ref.length(); j++) {
        String cha = ref.substring(j, j + 1);
        alignment2.add(new AlignedSequence(cha, cha));
      }
      alignment2.add(new AlignedSequence(" ", " "));

      // reset accumulators & anchor indexes
      refAccuSize = 0;
      candAccuSize = 0;
      refStartAnchorIdx = -1;
      candStartAnchorIdx = -1;
    } // end loop through the word level alignment

    // we always add a space after a word, and the last run through uses
    // an empty string for both the cand and ref word to make sure we "fall through"
    // to the logic and process any words still waiting to be aligned.
    // Now that we're done, remove the last two alignments. Without this, there
    // are extra spaces in the alignment that don't exist in the original works and
    // a sanity check such as checkCharLevelAlignment() will fail.
    // NOTE(review): this removes exactly two entries. When the final pending words were
    // null-aligned rather than passed to the aligner, the entry before the sentinel is a
    // candidate-only space, so a reference-only trailing space may remain - confirm.
    alignment2.remove(alignment2.size() - 1);
    alignment2.remove(alignment2.size() - 1);
    alignment = alignment2;
  }