/**
 * Predicts the boundary text that surrounds every one of the given chunks.
 *
 * <p>A sorted copy of {@code chunks} is made (so {@code Chunk} must have a natural ordering) and
 * its first element is used as the reference. The boundary is then widened independently to the
 * left and to the right for as long as every chunk reports the same symbol at that outside
 * offset.
 *
 * @param chunks the chunks to compare; the collection itself is not modified
 * @return a {@code Boundary} built from the common text before and after the reference chunk, or
 *     {@code null} when {@code chunks} is {@code null}, holds fewer than two elements, or the
 *     chunks share no common symbol on either side
 */
public Boundary predict(Collection<Chunk> chunks) {
    if (chunks == null || chunks.size() < 2) return null;

    List<Chunk> sortedChunks = new LinkedList<Chunk>(chunks);
    Collections.sort(sortedChunks);

    // The two sides never influence each other, so each width is computed in its own pass.
    int leftBoundaryWidth = commonOutsideWidth(sortedChunks, -1);
    int rightBoundaryWidth = commonOutsideWidth(sortedChunks, 1);

    // Nothing in common on either side: there is no boundary to report.
    if (leftBoundaryWidth == 0 && rightBoundaryWidth == 0) return null;

    Chunk chunk0 = sortedChunks.get(0);
    String whole = chunk0.getWhole();

    // Every chunk agreed on these symbols, so the text can be cut out of the reference chunk.
    String start =
        whole.substring(chunk0.getStartIndex() - leftBoundaryWidth, chunk0.getStartIndex());
    String end = whole.substring(chunk0.getEndIndex(), chunk0.getEndIndex() + rightBoundaryWidth);

    return new Boundary(start, end);
  }

  /**
   * Counts how many consecutive outside symbols all chunks have in common in one direction.
   *
   * <p>NOTE(review): assumes {@code getSymbolOutside} is a side-effect-free accessor that takes a
   * negative offset for symbols before the chunk and a positive one for symbols after it, and
   * returns {@code null} when there is no symbol at that offset - confirm against {@code Chunk}.
   *
   * @param sortedChunks the chunks to compare; the first element is the reference
   * @param direction {@code -1} to scan leftwards, {@code 1} to scan rightwards
   * @return the number of leading outside symbols shared by every chunk in that direction
   */
  private int commonOutsideWidth(List<Chunk> sortedChunks, int direction) {
    Chunk reference = sortedChunks.get(0);
    // Iterate the view sequentially: indexed get(i) on a LinkedList is linear per call.
    List<Chunk> others = sortedChunks.subList(1, sortedChunks.size());

    int width = 0;
    while (true) {
      int offset = direction * (width + 1);
      Character expected = reference.getSymbolOutside(offset);
      if (expected == null) return width;

      for (Chunk other : others) {
        // A null symbol from another chunk never equals 'expected', so it also ends the scan.
        if (!expected.equals(other.getSymbolOutside(offset))) return width;
      }
      width++;
    }
  }
  // Example no. 2
  // 0
  /**
   * Renders the index ranges of all chunks as a single space-separated string.
   *
   * <p>Each chunk is written as {@code "[ <start>-<stop>]"}, for example:
   *
   * <pre>
   * [ 0-6] [ 12-18] [ 39-40]
   * </pre>
   *
   * <p>NOTE(review): the earlier inline comment showed {@code [0-6]} without the space after the
   * opening bracket, but the code has always emitted {@code "[ "}. The output is left unchanged
   * here in case something parses it - confirm which form is intended.
   *
   * @return the formatted ranges, or an empty string when there are no chunks
   */
  public String getChunkString() {
    // A builder avoids re-copying the whole accumulated string on every append.
    StringBuilder output = new StringBuilder();
    int count = chunks.size();
    for (int i = 0; i < count; i++) {
      Chunk c = chunks.get(i);
      // Separator goes between entries only, never after the last one.
      if (i > 0) output.append(" ");
      output.append("[ ");
      output.append(c.getStartIndex());
      output.append("-");
      output.append(c.getStopIndex());
      output.append("]");
    }

    return output.toString();
  }