예제 #1
0
  /**
   * Breaks up the events of a row wherever they stretch across columns that are not occupied by a
   * token of the given text.
   *
   * <p>A bit set of occupied columns (token index relative to {@code startTokenIndex}, clipped to
   * the match range) is built first. Every event whose column range contains at least one
   * unoccupied column is removed from the row and replaced by copies of itself, one for each
   * connected run of occupied columns. The copies get the ID of the original event with the suffix
   * {@code _islandsplit_<n>}.
   *
   * @param row the row whose events are inspected and, if necessary, replaced
   * @param graph the document graph that provides the sorted token list
   * @param text the text a token must belong to in order to be counted; if {@code null} the token
   *     of all texts are counted
   * @param startTokenIndex token index of the first token in the match
   * @param endTokenIndex token index of the last token in the match
   */
  private static void splitRowsOnIslands(
      Row row,
      final SDocumentGraph graph,
      STextualDS text,
      long startTokenIndex,
      long endTokenIndex) {

    // mark every column (relative to the match start) that holds a token of the requested text
    BitSet occupiedColumns = new BitSet();
    for (SToken token : graph.getSortedTokenByText()) {
      if (text != null && text != CommonHelper.getTextualDSForNode(token, graph)) {
        // token belongs to a different text
        continue;
      }
      RelannisNodeFeature nodeFeature =
          (RelannisNodeFeature) token.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
      // token outside of the match range are mapped onto the nearest border column
      long clippedIndex = clip(nodeFeature.getTokenIndex(), startTokenIndex, endTokenIndex);
      occupiedColumns.set((int) (clippedIndex - startTokenIndex));
    }

    ListIterator<GridEvent> eventIterator = row.getEvents().listIterator();
    while (eventIterator.hasNext()) {
      GridEvent original = eventIterator.next();
      int left = original.getLeft();
      int right = original.getRight();

      // the columns of this event that are actually backed by a token
      BitSet covered = new BitSet();
      covered.set(left, right + 1);
      covered.and(occupiedColumns);

      if (covered.nextClearBit(left) > right) {
        // no hole between the left and the right border: the event stays as it is
        continue;
      }

      // the event is interrupted at least once, replace it by its connected pieces
      row.removeEvent(eventIterator);

      int pieceCounter = 0;
      int runStart = covered.nextSetBit(0);
      while (runStart >= 0) {
        int runEnd = covered.nextClearBit(runStart) - 1;
        // Only runs spanning more than one column are re-added; a run of exactly one column
        // produces no replacement event.
        // NOTE(review): confirm that dropping single-column runs is intended.
        if (runEnd > runStart) {
          GridEvent piece = new GridEvent(original);
          piece.setId(original.getId() + "_islandsplit_" + pieceCounter);
          pieceCounter++;
          piece.setLeft(runStart);
          piece.setRight(runEnd);
          row.addEvent(eventIterator, piece);
        }
        runStart = covered.nextSetBit(runEnd + 1);
      }
    }
  }
예제 #2
0
  /**
   * Splits events of a row if they contain a gap. Gaps are found using the token index (provided
   * as ANNIS specific {@link SFeature}). Inserted gap events are marked as gaps via {@code
   * setGap(true)}.
   *
   * <p>For every event the covered IDs are sorted by the token index of their nodes. Whenever two
   * consecutive covered token inside the event borders are more than one column apart, a gap
   * event is created for the columns in between. The original event is then shortened to end
   * before the first gap, and for each gap the gap event itself plus a copy of the original event
   * covering the columns up to the next gap (or the original right border) are inserted directly
   * behind it. Covered IDs that cannot be resolved to a node of the graph are ignored.
   *
   * @param row the row whose events are checked; its event list is modified in place
   * @param graph the document graph used to resolve the covered IDs to nodes
   * @param startTokenIndex token index of the first token in the match
   * @param endTokenIndex token index of the last token in the match
   */
  private static void splitRowsOnGaps(
      Row row, final SDocumentGraph graph, long startTokenIndex, long endTokenIndex) {
    ListIterator<GridEvent> itEvents = row.getEvents().listIterator();
    while (itEvents.hasNext()) {
      GridEvent event = itEvents.next();

      int lastTokenIndex = -1;

      // sort the covered IDs by the token index of the nodes they refer to
      List<String> sortedCoveredToken = new LinkedList<>(event.getCoveredIDs());
      Collections.sort(
          sortedCoveredToken,
          new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
              SNode node1 = graph.getNode(o1);
              SNode node2 = graph.getNode(o2);

              if (node1 == node2) {
                return 0;
              }
              // IDs without a node are sorted to the front
              if (node1 == null) {
                return -1;
              }
              if (node2 == null) {
                return +1;
              }

              RelannisNodeFeature feat1 =
                  (RelannisNodeFeature) node1.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
              RelannisNodeFeature feat2 =
                  (RelannisNodeFeature) node2.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();

              // compare the primitive values directly instead of boxing them
              return Long.compare(feat1.getTokenIndex(), feat2.getTokenIndex());
            }
          });

      // first calculate all gaps
      List<GridEvent> gaps = new LinkedList<>();
      for (String id : sortedCoveredToken) {

        SNode node = graph.getNode(id);
        if (node == null) {
          // The comparator above tolerates IDs that do not resolve to a node, so this loop must
          // tolerate them as well instead of failing with a NullPointerException. Such IDs are
          // sorted to the front, where no previous token has been seen yet, so skipping them
          // does not influence the gap detection.
          continue;
        }
        RelannisNodeFeature feat =
            (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        long tokenIndexRaw = feat.getTokenIndex();

        tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex);

        // column of the token relative to the first token of the match
        int tokenIndex = (int) (tokenIndexRaw - startTokenIndex);

        // sanity check
        if (tokenIndex >= event.getLeft() && tokenIndex <= event.getRight()) {
          int diff = tokenIndex - lastTokenIndex;

          if (lastTokenIndex >= 0 && diff > 1) {
            // we detected a gap: it spans the columns strictly between the two token
            GridEvent gap =
                new GridEvent(
                    event.getId() + "_gap_" + gaps.size(), lastTokenIndex + 1, tokenIndex - 1, "");
            gap.setGap(true);
            gaps.add(gap);
          }

          lastTokenIndex = tokenIndex;
        } else {
          // reset gap search when discovered there were token we use for
          // highlighting but do not actually cover
          lastTokenIndex = -1;
        }
      } // end for each covered token id

      ListIterator<GridEvent> itGaps = gaps.listIterator();
      // remember the old right value since the original event gets shortened below
      int oldRight = event.getRight();

      int gapNr = 0;
      while (itGaps.hasNext()) {
        GridEvent gap = itGaps.next();

        if (gapNr == 0) {
          // shorten original event so that it ends right before the first gap
          event.setRight(gap.getLeft() - 1);
        }

        // insert the real gap; ListIterator.add places it before the cursor, so the outer loop
        // will not visit the inserted events again
        itEvents.add(gap);

        int rightBorder = oldRight;
        if (itGaps.hasNext()) {
          // don't use the old event right border since the gap should only go until
          // the next event; peek at the next gap without consuming it
          GridEvent nextGap = itGaps.next();
          itGaps.previous();

          rightBorder = nextGap.getLeft() - 1;
        }
        // insert a new event node that covers the rest of the event
        GridEvent after = new GridEvent(event);

        after.setId(event.getId() + "_after_" + gapNr);
        after.setLeft(gap.getRight() + 1);
        after.setRight(rightBorder);

        itEvents.add(after);
        gapNr++;
      }
    }
  }