コード例 #1
0
ファイル: EventExtractor.java プロジェクト: korpling/ANNIS
  /**
   * Splits events of a row if they overlap an island. Islands are areas between the token which are
   * included in the result.
   *
   * @param row
   * @param graph
   * @param text
   * @param startTokenIndex token index of the first token in the match
   * @param endTokenIndex token index of the last token in the match
   */
  private static void splitRowsOnIslands(
      Row row,
      final SDocumentGraph graph,
      STextualDS text,
      long startTokenIndex,
      long endTokenIndex) {

    BitSet tokenCoverage = new BitSet();
    // get the sorted token
    List<SToken> sortedTokenList = graph.getSortedTokenByText();
    // add all token belonging to the right text to the bit set
    ListIterator<SToken> itToken = sortedTokenList.listIterator();
    while (itToken.hasNext()) {
      SToken t = itToken.next();
      if (text == null || text == CommonHelper.getTextualDSForNode(t, graph)) {
        RelannisNodeFeature feat =
            (RelannisNodeFeature) t.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        long tokenIndexRaw = feat.getTokenIndex();

        tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex);
        int tokenIndex = (int) (tokenIndexRaw - startTokenIndex);
        tokenCoverage.set(tokenIndex);
      }
    }

    ListIterator<GridEvent> itEvents = row.getEvents().listIterator();
    while (itEvents.hasNext()) {
      GridEvent event = itEvents.next();
      BitSet eventBitSet = new BitSet();
      eventBitSet.set(event.getLeft(), event.getRight() + 1);

      // restrict event bitset on the locations where token are present
      eventBitSet.and(tokenCoverage);

      // if there is is any 0 bit before the right border there is a break in the event
      // and we need to split it
      if (eventBitSet.nextClearBit(event.getLeft()) <= event.getRight()) {
        // remove the original event
        row.removeEvent(itEvents);

        // The event bitset now marks all the locations which the event should
        // cover.
        // Make a list of new events for each connected range in the bitset
        int subElement = 0;
        int offset = eventBitSet.nextSetBit(0);
        while (offset >= 0) {
          int end = eventBitSet.nextClearBit(offset) - 1;
          if (offset < end) {
            GridEvent newEvent = new GridEvent(event);
            newEvent.setId(event.getId() + "_islandsplit_" + subElement++);
            newEvent.setLeft(offset);
            newEvent.setRight(end);
            row.addEvent(itEvents, newEvent);
          }
          offset = eventBitSet.nextSetBit(end + 1);
        }
      } // end if we need to split
    }
  }
コード例 #2
0
ファイル: EventExtractor.java プロジェクト: korpling/ANNIS
  public static void removeEmptySpace(
      LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, Row tokenRow) {
    List<Range<Integer>> gaps = new LinkedList<>();

    BitSet totalOccupancyGrid = new BitSet();
    for (Map.Entry<String, ArrayList<Row>> layer : rowsByAnnotation.entrySet()) {
      for (Row r : layer.getValue()) {
        totalOccupancyGrid.or(r.getOccupancyGridCopy());
      }
    }
    // We always include the token row in the occupancy grid since it is not
    // a gap. Otherwise empty token would trigger gaps if the token list
    // is included in the visualizer output.
    // See https://github.com/korpling/ANNIS/issues/281 for the corresponding
    // bug report.
    if (tokenRow != null) {
      totalOccupancyGrid.or(tokenRow.getOccupancyGridCopy());
    }

    // The Range class can give us the next bit that is not set. Use this
    // to detect gaps. A gap starts from the next non-set bit and goes to
    // the next set bit.
    Range<Integer> gap = Range.closed(-1, totalOccupancyGrid.nextSetBit(0));
    while (true) {
      int gapStart = totalOccupancyGrid.nextClearBit(gap.upperEndpoint() + 1);
      int gapEnd = totalOccupancyGrid.nextSetBit(gapStart);
      if (gapEnd <= 0) {
        break;
      }
      gap = Range.closed(gapStart, gapEnd - 1);
      gaps.add(gap);
    }

    int gapID = 0;
    int totalOffset = 0;
    for (Range<Integer> gRaw : gaps) {
      // adjust the space range itself
      Range<Integer> g =
          Range.closed(gRaw.lowerEndpoint() - totalOffset, gRaw.upperEndpoint() - totalOffset);
      int offset = g.upperEndpoint() - g.lowerEndpoint();
      totalOffset += offset;

      for (Entry<String, ArrayList<Row>> rowEntry : rowsByAnnotation.entrySet()) {
        ArrayList<Row> rows = rowEntry.getValue();
        for (Row r : rows) {
          List<GridEvent> eventsCopy = new LinkedList<>(r.getEvents());
          for (GridEvent e : eventsCopy) {
            if (e.getLeft() >= g.upperEndpoint()) {

              r.removeEvent(e);
              e.setLeft(e.getLeft() - offset);
              e.setRight(e.getRight() - offset);
              r.addEvent(e);
            }
          }

          // add a special space event
          String spaceCaption = "";
          if ("tok".equalsIgnoreCase(rowEntry.getKey())) {
            spaceCaption = "(...)";
          }
          GridEvent spaceEvent =
              new GridEvent("gap-" + gapID, g.lowerEndpoint(), g.lowerEndpoint(), spaceCaption);
          spaceEvent.setSpace(true);
          r.addEvent(spaceEvent);
          gapID++;
        }
      }
    }
  }