/** * Splits events of a row if they overlap an island. Islands are areas between the token which are * included in the result. * * @param row * @param graph * @param text * @param startTokenIndex token index of the first token in the match * @param endTokenIndex token index of the last token in the match */ private static void splitRowsOnIslands( Row row, final SDocumentGraph graph, STextualDS text, long startTokenIndex, long endTokenIndex) { BitSet tokenCoverage = new BitSet(); // get the sorted token List<SToken> sortedTokenList = graph.getSortedTokenByText(); // add all token belonging to the right text to the bit set ListIterator<SToken> itToken = sortedTokenList.listIterator(); while (itToken.hasNext()) { SToken t = itToken.next(); if (text == null || text == CommonHelper.getTextualDSForNode(t, graph)) { RelannisNodeFeature feat = (RelannisNodeFeature) t.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue(); long tokenIndexRaw = feat.getTokenIndex(); tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex); int tokenIndex = (int) (tokenIndexRaw - startTokenIndex); tokenCoverage.set(tokenIndex); } } ListIterator<GridEvent> itEvents = row.getEvents().listIterator(); while (itEvents.hasNext()) { GridEvent event = itEvents.next(); BitSet eventBitSet = new BitSet(); eventBitSet.set(event.getLeft(), event.getRight() + 1); // restrict event bitset on the locations where token are present eventBitSet.and(tokenCoverage); // if there is is any 0 bit before the right border there is a break in the event // and we need to split it if (eventBitSet.nextClearBit(event.getLeft()) <= event.getRight()) { // remove the original event row.removeEvent(itEvents); // The event bitset now marks all the locations which the event should // cover. // Make a list of new events for each connected range in the bitset int subElement = 0; int offset = eventBitSet.nextSetBit(0); while (offset >= 0) { int end = eventBitSet.nextClearBit(offset) - 1; if (offset < end) { GridEvent newEvent = new GridEvent(event); newEvent.setId(event.getId() + "_islandsplit_" + subElement++); newEvent.setLeft(offset); newEvent.setRight(end); row.addEvent(itEvents, newEvent); } offset = eventBitSet.nextSetBit(end + 1); } } // end if we need to split } }
public static void removeEmptySpace( LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, Row tokenRow) { List<Range<Integer>> gaps = new LinkedList<>(); BitSet totalOccupancyGrid = new BitSet(); for (Map.Entry<String, ArrayList<Row>> layer : rowsByAnnotation.entrySet()) { for (Row r : layer.getValue()) { totalOccupancyGrid.or(r.getOccupancyGridCopy()); } } // We always include the token row in the occupancy grid since it is not // a gap. Otherwise empty token would trigger gaps if the token list // is included in the visualizer output. // See https://github.com/korpling/ANNIS/issues/281 for the corresponding // bug report. if (tokenRow != null) { totalOccupancyGrid.or(tokenRow.getOccupancyGridCopy()); } // The Range class can give us the next bit that is not set. Use this // to detect gaps. A gap starts from the next non-set bit and goes to // the next set bit. Range<Integer> gap = Range.closed(-1, totalOccupancyGrid.nextSetBit(0)); while (true) { int gapStart = totalOccupancyGrid.nextClearBit(gap.upperEndpoint() + 1); int gapEnd = totalOccupancyGrid.nextSetBit(gapStart); if (gapEnd <= 0) { break; } gap = Range.closed(gapStart, gapEnd - 1); gaps.add(gap); } int gapID = 0; int totalOffset = 0; for (Range<Integer> gRaw : gaps) { // adjust the space range itself Range<Integer> g = Range.closed(gRaw.lowerEndpoint() - totalOffset, gRaw.upperEndpoint() - totalOffset); int offset = g.upperEndpoint() - g.lowerEndpoint(); totalOffset += offset; for (Entry<String, ArrayList<Row>> rowEntry : rowsByAnnotation.entrySet()) { ArrayList<Row> rows = rowEntry.getValue(); for (Row r : rows) { List<GridEvent> eventsCopy = new LinkedList<>(r.getEvents()); for (GridEvent e : eventsCopy) { if (e.getLeft() >= g.upperEndpoint()) { r.removeEvent(e); e.setLeft(e.getLeft() - offset); e.setRight(e.getRight() - offset); r.addEvent(e); } } // add a special space event String spaceCaption = ""; if ("tok".equalsIgnoreCase(rowEntry.getKey())) { spaceCaption = "(...)"; } GridEvent spaceEvent = new GridEvent("gap-" + gapID, g.lowerEndpoint(), g.lowerEndpoint(), spaceCaption); spaceEvent.setSpace(true); r.addEvent(spaceEvent); gapID++; } } } }