/** * Splits events of a row if they overlap an island. Islands are areas between the token which are * included in the result. * * @param row * @param graph * @param text * @param startTokenIndex token index of the first token in the match * @param endTokenIndex token index of the last token in the match */ private static void splitRowsOnIslands( Row row, final SDocumentGraph graph, STextualDS text, long startTokenIndex, long endTokenIndex) { BitSet tokenCoverage = new BitSet(); // get the sorted token List<SToken> sortedTokenList = graph.getSortedTokenByText(); // add all token belonging to the right text to the bit set ListIterator<SToken> itToken = sortedTokenList.listIterator(); while (itToken.hasNext()) { SToken t = itToken.next(); if (text == null || text == CommonHelper.getTextualDSForNode(t, graph)) { RelannisNodeFeature feat = (RelannisNodeFeature) t.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue(); long tokenIndexRaw = feat.getTokenIndex(); tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex); int tokenIndex = (int) (tokenIndexRaw - startTokenIndex); tokenCoverage.set(tokenIndex); } } ListIterator<GridEvent> itEvents = row.getEvents().listIterator(); while (itEvents.hasNext()) { GridEvent event = itEvents.next(); BitSet eventBitSet = new BitSet(); eventBitSet.set(event.getLeft(), event.getRight() + 1); // restrict event bitset on the locations where token are present eventBitSet.and(tokenCoverage); // if there is is any 0 bit before the right border there is a break in the event // and we need to split it if (eventBitSet.nextClearBit(event.getLeft()) <= event.getRight()) { // remove the original event row.removeEvent(itEvents); // The event bitset now marks all the locations which the event should // cover. // Make a list of new events for each connected range in the bitset int subElement = 0; int offset = eventBitSet.nextSetBit(0); while (offset >= 0) { int end = eventBitSet.nextClearBit(offset) - 1; if (offset < end) { GridEvent newEvent = new GridEvent(event); newEvent.setId(event.getId() + "_islandsplit_" + subElement++); newEvent.setLeft(offset); newEvent.setRight(end); row.addEvent(itEvents, newEvent); } offset = eventBitSet.nextSetBit(end + 1); } } // end if we need to split } }
/** * Splits events of a row if they contain a gap. Gaps are found using the token index (provided as * ANNIS specific {@link SFeature}. Inserted events have a special style to mark them as gaps. * * @param row * @param graph * @param startTokenIndex token index of the first token in the match * @param endTokenIndex token index of the last token in the match */ private static void splitRowsOnGaps( Row row, final SDocumentGraph graph, long startTokenIndex, long endTokenIndex) { ListIterator<GridEvent> itEvents = row.getEvents().listIterator(); while (itEvents.hasNext()) { GridEvent event = itEvents.next(); int lastTokenIndex = -1; // sort the coveredIDs LinkedList<String> sortedCoveredToken = new LinkedList<>(event.getCoveredIDs()); Collections.sort( sortedCoveredToken, new Comparator<String>() { @Override public int compare(String o1, String o2) { SNode node1 = graph.getNode(o1); SNode node2 = graph.getNode(o2); if (node1 == node2) { return 0; } if (node1 == null) { return -1; } if (node2 == null) { return +1; } RelannisNodeFeature feat1 = (RelannisNodeFeature) node1.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue(); RelannisNodeFeature feat2 = (RelannisNodeFeature) node2.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue(); long tokenIndex1 = feat1.getTokenIndex(); long tokenIndex2 = feat2.getTokenIndex(); return ((Long) (tokenIndex1)).compareTo(tokenIndex2); } }); // first calculate all gaps List<GridEvent> gaps = new LinkedList<>(); for (String id : sortedCoveredToken) { SNode node = graph.getNode(id); RelannisNodeFeature feat = (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue(); long tokenIndexRaw = feat.getTokenIndex(); tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex); int tokenIndex = (int) (tokenIndexRaw - startTokenIndex); // sanity check if (tokenIndex >= event.getLeft() && tokenIndex <= event.getRight()) { int diff = tokenIndex - lastTokenIndex; if (lastTokenIndex >= 0 && diff > 1) { // we detected a gap GridEvent gap = new GridEvent( event.getId() + "_gap_" + gaps.size(), lastTokenIndex + 1, tokenIndex - 1, ""); gap.setGap(true); gaps.add(gap); } lastTokenIndex = tokenIndex; } else { // reset gap search when discovered there were token we use for // hightlighting but do not actually cover lastTokenIndex = -1; } } // end for each covered token id ListIterator<GridEvent> itGaps = gaps.listIterator(); // remember the old right value int oldRight = event.getRight(); int gapNr = 0; while (itGaps.hasNext()) { GridEvent gap = itGaps.next(); if (gapNr == 0) { // shorten original event event.setRight(gap.getLeft() - 1); } // insert the real gap itEvents.add(gap); int rightBorder = oldRight; if (itGaps.hasNext()) { // don't use the old event right border since the gap should only go until // the next event GridEvent nextGap = itGaps.next(); itGaps.previous(); rightBorder = nextGap.getLeft() - 1; } // insert a new event node that covers the rest of the event GridEvent after = new GridEvent(event); after.setId(event.getId() + "_after_" + gapNr); after.setLeft(gap.getRight() + 1); after.setRight(rightBorder); itEvents.add(after); gapNr++; } } }