/** * Splits events of a row if they overlap an island. Islands are areas between the token which are * included in the result. * * @param row * @param graph * @param text * @param startTokenIndex token index of the first token in the match * @param endTokenIndex token index of the last token in the match */ private static void splitRowsOnIslands( Row row, final SDocumentGraph graph, STextualDS text, long startTokenIndex, long endTokenIndex) { BitSet tokenCoverage = new BitSet(); // get the sorted token List<SToken> sortedTokenList = graph.getSortedTokenByText(); // add all token belonging to the right text to the bit set ListIterator<SToken> itToken = sortedTokenList.listIterator(); while (itToken.hasNext()) { SToken t = itToken.next(); if (text == null || text == CommonHelper.getTextualDSForNode(t, graph)) { RelannisNodeFeature feat = (RelannisNodeFeature) t.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue(); long tokenIndexRaw = feat.getTokenIndex(); tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex); int tokenIndex = (int) (tokenIndexRaw - startTokenIndex); tokenCoverage.set(tokenIndex); } } ListIterator<GridEvent> itEvents = row.getEvents().listIterator(); while (itEvents.hasNext()) { GridEvent event = itEvents.next(); BitSet eventBitSet = new BitSet(); eventBitSet.set(event.getLeft(), event.getRight() + 1); // restrict event bitset on the locations where token are present eventBitSet.and(tokenCoverage); // if there is is any 0 bit before the right border there is a break in the event // and we need to split it if (eventBitSet.nextClearBit(event.getLeft()) <= event.getRight()) { // remove the original event row.removeEvent(itEvents); // The event bitset now marks all the locations which the event should // cover. 
// Make a list of new events for each connected range in the bitset int subElement = 0; int offset = eventBitSet.nextSetBit(0); while (offset >= 0) { int end = eventBitSet.nextClearBit(offset) - 1; if (offset < end) { GridEvent newEvent = new GridEvent(event); newEvent.setId(event.getId() + "_islandsplit_" + subElement++); newEvent.setLeft(offset); newEvent.setRight(end); row.addEvent(itEvents, newEvent); } offset = eventBitSet.nextSetBit(end + 1); } } // end if we need to split } }
/**
 * Looks up the ID of the node targeted by the first textual relation that leaves the given token.
 *
 * @param tok the token whose outgoing relations are inspected
 * @param graph the document graph that is asked for the outgoing relations
 * @return the ID of the target of the first {@link STextualRelation} found, or {@code null} if
 *     the graph reports no outgoing relations or none of them is a textual relation
 */
private static String getTextID(SToken tok, SDocumentGraph graph) {
  List<? extends SRelation<? extends SNode, ? extends SNode>> outgoing =
      graph.getOutRelations(tok.getId());
  if (outgoing == null) {
    return null;
  }

  for (SRelation<? extends SNode, ? extends SNode> rel : outgoing) {
    if (!(rel instanceof STextualRelation)) {
      continue;
    }
    STextualRelation textRel = (STextualRelation) rel;
    return textRel.getTarget().getId();
  }

  // no textual relation among the outgoing edges
  return null;
}
private static void addAnnotationsForNode( SNode node, SDocumentGraph graph, long startTokenIndex, long endTokenIndex, PDFController pdfController, PDFPageHelper pageNumberHelper, AtomicInteger eventCounter, LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, boolean addMatch, Set<String> mediaLayer, boolean replaceValueWithMediaIcon) { List<String> matchedAnnos = new ArrayList<>(); SFeature featMatchedAnnos = graph.getFeature(ANNIS_NS, FEAT_MATCHEDANNOS); if (featMatchedAnnos != null) { matchedAnnos = Splitter.on(',').trimResults().splitToList(featMatchedAnnos.getValue_STEXT()); } // check if the span is a matched node SFeature featMatched = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE); Long matchRaw = featMatched == null ? null : featMatched.getValue_SNUMERIC(); String matchedQualifiedAnnoName = ""; if (matchRaw != null && matchRaw <= matchedAnnos.size()) { matchedQualifiedAnnoName = matchedAnnos.get((int) ((long) matchRaw) - 1); } // calculate the left and right values of a span // TODO: howto get these numbers with Salt? RelannisNodeFeature feat = (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue(); long leftLong = feat.getLeftToken(); long rightLong = feat.getRightToken(); leftLong = clip(leftLong, startTokenIndex, endTokenIndex); rightLong = clip(rightLong, startTokenIndex, endTokenIndex); int left = (int) (leftLong - startTokenIndex); int right = (int) (rightLong - startTokenIndex); for (SAnnotation anno : node.getAnnotations()) { ArrayList<Row> rows = rowsByAnnotation.get(anno.getQName()); if (rows == null) { // try again with only the name rows = rowsByAnnotation.get(anno.getName()); } if (rows != null) { // only do something if the annotation was defined before // 1. 
give each annotation of each span an own row Row r = new Row(); String id = "event_" + eventCounter.incrementAndGet(); GridEvent event = new GridEvent(id, left, right, anno.getValue_STEXT()); event.setTooltip(Helper.getQualifiedName(anno)); if (addMatch && matchRaw != null) { long match = matchRaw; if (matchedQualifiedAnnoName.isEmpty()) { // always set the match when there is no matched annotation at all event.setMatch(match); } // check if the annotation also matches else if (matchedQualifiedAnnoName.equals(anno.getQName())) { event.setMatch(match); } } if (node instanceof SSpan) { // calculate overlapped SToken List<? extends SRelation<? extends SNode, ? extends SNode>> outEdges = graph.getOutRelations(node.getId()); if (outEdges != null) { for (SRelation<? extends SNode, ? extends SNode> e : outEdges) { if (e instanceof SSpanningRelation) { SSpanningRelation spanRel = (SSpanningRelation) e; SToken tok = spanRel.getTarget(); event.getCoveredIDs().add(tok.getId()); // get the STextualDS of this token and add it to the event String textID = getTextID(tok, graph); if (textID != null) { event.setTextID(textID); } } } } // end if span has out edges } else if (node instanceof SToken) { event.getCoveredIDs().add(node.getId()); // get the STextualDS of this token and add it to the event String textID = getTextID((SToken) node, graph); if (textID != null) { event.setTextID(textID); } } // try to get time annotations if (mediaLayer == null || mediaLayer.contains(anno.getQName())) { double[] startEndTime = TimeHelper.getOverlappedTime(node); if (startEndTime.length == 1) { if (replaceValueWithMediaIcon) { event.setValue(" "); event.setTooltip("play excerpt " + event.getStartTime()); } event.setStartTime(startEndTime[0]); } else if (startEndTime.length == 2) { event.setStartTime(startEndTime[0]); event.setEndTime(startEndTime[1]); if (replaceValueWithMediaIcon) { event.setValue(" "); event.setTooltip("play excerpt " + event.getStartTime() + "-" + event.getEndTime()); } } } 
r.addEvent(event); rows.add(r); if (pdfController != null && pdfController.sizeOfRegisterdPDFViewer() > 0) { String page = pageNumberHelper.getPageFromAnnotation(node); if (page != null) { event.setPage(page); } } } } // end for each annotation of span }