コード例 #1
0
ファイル: EventExtractor.java プロジェクト: korpling/ANNIS
  /**
   * Merges the rows. This function uses a heuristical approach that guarantiess to merge all rows
   * into one if there is no conflict at all. If there are conflicts the heuristic will be
   * best-efford but with linear runtime (given a a number of rows).
   *
   * @param rows Will be altered, if no conflicts occcured this wil have only one element.
   */
  private static void mergeAllRowsIfPossible(ArrayList<Row> rows) {
    // use fixed seed in order to get consistent results (with random properties)
    Random rand = new Random(5711l);
    int tries = 0;
    // this should be enough to be quite sure we don't miss any optimalization
    // possibility
    final int maxTries = rows.size() * 2;

    // do this loop until we successfully merged everything into one row
    // or we give up until too much tries
    while (rows.size() > 1 && tries < maxTries) {
      // choose two random entries
      int oneIdx = rand.nextInt(rows.size());
      int secondIdx = rand.nextInt(rows.size());
      if (oneIdx == secondIdx) {
        // try again if we choose the same rows by accident
        continue;
      }

      Row one = rows.get(oneIdx);
      Row second = rows.get(secondIdx);

      if (one.merge(second)) {
        // remove the second one since it is merged into the first
        rows.remove(secondIdx);

        // success: reset counter
        tries = 0;
      } else {
        // increase counter to avoid endless loops if no improvement is possible
        tries++;
      }
    }
  }
コード例 #2
0
ファイル: EventExtractor.java プロジェクト: korpling/ANNIS
  /**
   * Splits events of a row if they overlap an island. Islands are areas between the token which are
   * included in the result.
   *
   * @param row
   * @param graph
   * @param text
   * @param startTokenIndex token index of the first token in the match
   * @param endTokenIndex token index of the last token in the match
   */
  private static void splitRowsOnIslands(
      Row row,
      final SDocumentGraph graph,
      STextualDS text,
      long startTokenIndex,
      long endTokenIndex) {

    BitSet tokenCoverage = new BitSet();
    // get the sorted token
    List<SToken> sortedTokenList = graph.getSortedTokenByText();
    // add all token belonging to the right text to the bit set
    ListIterator<SToken> itToken = sortedTokenList.listIterator();
    while (itToken.hasNext()) {
      SToken t = itToken.next();
      if (text == null || text == CommonHelper.getTextualDSForNode(t, graph)) {
        RelannisNodeFeature feat =
            (RelannisNodeFeature) t.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        long tokenIndexRaw = feat.getTokenIndex();

        tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex);
        int tokenIndex = (int) (tokenIndexRaw - startTokenIndex);
        tokenCoverage.set(tokenIndex);
      }
    }

    ListIterator<GridEvent> itEvents = row.getEvents().listIterator();
    while (itEvents.hasNext()) {
      GridEvent event = itEvents.next();
      BitSet eventBitSet = new BitSet();
      eventBitSet.set(event.getLeft(), event.getRight() + 1);

      // restrict event bitset on the locations where token are present
      eventBitSet.and(tokenCoverage);

      // if there is is any 0 bit before the right border there is a break in the event
      // and we need to split it
      if (eventBitSet.nextClearBit(event.getLeft()) <= event.getRight()) {
        // remove the original event
        row.removeEvent(itEvents);

        // The event bitset now marks all the locations which the event should
        // cover.
        // Make a list of new events for each connected range in the bitset
        int subElement = 0;
        int offset = eventBitSet.nextSetBit(0);
        while (offset >= 0) {
          int end = eventBitSet.nextClearBit(offset) - 1;
          if (offset < end) {
            GridEvent newEvent = new GridEvent(event);
            newEvent.setId(event.getId() + "_islandsplit_" + subElement++);
            newEvent.setLeft(offset);
            newEvent.setRight(end);
            row.addEvent(itEvents, newEvent);
          }
          offset = eventBitSet.nextSetBit(end + 1);
        }
      } // end if we need to split
    }
  }
コード例 #3
0
ファイル: EventExtractor.java プロジェクト: korpling/ANNIS
  /**
   * Sort events of a row. The sorting is depending on the left value of the event
   *
   * @param row
   */
  private static void sortEventsByTokenIndex(Row row) {
    Collections.sort(
        row.getEvents(),
        new Comparator<GridEvent>() {
          @Override
          public int compare(GridEvent o1, GridEvent o2) {
            if (o1 == o2) {
              return 0;
            }
            if (o1 == null) {
              return -1;
            }
            if (o2 == null) {
              return +1;
            }

            return ((Integer) o1.getLeft()).compareTo(o2.getLeft());
          }
        });
  }
コード例 #4
0
ファイル: EventExtractor.java プロジェクト: korpling/ANNIS
  /**
   * Splits events of a row if they contain a gap. Gaps are found using the token index (provided as
   * ANNIS specific {@link SFeature}. Inserted events have a special style to mark them as gaps.
   *
   * @param row
   * @param graph
   * @param startTokenIndex token index of the first token in the match
   * @param endTokenIndex token index of the last token in the match
   */
  private static void splitRowsOnGaps(
      Row row, final SDocumentGraph graph, long startTokenIndex, long endTokenIndex) {
    ListIterator<GridEvent> itEvents = row.getEvents().listIterator();
    while (itEvents.hasNext()) {
      GridEvent event = itEvents.next();

      int lastTokenIndex = -1;

      // sort the coveredIDs
      LinkedList<String> sortedCoveredToken = new LinkedList<>(event.getCoveredIDs());
      Collections.sort(
          sortedCoveredToken,
          new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
              SNode node1 = graph.getNode(o1);
              SNode node2 = graph.getNode(o2);

              if (node1 == node2) {
                return 0;
              }
              if (node1 == null) {
                return -1;
              }
              if (node2 == null) {
                return +1;
              }

              RelannisNodeFeature feat1 =
                  (RelannisNodeFeature) node1.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
              RelannisNodeFeature feat2 =
                  (RelannisNodeFeature) node2.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();

              long tokenIndex1 = feat1.getTokenIndex();
              long tokenIndex2 = feat2.getTokenIndex();

              return ((Long) (tokenIndex1)).compareTo(tokenIndex2);
            }
          });

      // first calculate all gaps
      List<GridEvent> gaps = new LinkedList<>();
      for (String id : sortedCoveredToken) {

        SNode node = graph.getNode(id);
        RelannisNodeFeature feat =
            (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        long tokenIndexRaw = feat.getTokenIndex();

        tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex);

        int tokenIndex = (int) (tokenIndexRaw - startTokenIndex);

        // sanity check
        if (tokenIndex >= event.getLeft() && tokenIndex <= event.getRight()) {
          int diff = tokenIndex - lastTokenIndex;

          if (lastTokenIndex >= 0 && diff > 1) {
            // we detected a gap
            GridEvent gap =
                new GridEvent(
                    event.getId() + "_gap_" + gaps.size(), lastTokenIndex + 1, tokenIndex - 1, "");
            gap.setGap(true);
            gaps.add(gap);
          }

          lastTokenIndex = tokenIndex;
        } else {
          // reset gap search when discovered there were token we use for
          // hightlighting but do not actually cover
          lastTokenIndex = -1;
        }
      } // end for each covered token id

      ListIterator<GridEvent> itGaps = gaps.listIterator();
      // remember the old right value
      int oldRight = event.getRight();

      int gapNr = 0;
      while (itGaps.hasNext()) {
        GridEvent gap = itGaps.next();

        if (gapNr == 0) {
          // shorten original event
          event.setRight(gap.getLeft() - 1);
        }

        // insert the real gap
        itEvents.add(gap);

        int rightBorder = oldRight;
        if (itGaps.hasNext()) {
          // don't use the old event right border since the gap should only go until
          // the next event
          GridEvent nextGap = itGaps.next();
          itGaps.previous();

          rightBorder = nextGap.getLeft() - 1;
        }
        // insert a new event node that covers the rest of the event
        GridEvent after = new GridEvent(event);

        after.setId(event.getId() + "_after_" + gapNr);
        after.setLeft(gap.getRight() + 1);
        after.setRight(rightBorder);

        itEvents.add(after);
        gapNr++;
      }
    }
  }
コード例 #5
0
ファイル: EventExtractor.java プロジェクト: korpling/ANNIS
  private static void addAnnotationsForNode(
      SNode node,
      SDocumentGraph graph,
      long startTokenIndex,
      long endTokenIndex,
      PDFController pdfController,
      PDFPageHelper pageNumberHelper,
      AtomicInteger eventCounter,
      LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation,
      boolean addMatch,
      Set<String> mediaLayer,
      boolean replaceValueWithMediaIcon) {

    List<String> matchedAnnos = new ArrayList<>();
    SFeature featMatchedAnnos = graph.getFeature(ANNIS_NS, FEAT_MATCHEDANNOS);
    if (featMatchedAnnos != null) {
      matchedAnnos = Splitter.on(',').trimResults().splitToList(featMatchedAnnos.getValue_STEXT());
    }
    // check if the span is a matched node
    SFeature featMatched = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
    Long matchRaw = featMatched == null ? null : featMatched.getValue_SNUMERIC();

    String matchedQualifiedAnnoName = "";
    if (matchRaw != null && matchRaw <= matchedAnnos.size()) {
      matchedQualifiedAnnoName = matchedAnnos.get((int) ((long) matchRaw) - 1);
    }

    // calculate the left and right values of a span
    // TODO: howto get these numbers with Salt?
    RelannisNodeFeature feat =
        (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();

    long leftLong = feat.getLeftToken();
    long rightLong = feat.getRightToken();

    leftLong = clip(leftLong, startTokenIndex, endTokenIndex);
    rightLong = clip(rightLong, startTokenIndex, endTokenIndex);

    int left = (int) (leftLong - startTokenIndex);
    int right = (int) (rightLong - startTokenIndex);

    for (SAnnotation anno : node.getAnnotations()) {
      ArrayList<Row> rows = rowsByAnnotation.get(anno.getQName());
      if (rows == null) {
        // try again with only the name
        rows = rowsByAnnotation.get(anno.getName());
      }
      if (rows != null) {
        // only do something if the annotation was defined before

        // 1. give each annotation of each span an own row
        Row r = new Row();

        String id = "event_" + eventCounter.incrementAndGet();
        GridEvent event = new GridEvent(id, left, right, anno.getValue_STEXT());
        event.setTooltip(Helper.getQualifiedName(anno));

        if (addMatch && matchRaw != null) {
          long match = matchRaw;

          if (matchedQualifiedAnnoName.isEmpty()) {
            // always set the match when there is no matched annotation at all
            event.setMatch(match);
          }
          // check if the annotation also matches
          else if (matchedQualifiedAnnoName.equals(anno.getQName())) {
            event.setMatch(match);
          }
        }
        if (node instanceof SSpan) {
          // calculate overlapped SToken

          List<? extends SRelation<? extends SNode, ? extends SNode>> outEdges =
              graph.getOutRelations(node.getId());
          if (outEdges != null) {
            for (SRelation<? extends SNode, ? extends SNode> e : outEdges) {
              if (e instanceof SSpanningRelation) {
                SSpanningRelation spanRel = (SSpanningRelation) e;

                SToken tok = spanRel.getTarget();
                event.getCoveredIDs().add(tok.getId());

                // get the STextualDS of this token and add it to the event
                String textID = getTextID(tok, graph);
                if (textID != null) {
                  event.setTextID(textID);
                }
              }
            }
          } // end if span has out edges
        } else if (node instanceof SToken) {
          event.getCoveredIDs().add(node.getId());
          // get the STextualDS of this token and add it to the event
          String textID = getTextID((SToken) node, graph);
          if (textID != null) {
            event.setTextID(textID);
          }
        }

        // try to get time annotations
        if (mediaLayer == null || mediaLayer.contains(anno.getQName())) {

          double[] startEndTime = TimeHelper.getOverlappedTime(node);
          if (startEndTime.length == 1) {
            if (replaceValueWithMediaIcon) {
              event.setValue(" ");
              event.setTooltip("play excerpt " + event.getStartTime());
            }
            event.setStartTime(startEndTime[0]);
          } else if (startEndTime.length == 2) {
            event.setStartTime(startEndTime[0]);
            event.setEndTime(startEndTime[1]);
            if (replaceValueWithMediaIcon) {
              event.setValue(" ");
              event.setTooltip("play excerpt " + event.getStartTime() + "-" + event.getEndTime());
            }
          }
        }

        r.addEvent(event);
        rows.add(r);

        if (pdfController != null && pdfController.sizeOfRegisterdPDFViewer() > 0) {
          String page = pageNumberHelper.getPageFromAnnotation(node);
          if (page != null) {
            event.setPage(page);
          }
        }
      }
    } // end for each annotation of span
  }
コード例 #6
0
ファイル: EventExtractor.java プロジェクト: korpling/ANNIS
  public static void removeEmptySpace(
      LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, Row tokenRow) {
    List<Range<Integer>> gaps = new LinkedList<>();

    BitSet totalOccupancyGrid = new BitSet();
    for (Map.Entry<String, ArrayList<Row>> layer : rowsByAnnotation.entrySet()) {
      for (Row r : layer.getValue()) {
        totalOccupancyGrid.or(r.getOccupancyGridCopy());
      }
    }
    // We always include the token row in the occupancy grid since it is not
    // a gap. Otherwise empty token would trigger gaps if the token list
    // is included in the visualizer output.
    // See https://github.com/korpling/ANNIS/issues/281 for the corresponding
    // bug report.
    if (tokenRow != null) {
      totalOccupancyGrid.or(tokenRow.getOccupancyGridCopy());
    }

    // The Range class can give us the next bit that is not set. Use this
    // to detect gaps. A gap starts from the next non-set bit and goes to
    // the next set bit.
    Range<Integer> gap = Range.closed(-1, totalOccupancyGrid.nextSetBit(0));
    while (true) {
      int gapStart = totalOccupancyGrid.nextClearBit(gap.upperEndpoint() + 1);
      int gapEnd = totalOccupancyGrid.nextSetBit(gapStart);
      if (gapEnd <= 0) {
        break;
      }
      gap = Range.closed(gapStart, gapEnd - 1);
      gaps.add(gap);
    }

    int gapID = 0;
    int totalOffset = 0;
    for (Range<Integer> gRaw : gaps) {
      // adjust the space range itself
      Range<Integer> g =
          Range.closed(gRaw.lowerEndpoint() - totalOffset, gRaw.upperEndpoint() - totalOffset);
      int offset = g.upperEndpoint() - g.lowerEndpoint();
      totalOffset += offset;

      for (Entry<String, ArrayList<Row>> rowEntry : rowsByAnnotation.entrySet()) {
        ArrayList<Row> rows = rowEntry.getValue();
        for (Row r : rows) {
          List<GridEvent> eventsCopy = new LinkedList<>(r.getEvents());
          for (GridEvent e : eventsCopy) {
            if (e.getLeft() >= g.upperEndpoint()) {

              r.removeEvent(e);
              e.setLeft(e.getLeft() - offset);
              e.setRight(e.getRight() - offset);
              r.addEvent(e);
            }
          }

          // add a special space event
          String spaceCaption = "";
          if ("tok".equalsIgnoreCase(rowEntry.getKey())) {
            spaceCaption = "(...)";
          }
          GridEvent spaceEvent =
              new GridEvent("gap-" + gapID, g.lowerEndpoint(), g.lowerEndpoint(), spaceCaption);
          spaceEvent.setSpace(true);
          r.addEvent(spaceEvent);
          gapID++;
        }
      }
    }
  }