Ejemplo n.º 1
0
  // Constructor::TextContext
  //
  // Generates a TextContext Object.
  //
  // Records a checksum of the primary content, the text immediately before
  // and after the constraint (at most contextLength characters each, clipped
  // to the bounds of the content), and a sample of the content that is kept
  // for later matching.
  //
  // Parameters
  //
  // * primary -- TextPrimary object to be tested against context
  // * constraint -- TextConstraint object attached to this context
  // * checkSumType -- Hash for checksum
  // * contextLength -- length (in chars) of the context used for testing
  //
  // Throws StringIndexOutOfBoundsException (from String.substring) when the
  // constraint positions themselves lie outside the primary content.
  //
  public TextContext(
      TextPrimary primary, TextConstraint constraint, HashType checkSumType, int contextLength) {
    super();
    this.checkSumType = checkSumType;

    final String content = primary.getContent();
    final int contentLength = content.length();

    // Checksum of the complete primary content
    this.checkSum = checkSum(content, checkSumType);

    final int selectionStart = constraint.getStartPos();
    final int selectionEnd = constraint.getEndPos();

    // Context windows on either side of the constraint, clipped to the content
    final int beforeStart = Math.max(0, selectionStart - contextLength);
    final int afterEnd = Math.min(contentLength, selectionEnd + contextLength);

    // Evaluating how much text to store. Note that the length measured here
    // is the length of the whole primary content.
    this.totalSelectionLength = contentLength;
    if (this.totalSelectionLength > DEFAULT_CONTEXTLENGTH) {
      // Integer division is kept as in the original computation so the size
      // of the stored samples does not change.
      final double half = (double) (this.totalSelectionLength / 2);
      final int sampleLength = (int) (Math.floor(half * percentStorage));

      // Sample the text right after the start position and right before the
      // end position. Both windows are clamped to the content so that a
      // constraint close to either end of a long document no longer makes
      // substring() throw.
      final int beginSampleEnd = Math.min(contentLength, selectionStart + sampleLength);
      final int endSampleStart = Math.max(0, selectionEnd - sampleLength);
      this.beginSel = content.substring(selectionStart, beginSampleEnd);
      this.endSel = content.substring(endSampleStart, selectionEnd);
      this.totalSelection = this.beginSel.concat(this.endSel);
    } else {
      // Short content: store it whole, no separate begin/end samples
      this.beginSel = "";
      this.endSel = "";
      this.totalSelection = content;
    }

    this.beforeContext = content.substring(beforeStart, selectionStart);
    this.afterContext = content.substring(selectionEnd, afterEnd);
  }
Ejemplo n.º 2
0
  // ## int findClosestIndexOf
  //
  // Finds the occurrence of a literal string whose start index lies nearest
  // to a reference index.
  //
  // Parameters:
  //
  // * context - String - literal text to look for (quoted, so regex
  //   metacharacters have no special meaning)
  //
  // * oldIndex - int - index the match is expected to be close to
  //
  // * content - String - text to search
  //
  // Returns the start index of the nearest occurrence, or -1 when context
  // does not occur in content. When two occurrences are equally distant the
  // earlier one is returned. Occurrences are the successive non-overlapping
  // matches reported by Matcher.find().
  private int findClosestIndexOf(String context, int oldIndex, String content) {

    Matcher matcher = Pattern.compile(Pattern.quote(context)).matcher(content);

    // -1 means "nothing found yet". Starting at 0 would make index 0 a
    // candidate even when no occurrence exists there.
    int closestIndex = -1;
    // long arithmetic so extreme oldIndex values cannot overflow the distance
    long closestDistance = Long.MAX_VALUE;

    while (matcher.find()) {
      int candidate = matcher.start();
      long distance = Math.abs((long) oldIndex - candidate);
      if (distance < closestDistance) {
        closestDistance = distance;
        closestIndex = candidate;
      }
    }

    return closestIndex;
  }
Ejemplo n.º 3
0
  /*
   * createGST and gstMATCH
   *
   * Methods to implement the GST-TILING methodology of matching strings.
   *
   * createGST builds a GST over the haystack and sets its minimum tile
   * length to one third of the needle length, rounded up.
   *
   * needleLength -- length (in chars) of the text that will be matched
   * haystack -- text the GST is built over
   */
  private GST createGST(int needleLength, String haystack) {
    GST gst = new GST(haystack);
    // Needs to be at least a third of the needle length to count as a match.
    // Divide by 3.0: with integer division the quotient is truncated before
    // Math.ceil sees it, which rounds down and yields 0 for needles shorter
    // than three characters.
    gst.setMinimumTileLength((int) Math.ceil(needleLength / 3.0));

    return gst;
  }
Ejemplo n.º 4
0
  // ## TextConstraint exactMatch
  //
  // Re-locates the constraint inside primaryContent using literal matches.
  //
  // First looks for the stored before/after context closest to where it
  // used to be. If either context cannot be found, falls back to locating
  // the stored selection text itself.
  //
  // Parameters:
  //
  // * primaryContent - String - content to find context within
  //
  // * originalConstraint - TextConstraint - positions the constraint had
  //   when the context was recorded
  //
  // Returns a new TextConstraint with the re-located positions, or null
  // when the fallback search does not find an acceptable match.
  private TextConstraint exactMatch(String primaryContent, TextConstraint originalConstraint)
      throws NoMatchFoundException {

    final int originalStart = originalConstraint.getStartPos();
    final int originalEnd = originalConstraint.getEndPos();

    // find the text before the annotation; it is expected to begin
    // beforeContext.length() characters ahead of the old start position
    final int beforeIndex =
        findClosestIndexOf(
            this.beforeContext, originalStart - this.beforeContext.length(), primaryContent);

    // find the text after the annotation. The search runs over
    // primaryContent, the same text the returned positions refer to.
    final int afterIndex = findClosestIndexOf(this.afterContext, originalEnd, primaryContent);

    // A negative index is treated as "not found". This is tested before the
    // context length is added, otherwise a miss would be hidden by the offset.
    if (beforeIndex >= 0 && afterIndex >= 0) {
      // the annotation starts where the before-context ends
      return new TextConstraint(beforeIndex + this.beforeContext.length(), afterIndex);
    }

    // search through the selected content
    if (this.beginSel.length() > 0 && this.endSel.length() > 0) {
      // where the end sample used to begin
      final int originalEndSelStart = originalEnd - this.endSel.length();

      // search beginning source selection, then end
      // selection, respectively
      final int beginIndex = findClosestIndexOf(this.beginSel, originalStart, primaryContent);
      final int endSelIndex =
          findClosestIndexOf(this.endSel, originalEndSelStart, primaryContent);

      if (beginIndex < 0 || endSelIndex < 0) {
        return null;
      }

      // accept only when the two samples together moved by at most 5 chars
      final int originalPosTotal = originalStart + originalEndSelStart;
      final int positionTotal = beginIndex + endSelIndex;
      if (Math.abs(positionTotal - originalPosTotal) > 5) {
        return null;
      }

      // begin sample opens the selection, end sample closes it
      return new TextConstraint(beginIndex, endSelIndex + this.endSel.length());
    }

    // search through total selection
    final int selectionIndex =
        findClosestIndexOf(this.totalSelection, originalStart, primaryContent);
    if (selectionIndex < 0) {
      return null;
    }

    // NOTE(review): assumes totalSelection holds exactly the annotated text,
    // as the search around the old start position suggests -- confirm
    // against the code that fills totalSelection.
    return new TextConstraint(selectionIndex, selectionIndex + this.totalSelection.length());
  }
Ejemplo n.º 5
0
  // ## TextConstraint gstMatch
  //
  // Matches a constraint within passed content. Makes sure that
  // the constraint text is found within the content, but in order to
  // be a true match, it has to match in or near the same location as
  // the original constraint
  //
  // Parameters:
  //
  // * primaryContent - String - content to find context within
  //
  // * originalConstraint - TextConstraint - object to test internal constraint against
  //
  // Returns a TextConstraint built from the selected tile start positions.
  //
  // Throws NoMatchFoundException when matching yields no tiles, or when no
  // tile falls inside the accepted index window for the start or the end.
  //
  // NOTE(review): as written, both index windows below are empty (the upper
  // bound is never above the lower bound), so once tiles exist the method
  // always ends in NoMatchFoundException. See the notes at each window.
  private TextConstraint gstMatch(String primaryContent, TextConstraint originalConstraint)
      throws NoMatchFoundException {
    // GST over the content; minimum tile length is derived from the length
    // of the before-context by createGST
    GST g = createGST(this.beforeContext.length(), primaryContent);

    // Only the before-context is matched; the tiles of this single match
    // are used for both the start and the end search below.
    g.match(this.beforeContext);
    if (g.getTiles().size() == 0) throw new Context.NoMatchFoundException();
    int startMatch = -1;
    // NOTE(review): length() / 3 is integer division, so Math.ceil has no
    // effect here and in the expressions below.
    // NOTE(review): maxMatchIndex equals minMatchIndex minus
    // beforeContext.length(), i.e. max <= min, so the test
    // "i > minMatchIndex && i <= maxMatchIndex" cannot succeed. The two
    // bounds look swapped -- confirm the intended window.
    int maxMatchIndex =
        originalConstraint.getStartPos()
            - (this.beforeContext.length() + (int) Math.ceil((this.beforeContext.length() / 3)));
    int minMatchIndex =
        originalConstraint.getStartPos()
            - (int) Math.ceil(Math.ceil(this.beforeContext.length() / 3));
    int i;
    // go through each matched TILE and see
    // if the match is close to our context;
    // of the tiles inside the window, the largest start index wins
    for (GSTTile item : g.getTiles()) {
      i = item.getStart();
      if (i > startMatch && i > minMatchIndex && i <= maxMatchIndex) {
        startMatch = item.getStart();
      }
    }
    // Check if no matches found
    // NOTE(review): "< 1" also rejects a tile that starts at index 0.
    if (startMatch < 1) throw new Context.NoMatchFoundException();

    // Window for the end of the constraint.
    // NOTE(review): minMatchIndex is maxMatchIndex plus a non-negative term,
    // so this window is empty as well. The margin uses the length of the
    // before-context although the window is built around the after-context.
    minMatchIndex =
        originalConstraint.getEndPos()
            + (this.afterContext.length() + (int) Math.ceil((this.beforeContext.length() / 3)));
    maxMatchIndex = originalConstraint.getEndPos() + this.afterContext.length();
    int endMatch = -1;

    // NOTE(review): the after-context is never passed to g.match(), so these
    // are still the before-context tiles. The tile END (start + length) is
    // compared, but the tile START is stored.
    for (GSTTile item : g.getTiles()) {
      i = item.getStart() + item.getLength();
      if (i > endMatch && i > minMatchIndex && i <= maxMatchIndex) {
        endMatch = item.getStart();
      }
    }

    if (endMatch < 1) throw new Context.NoMatchFoundException();

    return new TextConstraint(startMatch, endMatch);
  }
Ejemplo n.º 6
0
  // ## ShingleCloud createSC
  //
  // Builds a ShingleCloud over the given haystack, configured with a
  // CharacterTokenizer, an n-gram size equal to the needle length (capped
  // at 20), a minimum of one "one" per match, and matches sorted by rating.
  //
  // Parameters:
  //
  // * needleLength - int - length of the text that will be searched for
  //
  // * hayStack - String - text the ShingleCloud is built over
  private ShingleCloud createSC(int needleLength, String hayStack) {
    final ShingleCloud cloud = new ShingleCloud(hayStack);
    cloud.setTokenizer(new CharacterTokenizer());

    // n-gram size follows the needle length but never exceeds 20
    cloud.setNGramSize(needleLength < 20 ? needleLength : 20);

    cloud.setMinimumNumberOfOnesInMatch(1);
    cloud.setSortMatchesByRating(true);
    return cloud;
  }