// Constructor::TextContext // // Generates a TextContext Object. // // Generates before and after contexts based on given length and values // // Parameters // // * primary -- TextPrimary object to be tested against context // * constraint -- TextConstraint object attached to this context // * checkSumType -- Hash for checksum // * contextLength - length (in chars) of the context used for testing // public TextContext( TextPrimary primary, TextConstraint constraint, HashType checkSumType, int contextLength) { super(); this.checkSumType = checkSumType; // Testing if content matches the bit-checksum tests this.checkSum = checkSum(primary.getContent(), checkSumType); int beforeStart = constraint.getStartPos() - contextLength; beforeStart = Math.max(0, beforeStart); int beforeEnd = constraint.getStartPos(); int afterStart = constraint.getEndPos(); int afterEnd = constraint.getEndPos() + contextLength; afterEnd = Math.min(primary.getContent().length(), afterEnd); // Evaluating how much of selected text to store this.totalSelectionLength = primary.getContent().length(); int cLength = this.totalSelectionLength; if (this.totalSelectionLength > DEFAULT_CONTEXTLENGTH) { double half = (double) (this.totalSelectionLength / 2); cLength = (int) (Math.floor(half * percentStorage)); this.beginSel = primary.getContent().substring(beforeEnd, (beforeEnd + cLength)); this.endSel = primary.getContent().substring((afterStart - cLength), afterStart); this.totalSelection = this.beginSel.concat(this.endSel); } else { // Use the entire selection this.beginSel = ""; this.endSel = ""; this.totalSelection = primary.getContent(); } this.beforeContext = primary.getContent().substring(beforeStart, beforeEnd); this.afterContext = primary.getContent().substring(afterStart, afterEnd); }
// ## int findClosestIndexOf
//
// Finds the literal occurrence of a string that starts closest to an
// expected position.
//
// Parameters:
//
// * context - String - literal text to look for (no regex meaning)
//
// * oldIndex - int - position the text was expected at
//
// * content - String - text to search in
//
// Returns the start index of the occurrence nearest to oldIndex, or -1
// when context does not occur in content. If two occurrences are
// equally near, the earlier one is returned.
private int findClosestIndexOf(String context, int oldIndex, String content) {
    Matcher matcher = Pattern.compile(Pattern.quote(context)).matcher(content);

    // -1 means "nothing found yet"; only real match positions may
    // replace it, so position 0 is never reported unless it matched.
    int closest = -1;
    while (matcher.find()) {
        int candidate = matcher.start();
        if (closest < 0
                || Math.abs(oldIndex - candidate) < Math.abs(oldIndex - closest)) {
            closest = candidate;
        }
    }
    return closest;
}
/* * createGST and gstMATCH * * Methods to implement the GST-TILING methodology of matching strings. */ private GST createGST(int needleLength, String haystack) { GST gst = new GST(haystack); // Needs to be at least a third of the needle length to // count as a match gst.setMinimumTileLength((int) Math.ceil(needleLength / 3)); return gst; }
private TextConstraint exactMatch(String primaryContent, TextConstraint originalConstraint) throws NoMatchFoundException { // find the text before the annotation // int startPos = primaryContent.indexOf(this.beforeContext); int startPos = findClosestIndexOf( this.beforeContext, originalConstraint.getStartPos() - this.beforeContext.length(), primaryContent); startPos += this.beforeContext.length(); // find text after annotation // int endPos = primaryContent.indexOf(this.afterContext); int endPos = findClosestIndexOf(this.afterContext, originalConstraint.getEndPos(), this.totalSelection); if (endPos < 0 || startPos < 0) { // search through the selected content int positionTotal = 0; if (this.beginSel.length() > 0 && this.endSel.length() > 0) { int originalPosTotal = (originalConstraint.getStartPos() + (originalConstraint.getEndPos() - this.endSel.length())); // search beginning source selection, then end // selection, respectively int beginTotal = findClosestIndexOf(this.beginSel, originalConstraint.getStartPos(), primaryContent); int afterTotal = findClosestIndexOf( this.endSel, (originalConstraint.getEndPos() - this.endSel.length()), primaryContent); positionTotal = beginTotal + afterTotal; if (Math.abs(positionTotal - originalPosTotal) > 5) { return null; } } else { // search through total selection positionTotal = findClosestIndexOf( this.totalSelection, originalConstraint.getStartPos(), primaryContent); if (positionTotal < 0) return null; } } return new TextConstraint(startPos, endPos); }
// ## TextConstraint gstMatch // // Matches a constraint within passed content. Makes sure that // the constraint text is found within the content, but in order to // be a true match, it has to match in or near the same location as // the original constraint // // Parameters: // // * primaryContent - String - content to find context within // // * originalConstraint - TextConstraint - object to test internal constraint against private TextConstraint gstMatch(String primaryContent, TextConstraint originalConstraint) throws NoMatchFoundException { GST g = createGST(this.beforeContext.length(), primaryContent); g.match(this.beforeContext); if (g.getTiles().size() == 0) throw new Context.NoMatchFoundException(); int startMatch = -1; int maxMatchIndex = originalConstraint.getStartPos() - (this.beforeContext.length() + (int) Math.ceil((this.beforeContext.length() / 3))); int minMatchIndex = originalConstraint.getStartPos() - (int) Math.ceil(Math.ceil(this.beforeContext.length() / 3)); int i; // go through each matched TILE and see // if the match is close to our context for (GSTTile item : g.getTiles()) { i = item.getStart(); if (i > startMatch && i > minMatchIndex && i <= maxMatchIndex) { startMatch = item.getStart(); } } // Check if no matches found if (startMatch < 1) throw new Context.NoMatchFoundException(); minMatchIndex = originalConstraint.getEndPos() + (this.afterContext.length() + (int) Math.ceil((this.beforeContext.length() / 3))); maxMatchIndex = originalConstraint.getEndPos() + this.afterContext.length(); int endMatch = -1; for (GSTTile item : g.getTiles()) { i = item.getStart() + item.getLength(); if (i > endMatch && i > minMatchIndex && i <= maxMatchIndex) { endMatch = item.getStart(); } } if (endMatch < 1) throw new Context.NoMatchFoundException(); return new TextConstraint(startMatch, endMatch); }
// Creates a ShingleCloud over the haystack, configured with a
// CharacterTokenizer, an n-gram size equal to the needle length but
// never more than 20, a minimum of one "one" per match, and matches
// sorted by rating.
//
// Parameters:
//
// * needleLength - int - length of the text that will be matched
//
// * hayStack - String - text the ShingleCloud is built on
private ShingleCloud createSC(int needleLength, String hayStack) {
    // n-gram size follows the needle length, capped at 20
    final int cappedNGramSize = (needleLength < 20) ? needleLength : 20;

    ShingleCloud cloud = new ShingleCloud(hayStack);
    cloud.setTokenizer(new CharacterTokenizer());
    cloud.setNGramSize(cappedNGramSize);
    cloud.setMinimumNumberOfOnesInMatch(1);
    cloud.setSortMatchesByRating(true);
    return cloud;
}