/**
 * Builds a multi-pass diff between the given base and witness documents. An ordinary diff is run
 * first; the areas it marks as change blocks are then diffed against one another again, and the
 * resulting finer-grained differences are integrated into a single set of differences, giving a
 * much higher resolution result.
 *
 * @param baseDocument the base document of the comparison
 * @param witnessDocument the witness document compared against the base
 */
public MultiPassDiff(DocumentModel baseDocument, DocumentModel witnessDocument) {
  diff = new DiffAlgorithm();
  DifferenceSet currentPass = diff.diffDocuments(baseDocument, witnessDocument);

  DifferenceConsolidator consolidator = new DifferenceConsolidator(baseDocument, witnessDocument);
  consolidator.consolidateDifferences(currentPass);

  // Keep re-diffing the change blocks for as long as the largest change block keeps getting
  // broken down and the maximum number of passes has not been reached.
  previousLargestChangeBlock = Integer.MAX_VALUE;
  for (int pass = 0;
      pass < MAX_DIFF_PASSES && currentPass.getLargestChangeBlock() < previousLargestChangeBlock;
      pass++) {
    differenceSet = makeAdditionalPass(currentPass, baseDocument, witnessDocument);
    consolidator.consolidateDifferences(differenceSet);
    consolidator.consolidateInsertDelete(differenceSet);

    // If these differences are looked at later, the real diff algorithm needs the latest
    // version of the changes.
    diff.updateDifferenceSet(differenceSet);

    // Remember the size that was just refined, then make the refined set the next input.
    previousLargestChangeBlock = currentPass.getLargestChangeBlock();
    currentPass = differenceSet;
  }

  // NOTE(review): differenceSet is only assigned inside the loop above; if the loop body never
  // runs, it still holds whatever value it had before this constructor - confirm that is safe.
  consolidator.consolidateDifferences(differenceSet);
  consolidator.consolidateInsertDelete(differenceSet);
}
/** * Set the repository the formatter can load object contents from. * * <p>Once a repository has been set, the formatter must be released to ensure the internal * ObjectReader is able to release its resources. * * @param repository source repository holding referenced objects. */ public void setRepository(Repository repository) { if (reader != null) reader.release(); db = repository; reader = db.newObjectReader(); ContentSource cs = ContentSource.create(reader); source = new ContentSource.Pair(cs, cs); DiffConfig dc = db.getConfig().get(DiffConfig.KEY); if (dc.isNoPrefix()) { setOldPrefix(""); // $NON-NLS-1$ setNewPrefix(""); // $NON-NLS-1$ } setDetectRenames(dc.isRenameDetectionEnabled()); diffAlgorithm = DiffAlgorithm.getAlgorithm( db.getConfig() .getEnum( ConfigConstants.CONFIG_DIFF_SECTION, null, ConfigConstants.CONFIG_KEY_ALGORITHM, SupportedAlgorithm.HISTOGRAM)); }
private DifferenceSet makeAdditionalPass( DifferenceSet originalDifferenceSet, DocumentModel baseDocument, DocumentModel witnessDocument) { DifferenceSet refinedDifferenceSet = new DifferenceSet(); refinedDifferenceSet.setBaseDocument(baseDocument); refinedDifferenceSet.setWitnessDocument(witnessDocument); refinedDifferenceSet.setNumberOfSymbols(originalDifferenceSet.getNumberOfSymbols()); for (Iterator i = originalDifferenceSet.getDifferenceList().iterator(); i.hasNext(); ) { Difference difference = (Difference) i.next(); DiffAlgorithm subdiff = new DiffAlgorithm(); if (difference.getType() == Difference.CHANGE) { // SubdocumentModel represents a substring of an original DocumentModel with its xml source // metadata (if it exists) DocumentModel subBaseText = new DocumentModel( baseDocument, difference.getOffset(Difference.BASE), difference.getOffset(Difference.BASE) + difference.getLength(Difference.BASE)); DocumentModel subWitnessText = new DocumentModel( witnessDocument, difference.getOffset(Difference.WITNESS), difference.getOffset(Difference.WITNESS) + difference.getLength(Difference.WITNESS)); // perform a diff on the text with the change blocks subBaseText.tokenize(baseDocument.getTokenizerSettings()); subWitnessText.tokenize(witnessDocument.getTokenizerSettings()); DifferenceSet subDifferences = subdiff.diffDocuments(subBaseText, subWitnessText); // add the resulting differences to the new difference set. addSubDifferences(refinedDifferenceSet, subDifferences, difference); } else { refinedDifferenceSet.addDifference(difference); } } return refinedDifferenceSet; }
/**
 * Diffs two texts by delegating to the configured diff algorithm with the configured comparator.
 *
 * @param a the first text handed to the algorithm
 * @param b the second text handed to the algorithm
 * @return the edit list produced by the algorithm
 */
private EditList diff(RawText a, RawText b) {
  EditList edits = diffAlgorithm.diff(comparator, a, b);
  return edits;
}
/**
 * Looks up the witness-document offset that corresponds to an offset in the base document. The
 * lookup is delegated to the underlying diff algorithm.
 *
 * @param baseOffset offset in the base document
 * @param getEnd forwarded unchanged to the diff algorithm (NOTE(review): presumably selects the
 *     end rather than the start of the corresponding range - confirm)
 * @return the corresponding witness offset as reported by the diff algorithm
 */
public int getWitnessOffset(int baseOffset, boolean getEnd) {
  int witnessOffset = diff.getCorrespondingWitnessOffset(baseOffset, getEnd);
  return witnessOffset;
}
// Given the offset in one document, this finds the offset of that character in the other // document. // If there is a perfect match, then it is easy to know what to return. // If it doesn't appear in the other document, then return it's insert point. // If it is part of a change, return -1. public int getBaseOffset(int witnessOffset, boolean getEnd) { return diff.getCorrespondingBaseOffset(witnessOffset, getEnd); }
/**
 * Computes the difference between the original and a revision.
 *
 * <p>When both the original and the revision have length zero, a freshly constructed
 * {@code Revision} is returned without consulting the algorithm.
 *
 * @param rev the revision to compare with the original.
 * @return a Revision describing the differences
 * @throws DifferentiationFailedException declared by this method; presumably raised by the
 *     underlying algorithm when it cannot compute the differences
 */
public Revision diff(Object[] rev) throws DifferentiationFailedException {
  boolean bothEmpty = orig.length == 0 && rev.length == 0;
  if (bothEmpty) {
    return new Revision();
  }
  return algorithm.diff(orig, rev);
}