/** * Checks that second CloneGroup includes first one. * * <p>CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B, * which satisfy the conditions: * * <pre> * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd) * </pre> * * And all resourcesId from B exactly the same as all resourceId from A, which means that also * every part pB from B has part pA in A, which satisfy the condition: * * <pre> * pB.resourceId == pA.resourceId * </pre> * * Inclusion is the partial order, thus this relation is: * * <ul> * <li>reflexive - A in A * <li>transitive - (A in B) and (B in C) => (A in C) * <li>antisymmetric - (A in B) and (B in A) <=> (A = B) * </ul> * * <p>This method uses the fact that all parts already sorted by resourceId and unitStart (see * {@link ContainsInComparator#CLONEPART_COMPARATOR}), so running time - O(|A|+|B|). */ private static boolean containsIn(CloneGroup first, CloneGroup second) { List<ClonePart> firstParts = first.getCloneParts(); List<ClonePart> secondParts = second.getCloneParts(); // TODO Godin: according to tests seems that if first part of condition is true, then second // part can not be false // if this can be proved, then second part can be removed return SortedListsUtils.contains( secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength())) && SortedListsUtils.contains( firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR); }
/**
 * Scenario: a file with two lines, containing the following statements:
 *
 * <pre>
 * 0: A,B,A,B
 * 1: A,B,A
 * </pre>
 *
 * With a block size of 5 every block spans both lines, and the block hashes are:
 *
 * <pre>
 * A,B,A,B,A=1
 * B,A,B,A,B=2
 * A,B,A,B,A=1
 * </pre>
 *
 * Expected: a single clone with two parts, both covering exactly the same lines.
 */
@Test
public void same_lines_but_different_indexes() {
  CloneIndex cloneIndex = createIndex();

  // All three blocks share the resource and the line range; only hash and index differ.
  Block.Builder template = Block.builder().setResourceId("a").setLines(0, 1);
  Block blockAtIndex0 =
      template.setBlockHash(new ByteArray("1".getBytes())).setIndexInFile(0).build();
  Block blockAtIndex1 =
      template.setBlockHash(new ByteArray("2".getBytes())).setIndexInFile(1).build();
  Block blockAtIndex2 =
      template.setBlockHash(new ByteArray("1".getBytes())).setIndexInFile(2).build();
  Block[] fileBlocks = {blockAtIndex0, blockAtIndex1, blockAtIndex2};

  List<CloneGroup> clones = detect(cloneIndex, fileBlocks);
  print(clones);

  assertThat(clones.size(), is(1));
  Iterator<CloneGroup> groups = clones.iterator();
  CloneGroup onlyGroup = groups.next();

  ClonePart partAtIndex0 = new ClonePart("a", 0, 0, 1);
  ClonePart partAtIndex2 = new ClonePart("a", 2, 0, 1);

  assertThat(onlyGroup.getCloneUnitLength(), is(1));
  assertThat(onlyGroup.getCloneParts().size(), is(2));
  assertThat(onlyGroup.getOriginPart(), is(partAtIndex0));
  assertThat(onlyGroup.getCloneParts(), hasItem(partAtIndex0));
  assertThat(onlyGroup.getCloneParts(), hasItem(partAtIndex2));
}
/** Constructs CloneGroup and saves it. */ @Override public void endOfGroup() { ClonePart origin = null; CloneGroup.Builder builder = CloneGroup.builder().setLength(length); List<ClonePart> parts = Lists.newArrayListWithCapacity(count); for (int[] b : blockNumbers) { Block firstBlock = text.getBlock(b[0]); Block lastBlock = text.getBlock(b[1]); ClonePart part = new ClonePart( firstBlock.getResourceId(), firstBlock.getIndexInFile(), firstBlock.getStartLine(), lastBlock.getEndLine()); // TODO Godin: maybe use FastStringComparator here ? if (originResourceId.equals(part.getResourceId())) { // part from origin if (origin == null) { origin = part; // To calculate length important to use the origin, because otherwise block may come from // DB without required data builder.setLengthInUnits(lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1); } else if (part.getUnitStart() < origin.getUnitStart()) { origin = part; } } parts.add(part); } Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR); builder.setOrigin(origin).setParts(parts); filter(builder.build()); reset(); }