/** @return ArrayList as we need a serializable object */ public ArrayList<Block> chunk(String resourceId, List<TokensLine> fragments) { List<TokensLine> filtered = Lists.newArrayList(); int i = 0; while (i < fragments.size()) { TokensLine first = fragments.get(i); int j = i + 1; while (j < fragments.size() && fragments.get(j).getValue().equals(first.getValue())) { j++; } filtered.add(fragments.get(i)); if (i < j - 1) { filtered.add(fragments.get(j - 1)); } i = j; } fragments = filtered; if (fragments.size() < blockSize) { return Lists.newArrayList(); } TokensLine[] fragmentsArr = fragments.toArray(new TokensLine[fragments.size()]); ArrayList<Block> blocks = Lists.newArrayListWithCapacity(fragmentsArr.length - blockSize + 1); long hash = 0; int first = 0; int last = 0; for (; last < blockSize - 1; last++) { hash = hash * PRIME_BASE + fragmentsArr[last].getHashCode(); } Block.Builder blockBuilder = Block.builder().setResourceId(resourceId); for (; last < fragmentsArr.length; last++, first++) { TokensLine firstFragment = fragmentsArr[first]; TokensLine lastFragment = fragmentsArr[last]; // add last statement to hash hash = hash * PRIME_BASE + lastFragment.getHashCode(); // create block Block block = blockBuilder .setBlockHash(new ByteArray(hash)) .setIndexInFile(first) .setLines(firstFragment.getStartLine(), lastFragment.getEndLine()) .setUnit(firstFragment.getStartUnit(), lastFragment.getEndUnit()) .build(); blocks.add(block); // remove first statement from hash hash -= power * firstFragment.getHashCode(); } return blocks; }
/**
 * Scenario: a file made of two lines holding the following statements:
 *
 * <pre>
 * 0: A,B,A,B
 * 1: A,B,A
 * </pre>
 *
 * With a block size of 5 every block covers both lines, and the block hashes are:
 *
 * <pre>
 * A,B,A,B,A=1
 * B,A,B,A,B=2
 * A,B,A,B,A=1
 * </pre>
 *
 * Expected outcome: a single clone made of two parts that cover exactly the same lines.
 */
@Test
public void same_lines_but_different_indexes() {
  CloneIndex index = createIndex();

  // All three blocks share resource "a" and lines 0..1; only hash and index differ.
  Block.Builder builder = Block.builder().setResourceId("a").setLines(0, 1);
  Block firstBlock = builder.setBlockHash(new ByteArray("1".getBytes())).setIndexInFile(0).build();
  Block middleBlock = builder.setBlockHash(new ByteArray("2".getBytes())).setIndexInFile(1).build();
  Block lastBlock = builder.setBlockHash(new ByteArray("1".getBytes())).setIndexInFile(2).build();
  Block[] fileBlocks = new Block[] {firstBlock, middleBlock, lastBlock};

  List<CloneGroup> clones = detect(index, fileBlocks);
  print(clones);

  // exactly one clone group is reported
  assertThat(clones.size(), is(1));
  CloneGroup group = clones.iterator().next();

  // it is one unit long and consists of the blocks at indexes 0 and 2
  assertThat(group.getCloneUnitLength(), is(1));
  assertThat(group.getCloneParts().size(), is(2));
  assertThat(group.getOriginPart(), is(new ClonePart("a", 0, 0, 1)));
  assertThat(group.getCloneParts(), hasItem(new ClonePart("a", 0, 0, 1)));
  assertThat(group.getCloneParts(), hasItem(new ClonePart("a", 2, 0, 1)));
}