Esempio n. 1
0
  /**
   * Splits the token lines of a resource into overlapping blocks of {@code blockSize} consecutive
   * lines, each tagged with a rolling hash of the lines it covers.
   *
   * <p>Before chunking, every run of consecutive lines whose values are equal is reduced to its
   * first and last line (a run of length one contributes a single line).
   *
   * @param resourceId identifier stored on every produced block
   * @param fragments token lines of the resource, in file order
   * @return the blocks in file order; empty when fewer than {@code blockSize} lines remain after
   *     the reduction. The concrete type is ArrayList as we need a serializable object.
   */
  public ArrayList<Block> chunk(String resourceId, List<TokensLine> fragments) {
    // Step 1: collapse runs of equal-valued consecutive lines to their two end points.
    final int inputSize = fragments.size();
    List<TokensLine> collapsed = Lists.newArrayList();
    for (int runStart = 0; runStart < inputSize; ) {
      TokensLine runHead = fragments.get(runStart);
      int runEnd = runStart + 1; // exclusive end of the current run
      while (runEnd < inputSize && fragments.get(runEnd).getValue().equals(runHead.getValue())) {
        runEnd++;
      }
      collapsed.add(runHead);
      if (runEnd - runStart > 1) {
        // the run holds at least two lines: keep its last line as well
        collapsed.add(fragments.get(runEnd - 1));
      }
      runStart = runEnd;
    }

    // Step 2: not enough lines left to form even a single block.
    final int lineCount = collapsed.size();
    if (lineCount < blockSize) {
      return Lists.newArrayList();
    }

    TokensLine[] lines = collapsed.toArray(new TokensLine[lineCount]);
    ArrayList<Block> result = Lists.newArrayListWithCapacity(lines.length - blockSize + 1);

    // Step 3: seed the hash with the first (blockSize - 1) lines of the first window.
    long rollingHash = 0;
    int tail = 0;
    while (tail < blockSize - 1) {
      rollingHash = rollingHash * PRIME_BASE + lines[tail].getHashCode();
      tail++;
    }

    // Step 4: slide the window one line at a time; the same builder is reused for every block.
    Block.Builder builder = Block.builder().setResourceId(resourceId);
    for (int head = 0; tail < lines.length; head++, tail++) {
      TokensLine opening = lines[head];
      TokensLine closing = lines[tail];

      // bring the newest line into the hash
      rollingHash = rollingHash * PRIME_BASE + closing.getHashCode();

      result.add(
          builder
              .setBlockHash(new ByteArray(rollingHash))
              .setIndexInFile(head)
              .setLines(opening.getStartLine(), closing.getEndLine())
              .setUnit(opening.getStartUnit(), closing.getEndUnit())
              .build());

      // drop the oldest line's contribution (scaled by power) before the window advances
      rollingHash -= power * opening.getHashCode();
    }
    return result;
  }
Esempio n. 2
0
  /**
   * Scenario: a file of two lines holding the following statements:
   *
   * <pre>
   * 0: A,B,A,B
   * 1: A,B,A
   * </pre>
   *
   * With a block size of 5 every block covers both lines, and the block hashes are:
   *
   * <pre>
   * A,B,A,B,A=1
   * B,A,B,A,B=2
   * A,B,A,B,A=1
   * </pre>
   *
   * Expected: a single clone made of two parts that cover exactly the same lines and differ
   * only in their starting index (0 and 2).
   */
  @Test
  public void same_lines_but_different_indexes() {
    CloneIndex index = createIndex();

    // All three blocks share the resource and the line range; only hash and index vary.
    Block.Builder builder = Block.builder().setResourceId("a").setLines(0, 1);
    Block blockAtIndex0 =
        builder.setBlockHash(new ByteArray("1".getBytes())).setIndexInFile(0).build();
    Block blockAtIndex1 =
        builder.setBlockHash(new ByteArray("2".getBytes())).setIndexInFile(1).build();
    Block blockAtIndex2 =
        builder.setBlockHash(new ByteArray("1".getBytes())).setIndexInFile(2).build();
    Block[] blocksOfFile = {blockAtIndex0, blockAtIndex1, blockAtIndex2};

    List<CloneGroup> groups = detect(index, blocksOfFile);
    print(groups);

    assertThat(groups.size(), is(1));
    CloneGroup group = groups.iterator().next();

    ClonePart partAtIndex0 = new ClonePart("a", 0, 0, 1);
    ClonePart partAtIndex2 = new ClonePart("a", 2, 0, 1);

    assertThat(group.getCloneUnitLength(), is(1));
    assertThat(group.getCloneParts().size(), is(2));
    assertThat(group.getOriginPart(), is(partAtIndex0));
    assertThat(group.getCloneParts(), hasItem(partAtIndex0));
    assertThat(group.getCloneParts(), hasItem(partAtIndex2));
  }