Esempio n. 1
0
 /**
  * Cuts the given lines into overlapping windows of {@code blockSize} consecutive lines and
  * produces one {@link Block} per window position, each carrying a rolling hash of the lines it
  * covers.
  *
  * @param resourceId identifier stored in every produced block
  * @param fragments lines to chunk; the position of a window's first line in this list is passed
  *     to the block it produces
  * @return the blocks in window order, or an empty list when there are fewer than {@code
  *     blockSize} lines
  */
 public List<Block> chunk(String resourceId, List<TokensLine> fragments) {
   final int total = fragments.size();
   if (total < blockSize) {
     return Collections.emptyList();
   }
   TokensLine[] lines = fragments.toArray(new TokensLine[total]);
   List<Block> result = Lists.newArrayListWithCapacity(total - blockSize + 1);

   // Prime the hash with every line of the first window except its last one.
   long rolling = 0;
   int tail = 0;
   while (tail < blockSize - 1) {
     rolling = rolling * PRIME_BASE + lines[tail].getHashCode();
     tail++;
   }

   // Slide the window: head is its first line, tail its last.
   int head = 0;
   while (tail < total) {
     TokensLine opening = lines[head];
     TokensLine closing = lines[tail];

     // Bring the newest line into the hash; the window is now complete.
     rolling = rolling * PRIME_BASE + closing.getHashCode();

     Block block =
         new Block(
             resourceId,
             new ByteArray(rolling),
             head,
             opening.getStartLine(),
             closing.getEndLine());
     block.setStartUnit(opening.getStartUnit());
     block.setEndUnit(closing.getEndUnit());
     result.add(block);

     // Drop the oldest line's contribution before moving on.
     // NOTE(review): presumably power == PRIME_BASE^(blockSize - 1) — confirm where it is set.
     rolling -= power * opening.getHashCode();

     head++;
     tail++;
   }
   return result;
 }
Esempio n. 2
0
 /**
  * Builds a block for tests. To keep tests simple, every block is assumed to start on its own
  * line and to contain {@link #LINES_PER_BLOCK} lines, so the index alone determines the line
  * range: it is used as the start line and {@code index + LINES_PER_BLOCK} as the end line.
  *
  * @param resourceId resource the block belongs to
  * @param hash hash of the block
  * @param index index of the block in its file, also used as its start line
  * @return the newly built block
  */
 protected static Block newBlock(String resourceId, ByteArray hash, int index) {
   final int endLine = index + LINES_PER_BLOCK;
   Block.Builder forResource = Block.builder().setResourceId(resourceId);
   Block.Builder located =
       forResource.setBlockHash(hash).setIndexInFile(index).setLines(index, endLine);
   return located.build();
 }
Esempio n. 3
0
  /**
   * Collapses every run of consecutive lines with equal values down to its first and last line,
   * then cuts the remaining lines into overlapping windows of {@code blockSize} lines and
   * produces one {@link Block} per window position, each carrying a rolling hash of the lines it
   * covers.
   *
   * @param resourceId identifier stored in every produced block
   * @param fragments lines to chunk
   * @return ArrayList as we need a serializable object; empty when fewer than {@code blockSize}
   *     lines remain after collapsing
   */
  public ArrayList<Block> chunk(String resourceId, List<TokensLine> fragments) {
    // Step 1: keep only the first and the last line of each run of equal-valued lines.
    List<TokensLine> collapsed = Lists.newArrayList();
    for (int runStart = 0, runEnd; runStart < fragments.size(); runStart = runEnd) {
      TokensLine head = fragments.get(runStart);
      runEnd = runStart + 1;
      while (runEnd < fragments.size()
          && fragments.get(runEnd).getValue().equals(head.getValue())) {
        runEnd++;
      }
      collapsed.add(head);
      if (runEnd - runStart > 1) {
        // The run has more than one line: its last line is kept as well.
        collapsed.add(fragments.get(runEnd - 1));
      }
    }

    // Step 2: slide a window of blockSize lines over what is left.
    final int total = collapsed.size();
    if (total < blockSize) {
      return Lists.newArrayList();
    }
    TokensLine[] lines = collapsed.toArray(new TokensLine[total]);
    ArrayList<Block> result = Lists.newArrayListWithCapacity(total - blockSize + 1);

    // Prime the hash with every line of the first window except its last one.
    long rolling = 0;
    int tail = 0;
    while (tail < blockSize - 1) {
      rolling = rolling * PRIME_BASE + lines[tail].getHashCode();
      tail++;
    }

    // A single builder is reused for all blocks; the resource id is set once up front.
    Block.Builder blockBuilder = Block.builder().setResourceId(resourceId);
    int head = 0;
    while (tail < total) {
      TokensLine opening = lines[head];
      TokensLine closing = lines[tail];

      // Bring the newest line into the hash; the window is now complete.
      rolling = rolling * PRIME_BASE + closing.getHashCode();

      result.add(
          blockBuilder
              .setBlockHash(new ByteArray(rolling))
              .setIndexInFile(head)
              .setLines(opening.getStartLine(), closing.getEndLine())
              .setUnit(opening.getStartUnit(), closing.getEndUnit())
              .build());

      // Drop the oldest line's contribution before moving on.
      // NOTE(review): presumably power == PRIME_BASE^(blockSize - 1) — confirm where it is set.
      rolling -= power * opening.getHashCode();

      head++;
      tail++;
    }
    return result;
  }
  /**
   * Constructs a CloneGroup from the collected block ranges, passes it to {@code filter(...)} and
   * then calls {@code reset()}.
   *
   * <p>Each entry of {@code blockNumbers} yields one clone part, spanning from the block at index
   * {@code [0]} to the block at index {@code [1]}. Among the parts whose resource id equals
   * {@code originResourceId}, the one with the smallest unit start becomes the origin.
   */
  @Override
  public void endOfGroup() {
    CloneGroup.Builder builder = CloneGroup.builder().setLength(length);
    List<ClonePart> parts = Lists.newArrayListWithCapacity(count);
    ClonePart origin = null;

    for (int[] range : blockNumbers) {
      Block head = text.getBlock(range[0]);
      Block tail = text.getBlock(range[1]);
      ClonePart candidate =
          new ClonePart(
              head.getResourceId(),
              head.getIndexInFile(),
              head.getStartLine(),
              tail.getEndLine());
      parts.add(candidate);

      // TODO Godin: maybe use FastStringComparator here ?
      if (!originResourceId.equals(candidate.getResourceId())) {
        // Not a part from the origin resource: nothing more to do for it.
        continue;
      }

      if (origin == null) {
        origin = candidate;
        // To calculate length important to use the origin, because otherwise block may come from
        // DB without required data. Only the first origin part encountered sets the length.
        builder.setLengthInUnits(tail.getEndUnit() - head.getStartUnit() + 1);
      } else if (candidate.getUnitStart() < origin.getUnitStart()) {
        // An earlier part of the origin resource takes over as origin.
        origin = candidate;
      }
    }

    Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
    builder.setOrigin(origin).setParts(parts);
    CloneGroup group = builder.build();

    filter(group);
    reset();
  }
Esempio n. 5
0
  /**
   * Given a file with two lines, containing the following statements:
   *
   * <pre>
   * 0: A,B,A,B
   * 1: A,B,A
   * </pre>
   *
   * with a block size of 5 every block spans both lines, and the hashes are:
   *
   * <pre>
   * A,B,A,B,A=1
   * B,A,B,A,B=2
   * A,B,A,B,A=1
   * </pre>
   *
   * Expected: a single clone with two parts (block indexes 0 and 2) that cover exactly the same
   * lines.
   */
  @Test
  public void same_lines_but_different_indexes() {
    CloneIndex cloneIndex = createIndex();

    // All three blocks share the resource and the line range; only hash and index differ.
    Block.Builder template = Block.builder().setResourceId("a").setLines(0, 1);
    Block blockAtZero =
        template.setBlockHash(new ByteArray("1".getBytes())).setIndexInFile(0).build();
    Block blockAtOne =
        template.setBlockHash(new ByteArray("2".getBytes())).setIndexInFile(1).build();
    Block blockAtTwo =
        template.setBlockHash(new ByteArray("1".getBytes())).setIndexInFile(2).build();

    List<CloneGroup> clones =
        detect(cloneIndex, new Block[] {blockAtZero, blockAtOne, blockAtTwo});
    print(clones);

    assertThat(clones.size(), is(1));
    CloneGroup clone = clones.iterator().next();

    ClonePart partAtZero = new ClonePart("a", 0, 0, 1);
    ClonePart partAtTwo = new ClonePart("a", 2, 0, 1);
    assertThat(clone.getCloneUnitLength(), is(1));
    assertThat(clone.getCloneParts().size(), is(2));
    assertThat(clone.getOriginPart(), is(partAtZero));
    assertThat(clone.getCloneParts(), hasItem(partAtZero));
    assertThat(clone.getCloneParts(), hasItem(partAtTwo));
  }