Exemple #1
0
  /**
   * Runs a match-and-merge with the CONCATENATE survivorship function and checks the length of
   * the merged attribute value.
   *
   * <p>The single "name" attribute of the generated records cycles through the first {@code
   * constantNumber} entries of {@code CONSTANTS}. After the merge, the method asserts that there
   * are {@code constantNumber} merged records, that each one groups {@code totalCount /
   * constantNumber} related ids, and that its only attribute is as long as that many copies of the
   * constant joined by {@code separator}.
   *
   * @param constantNumber number of distinct constants cycled through; also the expected number of
   *     merged records
   * @param totalCount count handed to the {@code RecordIterator}
   * @param matchAlgorithm matcher type handed to {@code MFB.build}
   * @param separator survivorship parameter handed to {@code MFB.build}; {@code null} is counted
   *     as a zero-length separator by the length check
   */
  private static void testConcatenateParameter(
      final int constantNumber,
      int totalCount,
      AttributeMatcherType matchAlgorithm,
      String separator) {
    Map<String, ValueGenerator> generators = new HashMap<String, ValueGenerator>();
    generators.put(
        "name",
        new ValueGenerator() {

          int index = 0;

          // NOTE(review): this returns the running value counter rather than a fixed position —
          // confirm that is what RecordGenerator expects.
          @Override
          public int getColumnIndex() {
            return index;
          }

          @Override
          public String newValue() {
            // Cycle through the first constantNumber constants.
            return CONSTANTS[index++ % constantNumber];
          }
        });
    RecordGenerator recordGenerator = new RecordGenerator();
    recordGenerator.setMatchKeyMap(generators);
    Iterator<Record> iterator = new RecordIterator(totalCount, recordGenerator);
    MatchMergeAlgorithm algorithm =
        MFB.build(
            new AttributeMatcherType[] {matchAlgorithm},
            new String[] {""},
            new float[] {1},
            0,
            new SurvivorShipAlgorithmEnum[] {SurvivorShipAlgorithmEnum.CONCATENATE},
            new String[] {separator},
            new double[] {1},
            new IAttributeMatcher.NullOption[] {IAttributeMatcher.NullOption.nullMatchAll},
            new SubString[] {SubString.NO_SUBSTRING},
            "MFB");
    List<Record> mergedRecords = algorithm.execute(iterator);
    assertEquals(constantNumber, mergedRecords.size());

    // Both values are the same for every merged record, so compute them once.
    final int separatorLength = separator == null ? 0 : separator.length();
    // Plain integer division: wrapping it in Math.round would only push the (already truncated)
    // quotient through a lossy int -> float conversion.
    final int expectedGroupSize = totalCount / constantNumber;

    // NOTE(review): indexing CONSTANTS by position assumes the merged records come back in the
    // same order as the constants were generated — confirm against MatchMergeAlgorithm.
    int i = 0;
    for (Record mergedRecord : mergedRecords) {
      int relatedIdCount = mergedRecord.getRelatedIds().size();
      // n concatenated values are joined by n - 1 separators.
      int separatorChars = (relatedIdCount - 1) * separatorLength;
      List<Attribute> attributes = mergedRecord.getAttributes();
      assertEquals(expectedGroupSize, relatedIdCount);
      assertEquals(1, attributes.size());
      Attribute attribute = attributes.get(0);
      assertEquals(
          (CONSTANTS[i].length() * relatedIdCount) + separatorChars,
          attribute.getValue().length());
      i++;
    }
  }
Exemple #2
0
  /**
   * Runs a match-and-merge with the LONGEST survivorship function over records whose "name"
   * attribute cycles through the first {@code constantNumber} entries of {@code CONSTANTS}.
   *
   * <p>Asserts that the merge yields {@code constantNumber} records and that each of them groups
   * {@code totalCount / constantNumber} related ids.
   *
   * @param constantNumber number of distinct constants cycled through; also the expected number of
   *     merged records
   * @param totalCount count handed to the {@code RecordIterator}
   * @param matchAlgorithm matcher type handed to {@code MFB.build}
   */
  private static void testConstant(
      final int constantNumber, int totalCount, AttributeMatcherType matchAlgorithm) {
    Map<String, ValueGenerator> generators = new HashMap<String, ValueGenerator>();
    generators.put(
        "name",
        new ValueGenerator() {

          int index = 0;

          // NOTE(review): this returns the running value counter rather than a fixed position —
          // confirm that is what RecordGenerator expects.
          @Override
          public int getColumnIndex() {
            return index;
          }

          @Override
          public String newValue() {
            // Cycle through the first constantNumber constants.
            return CONSTANTS[index++ % constantNumber];
          }
        });
    RecordGenerator recordGenerator = new RecordGenerator();
    recordGenerator.setMatchKeyMap(generators);
    Iterator<Record> iterator = new RecordIterator(totalCount, recordGenerator);
    MatchMergeAlgorithm algorithm =
        MFB.build(
            new AttributeMatcherType[] {matchAlgorithm},
            new String[] {""},
            new float[] {1},
            0,
            new SurvivorShipAlgorithmEnum[] {SurvivorShipAlgorithmEnum.LONGEST},
            new String[] {""},
            new double[] {1},
            new IAttributeMatcher.NullOption[] {IAttributeMatcher.NullOption.nullMatchAll},
            new SubString[] {SubString.NO_SUBSTRING},
            "MFB");
    List<Record> mergedRecords = algorithm.execute(iterator);
    assertEquals(constantNumber, mergedRecords.size());

    // Plain integer division: wrapping it in Math.round would only push the (already truncated)
    // quotient through a lossy int -> float conversion.
    final int expectedGroupSize = totalCount / constantNumber;
    for (Record mergedRecord : mergedRecords) {
      assertEquals(expectedGroupSize, mergedRecord.getRelatedIds().size());
    }
  }
Exemple #3
0
  /**
   * Checks that the weight given to the single match rule does not change the overall confidence.
   *
   * <p>The same record generator is matched twice, first with a rule weight of 1 and then with a
   * rule weight of 4, and the summed confidence of the merged records is compared between the two
   * runs.
   *
   * @param constantNumber number of distinct constants cycled through; also the expected number of
   *     merged records in each run
   * @param totalCount count handed to the {@code RecordIterator} of each run
   * @param matchAlgorithm matcher type handed to {@code MFB.build}
   */
  private static void testWeight(
      final int constantNumber, int totalCount, AttributeMatcherType matchAlgorithm) {
    Map<String, ValueGenerator> generators = new HashMap<String, ValueGenerator>();
    generators.put(
        "name",
        new ValueGenerator() {

          int index = 0;

          // NOTE(review): this returns the running value counter rather than a fixed position —
          // confirm that is what RecordGenerator expects.
          @Override
          public int getColumnIndex() {
            return index;
          }

          @Override
          public String newValue() {
            // Cycle through the first constantNumber constants.
            return CONSTANTS[index++ % constantNumber];
          }
        });
    // Both runs share one generator, so the value counter keeps advancing across them.
    RecordGenerator recordGenerator = new RecordGenerator();
    recordGenerator.setMatchKeyMap(generators);

    // Runs a first match with a weight 1
    double totalConfidence1 =
        sumConfidenceWithWeight(recordGenerator, constantNumber, totalCount, matchAlgorithm, 1);
    // Runs a second match with a weight 4
    double totalConfidence2 =
        sumConfidenceWithWeight(recordGenerator, constantNumber, totalCount, matchAlgorithm, 4);

    // ... but this shouldn't change the overall score (because score is always between 0 and 1).
    // The sums are kept as doubles: accumulating into a long would truncate every fractional
    // confidence and could make this comparison pass vacuously. The delta only absorbs
    // floating-point rounding of the summation.
    assertEquals(totalConfidence1, totalConfidence2, 1e-9);
  }

  /**
   * Matches {@code totalCount} generated records with a single rule of the given weight, asserts
   * the expected grouping, and returns the sum of the merged records' confidences.
   */
  private static double sumConfidenceWithWeight(
      RecordGenerator recordGenerator,
      int constantNumber,
      int totalCount,
      AttributeMatcherType matchAlgorithm,
      double weight) {
    Iterator<Record> iterator = new RecordIterator(totalCount, recordGenerator);
    MatchMergeAlgorithm algorithm =
        MFB.build(
            new AttributeMatcherType[] {matchAlgorithm},
            new String[] {""},
            new float[] {1},
            0,
            new SurvivorShipAlgorithmEnum[] {SurvivorShipAlgorithmEnum.LONGEST},
            new String[] {""},
            new double[] {weight}, // Mark rule with the requested weight.
            new IAttributeMatcher.NullOption[] {IAttributeMatcher.NullOption.nullMatchAll},
            new SubString[] {SubString.NO_SUBSTRING},
            "MFB");
    List<Record> mergedRecords = algorithm.execute(iterator);
    assertEquals(constantNumber, mergedRecords.size());
    double totalConfidence = 0;
    for (Record mergedRecord : mergedRecords) {
      assertEquals(totalCount / constantNumber, mergedRecord.getRelatedIds().size());
      totalConfidence += mergedRecord.getConfidence();
    }
    return totalConfidence;
  }