/**
 * Checks the {@code CONCATENATE} survivorship function when it is given a separator parameter.
 *
 * <p>Generates {@code totalCount} records whose single "name" value cycles through the first
 * {@code constantNumber} entries of {@code CONSTANTS}, runs an MFB match/merge over them, and
 * asserts that:
 *
 * <ul>
 *   <li>exactly {@code constantNumber} merged records come out;
 *   <li>each merged record groups {@code totalCount / constantNumber} related ids;
 *   <li>each merged record has one attribute whose value length equals the constant repeated once
 *       per related id plus one separator between consecutive repetitions.
 * </ul>
 *
 * <p>The length check assumes that the i-th merged record corresponds to {@code CONSTANTS[i]}.
 *
 * @param constantNumber number of distinct values generated; also the expected number of merged
 *     records
 * @param totalCount number of records fed to the algorithm
 * @param matchAlgorithm attribute matcher handed to {@code MFB.build}
 * @param separator survivorship parameter handed to {@code MFB.build}; {@code null} is counted as
 *     a zero-length separator in the expected length
 */
private static void testConcatenateParameter(
    final int constantNumber,
    int totalCount,
    AttributeMatcherType matchAlgorithm,
    String separator) {
  Map<String, ValueGenerator> generators = new HashMap<String, ValueGenerator>();
  generators.put(
      "name",
      new ValueGenerator() {
        int index = 0;

        // NOTE(review): returns the running value counter, not a fixed column position; kept
        // exactly as in the original - confirm this is intended.
        @Override
        public int getColumnIndex() {
          return index;
        }

        @Override
        public String newValue() {
          // Cycle through the first constantNumber constants.
          return CONSTANTS[index++ % constantNumber];
        }
      });

  RecordGenerator recordGenerator = new RecordGenerator();
  recordGenerator.setMatchKeyMap(generators);
  Iterator<Record> iterator = new RecordIterator(totalCount, recordGenerator);

  MatchMergeAlgorithm algorithm =
      MFB.build(
          new AttributeMatcherType[] {matchAlgorithm},
          new String[] {""},
          new float[] {1},
          0,
          new SurvivorShipAlgorithmEnum[] {SurvivorShipAlgorithmEnum.CONCATENATE},
          new String[] {separator},
          new double[] {1},
          new IAttributeMatcher.NullOption[] {IAttributeMatcher.NullOption.nullMatchAll},
          new SubString[] {SubString.NO_SUBSTRING},
          "MFB");

  List<Record> mergedRecords = algorithm.execute(iterator);
  assertEquals(constantNumber, mergedRecords.size());

  // The separator does not change between records, so measure it once.
  final int separatorLength = separator == null ? 0 : separator.length();

  int i = 0;
  for (Record mergedRecord : mergedRecords) {
    int relatedIdCount = mergedRecord.getRelatedIds().size();
    // One separator between each pair of consecutive concatenated values.
    int separatorChars = (relatedIdCount - 1) * separatorLength;
    List<Attribute> attributes = mergedRecord.getAttributes();

    // Plain integer division: the quotient is already truncated, so wrapping it in
    // Math.round (which goes through float) added nothing and was inexact above 2^24.
    assertEquals(totalCount / constantNumber, relatedIdCount);
    assertEquals(1, attributes.size());

    Attribute attribute = attributes.get(0);
    assertEquals(
        (CONSTANTS[i].length() * relatedIdCount) + separatorChars,
        attribute.getValue().length());
    i++;
  }
}
/**
 * Checks that records carrying a fixed set of constant values are merged into one group per
 * constant.
 *
 * <p>Generates {@code totalCount} records whose single "name" value cycles through the first
 * {@code constantNumber} entries of {@code CONSTANTS}, runs an MFB match/merge with the
 * {@code LONGEST} survivorship function, and asserts that {@code constantNumber} merged records
 * come out, each grouping {@code totalCount / constantNumber} related ids.
 *
 * @param constantNumber number of distinct values generated; also the expected number of merged
 *     records
 * @param totalCount number of records fed to the algorithm
 * @param matchAlgorithm attribute matcher handed to {@code MFB.build}
 */
private static void testConstant(
    final int constantNumber, int totalCount, AttributeMatcherType matchAlgorithm) {
  Map<String, ValueGenerator> generators = new HashMap<String, ValueGenerator>();
  generators.put(
      "name",
      new ValueGenerator() {
        int index = 0;

        @Override
        public int getColumnIndex() {
          return index;
        }

        @Override
        public String newValue() {
          // Cycle through the first constantNumber constants.
          return CONSTANTS[index++ % constantNumber];
        }
      });

  RecordGenerator recordGenerator = new RecordGenerator();
  recordGenerator.setMatchKeyMap(generators);
  Iterator<Record> iterator = new RecordIterator(totalCount, recordGenerator);

  MatchMergeAlgorithm algorithm =
      MFB.build(
          new AttributeMatcherType[] {matchAlgorithm},
          new String[] {""},
          new float[] {1},
          0,
          new SurvivorShipAlgorithmEnum[] {SurvivorShipAlgorithmEnum.LONGEST},
          new String[] {""},
          new double[] {1},
          new IAttributeMatcher.NullOption[] {IAttributeMatcher.NullOption.nullMatchAll},
          new SubString[] {SubString.NO_SUBSTRING},
          "MFB");

  List<Record> mergedRecords = algorithm.execute(iterator);
  assertEquals(constantNumber, mergedRecords.size());

  for (Record mergedRecord : mergedRecords) {
    // Plain integer division: the quotient is already truncated, so wrapping it in
    // Math.round (which goes through float) added nothing and was inexact above 2^24.
    assertEquals(totalCount / constantNumber, mergedRecord.getRelatedIds().size());
  }
}
private static void testWeight( final int constantNumber, int totalCount, AttributeMatcherType matchAlgorithm) { Map<String, ValueGenerator> generators = new HashMap<String, ValueGenerator>(); generators.put( "name", new ValueGenerator() { int index = 0; @Override public int getColumnIndex() { return index; } @Override public String newValue() { return CONSTANTS[index++ % constantNumber]; } }); // Runs a first match with a weight 1 RecordGenerator recordGenerator = new RecordGenerator(); recordGenerator.setMatchKeyMap(generators); Iterator<Record> iterator = new RecordIterator(totalCount, recordGenerator); MatchMergeAlgorithm algorithm = MFB.build( new AttributeMatcherType[] {matchAlgorithm}, new String[] {""}, new float[] {1}, 0, new SurvivorShipAlgorithmEnum[] {SurvivorShipAlgorithmEnum.LONGEST}, new String[] {""}, new double[] {1}, // Mark rule with a weight of 1. new IAttributeMatcher.NullOption[] {IAttributeMatcher.NullOption.nullMatchAll}, new SubString[] {SubString.NO_SUBSTRING}, "MFB"); List<Record> mergedRecords = algorithm.execute(iterator); assertEquals(constantNumber, mergedRecords.size()); long totalConfidence1 = 0; for (Record mergedRecord : mergedRecords) { assertEquals(totalCount / constantNumber, mergedRecord.getRelatedIds().size()); totalConfidence1 += mergedRecord.getConfidence(); } // Runs a second match with a weight 4 iterator = new RecordIterator(totalCount, recordGenerator); algorithm = MFB.build( new AttributeMatcherType[] {matchAlgorithm}, new String[] {""}, new float[] {1}, 0, new SurvivorShipAlgorithmEnum[] {SurvivorShipAlgorithmEnum.LONGEST}, new String[] {""}, new double[] { 4 }, // Mark rule with a weight of 4 -> should not affect overall score since score is // normalized. 
new IAttributeMatcher.NullOption[] {IAttributeMatcher.NullOption.nullMatchAll}, new SubString[] {SubString.NO_SUBSTRING}, "MFB"); mergedRecords = algorithm.execute(iterator); assertEquals(constantNumber, mergedRecords.size()); long totalConfidence2 = 0; for (Record mergedRecord : mergedRecords) { assertEquals(totalCount / constantNumber, mergedRecord.getRelatedIds().size()); totalConfidence2 += mergedRecord.getConfidence(); } // ... but this shouldn't change the overall score (because score is always between 0 and 1). assertEquals(totalConfidence1, totalConfidence2); }