/**
 * Runs the assigner with {@code exactPhraseAssignment} switched on and a minimum cluster size
 * of 2, then expects a single {@code documentIndices} entry containing documents
 * {@code {0, 1}} and a first phrase index of 0.
 */
@Test
public void testPhraseLabelsExactMatch() {
  createDocuments(
      "data is cool", "data is cool", "data is cool", "data is cool", "data cool", "data cool");

  // Configure the assigner before the shared check helper triggers the assignment.
  documentAssigner.exactPhraseAssignment = true;
  documentAssigner.minClusterSize = 2;

  final int[][] documentsPerLabel = {{0, 1}};
  check(documentsPerLabel, 0);
}
/**
 * Feeds four "cat" and four "cats" strings to the assigner with a minimum cluster size of 1
 * and expects a single {@code documentIndices} entry containing documents
 * {@code {0, 1, 2, 3}}, with a first phrase index of -1.
 */
@Test
public void testStemmedSingleWordLabelConflation() {
  createDocuments("cat", "cat", "cat", "cat", "cats", "cats", "cats", "cats");

  documentAssigner.minClusterSize = 1;

  // NOTE(review): -1 presumably signals that no phrase labels exist -- confirm against the
  // allLabels contract.
  final int[][] documentsPerLabel = {{0, 1, 2, 3}};
  check(documentsPerLabel, -1);
}
/**
 * Feeds "coal is" and "mining" strings to the assigner with a minimum cluster size of 1 and
 * expects two {@code documentIndices} entries, {@code {0}} and {@code {1}}, with a first
 * phrase index of -1.
 */
@Test
public void testSingleWordLabels() {
  createDocuments("coal is", "coal is", "mining", "mining");

  documentAssigner.minClusterSize = 1;

  final int[][] documentsPerLabel = {
    {0},
    {1}
  };
  check(documentsPerLabel, -1);
}
/**
 * Runs the assigner with a minimum cluster size of 2 over "test coal" / "coal test . mining"
 * inputs and expects four {@code documentIndices} entries, each equal to {@code {0, 1}},
 * with a first phrase index of 2.
 */
@Test
public void testMinClusterSize() {
  createDocuments("test coal", "test coal", "coal test . mining", "coal test . mining");

  documentAssigner.minClusterSize = 2;

  final int[][] documentsPerLabel = {
    {0, 1},
    {0, 1},
    {0, 1},
    {0, 1}
  };
  check(documentsPerLabel, 2);
}
private void check(int[][] expectedDocumentIndices, int expectedFirstPhraseIndex) { runPreprocessing(); documentAssigner.assign(context); assertThat(context.allLabels.firstPhraseIndex) .as("allLabels.firstPhraseIndex") .isEqualTo(expectedFirstPhraseIndex); assertThat(context.allLabels.documentIndices) .as("allLabels.documentIndices") .hasSize(expectedDocumentIndices.length); for (int i = 0; i < expectedDocumentIndices.length; i++) { assertThat(context.allLabels.documentIndices[i].asIntLookupContainer().toArray()) .as("allLabels.documentIndices[" + i + "]") .isEqualTo(expectedDocumentIndices[i]); } }
/**
 * Feeds singular/plural combinations of "cat horse" to the assigner with a minimum cluster
 * size of 1 and expects three {@code documentIndices} entries, each equal to
 * {@code {0, 1, 2, 3}}, with a first phrase index of 2.
 */
@Test
public void testStemmedPhraseLabelConflation() {
  createDocuments(
      "cat horse",
      "cat horse",
      "cats horse",
      "cats horse",
      "cat horses",
      "cat horses",
      "cats horses",
      "cats horses");

  documentAssigner.minClusterSize = 1;

  final int[][] documentsPerLabel = {
    {0, 1, 2, 3},
    {0, 1, 2, 3},
    {0, 1, 2, 3}
  };
  check(documentsPerLabel, 2);
}