/**
 * Performs STC clustering of {@link #documents}.
 *
 * <p>The document list is handed to {@code multilingualClustering}, which calls back into this
 * component through an {@link IMonolingualClusteringAlgorithm} adapter. For each callback the
 * {@link #documents} field is temporarily replaced with the documents passed to the adapter,
 * {@code cluster(language)} is invoked, and the resulting {@code clusters} field is returned.
 * Once the multilingual pass completes, {@link #documents} is set back to the original list.
 *
 * <p>When the language aggregation strategy is {@link LanguageAggregationStrategy#FLATTEN_ALL},
 * the resulting clusters are sorted: "other topics" clusters last, then by descending score,
 * then by descending size.
 *
 * @throws ProcessingException declared by the overridden method
 */
@Override
public void process() throws ProcessingException {
  // Remember the full input so the field can be restored after the per-language callbacks
  // below have overwritten it.
  final List<Document> originalDocuments = documents;
  clusters =
      multilingualClustering.process(
          documents,
          new IMonolingualClusteringAlgorithm() {
            public List<Cluster> process(List<Document> documents, LanguageCode language) {
              BasicPreprocessing.this.documents = documents;
              BasicPreprocessing.this.cluster(language);
              return BasicPreprocessing.this.clusters;
            }
          });
  documents = originalDocuments;

  if (multilingualClustering.languageAggregationStrategy
      == LanguageAggregationStrategy.FLATTEN_ALL) {
    Collections.sort(
        clusters,
        new Comparator<Cluster>() {
          public int compare(Cluster c1, Cluster c2) {
            // Only let the "other topics" flag decide when the two clusters differ in it.
            // Returning 1 whenever c1 is "other topics" (even if c2 is as well) makes
            // compare(a, b) and compare(b, a) both positive, which violates the Comparator
            // contract and may cause the sort to fail.
            if (c1.isOtherTopics() != c2.isOtherTopics()) {
              return c1.isOtherTopics() ? 1 : -1;
            }
            // Higher score first.
            if (c1.getScore() < c2.getScore()) return 1;
            if (c1.getScore() > c2.getScore()) return -1;
            // Larger cluster first.
            if (c1.size() < c2.size()) return 1;
            if (c1.size() > c2.size()) return -1;
            return 0;
          }
        });
  }
}
/** Performs Lingo clustering of {@link #documents}. */ @Override @SuppressWarnings("unchecked") public void process() throws ProcessingException { // There is a tiny trick here to support multilingual clustering without // refactoring the whole component: we remember the original list of documents // and invoke clustering for each language separately within the // IMonolingualClusteringAlgorithm implementation below. This is safe because // processing components are not thread-safe by definition and // IMonolingualClusteringAlgorithm forbids concurrent execution by contract. final List<Document> originalDocuments = documents; clusters = multilingualClustering.process( documents, new IMonolingualClusteringAlgorithm() { public List<Cluster> process(List<Document> documents, LanguageCode language) { LingoClusteringAlgorithm.this.documents = documents; LingoClusteringAlgorithm.this.cluster(language); return LingoClusteringAlgorithm.this.clusters; } }); documents = originalDocuments; if (multilingualClustering.languageAggregationStrategy == LanguageAggregationStrategy.FLATTEN_ALL) { Collections.sort( clusters, Ordering.compound( Lists.newArrayList( Cluster.OTHER_TOPICS_AT_THE_END, Cluster.byReversedWeightedScoreAndSizeComparator(scoreWeight)))); } }
/** Performs STC clustering of {@link #documents}. */ @Override public void process() throws ProcessingException { // There is a tiny trick here to support multilingual clustering without // refactoring the whole component: we remember the original list of documents // and invoke clustering for each language separately within the // IMonolingualClusteringAlgorithm implementation below. This is safe because // processing components are not thread-safe by definition and // IMonolingualClusteringAlgorithm forbids concurrent execution by contract. final List<Document> originalDocuments = documents; clusters = multilingualClustering.process( documents, new IMonolingualClusteringAlgorithm() { public List<Cluster> process(List<Document> documents, LanguageCode language) { STCClusteringAlgorithm.this.documents = documents; STCClusteringAlgorithm.this.cluster(language); return STCClusteringAlgorithm.this.clusters; } }); documents = originalDocuments; // TODO: be consistent here with Lingo implementation (sort with a compound). if (multilingualClustering.languageAggregationStrategy == LanguageAggregationStrategy.FLATTEN_ALL) { Collections.sort( clusters, new Comparator<Cluster>() { public int compare(Cluster c1, Cluster c2) { if (c1.isOtherTopics() != c2.isOtherTopics()) { return c1.isOtherTopics() ? 1 : -1; } if (c1.getScore() < c2.getScore()) return 1; if (c1.getScore() > c2.getScore()) return -1; if (c1.size() < c2.size()) return 1; if (c1.size() > c2.size()) return -1; return 0; } }); } }