Beispiel #1
0
 public KnnSimsCommand(
     File sourceFile,
     File destinationFile,
     Charset charset,
     SingleEnumerating indexDeligate,
     int k)
     throws IOException {
   super(sourceFile, destinationFile, charset, EnumeratingDelegates.toPair(indexDeligate));
   super.setComparator(Comparators.fallback(classComparator, nearnessComparator));
   setK(k);
 }
Beispiel #2
0
 /**
  * Creates a merge task with no sources or sink assigned; only the comparator is initialised, to
  * whatever {@code Comparators.naturalOrderIfPossible()} returns.
  */
 public ObjectMergeTask() {
   this.setComparator(Comparators.<T>naturalOrderIfPossible());
 }
Beispiel #3
0
 /**
  * Creates a merge task wired to two sources and one sink, with the comparator initialised to
  * whatever {@code Comparators.naturalOrderIfPossible()} returns.
  *
  * @param sourceA passed to {@code setSourceA}
  * @param sourceB passed to {@code setSourceB}
  * @param sink passed to {@code setSink}
  */
 public ObjectMergeTask(ObjectSource<T> sourceA, ObjectSource<T> sourceB, ObjectSink<T> sink) {
   // Setter order is kept as-is: sources, then sink, then comparator.
   this.setSourceA(sourceA);
   this.setSourceB(sourceB);
   this.setSink(sink);
   this.setComparator(Comparators.<T>naturalOrderIfPossible());
 }
Beispiel #4
0
 /**
  * Returns the combined comparator, wrapped in {@code Comparators.reverse} when {@code
  * isReverse()} is true.
  *
  * @return the comparator from {@code getCombinedComparator()}, reversed if requested
  */
 @Override
 public Comparator<Weighted<TokenPair>> getComparator() {
   if (isReverse()) {
     return Comparators.reverse(getCombinedComparator());
   }
   return getCombinedComparator();
 }
Beispiel #5
0
 /**
  * Builds a single comparator by handing the class comparator and the nearness comparator, in
  * that order, to {@code Comparators.fallback}.
  *
  * @return the result of {@code Comparators.fallback(getClassComparator(),
  *     getNearnessComparator())}
  */
 public Comparator<Weighted<TokenPair>> getCombinedComparator() {
   final Comparator<Weighted<TokenPair>> byClass = getClassComparator();
   final Comparator<Weighted<TokenPair>> byNearness = getNearnessComparator();
   return Comparators.fallback(byClass, byNearness);
 }
Beispiel #6
0
/**
 * Task that read in a file and produces the k-nearest-neighbors for each base entry. Assumes the
 * file is composed of entry, entry, weight triples that are delimited by tabs.
 *
 * @author Hamish I A Morgan &lt;[email protected]&gt;
 */
@Parameters(commandDescription = "Perform k-nearest-neighbours on a similarity file.")
public final class KnnSimsCommand extends SortEventsCommand {

  private static final Log LOG = LogFactory.getLog(KnnSimsCommand.class);

  @Parameter(
      names = {"-k"},
      description = "The maximum number of neighbours to produce per word.")
  private int k = ExternalKnnSimsCommand.DEFAULT_K;

  private Comparator<Weighted<TokenPair>> classComparator =
      Weighted.recordOrder(TokenPair.firstIndexOrder());

  private Comparator<Weighted<TokenPair>> nearnessComparator =
      Comparators.reverse(Weighted.<TokenPair>weightOrder());

  public KnnSimsCommand(
      File sourceFile,
      File destinationFile,
      Charset charset,
      SingleEnumerating indexDeligate,
      int k)
      throws IOException {
    super(sourceFile, destinationFile, charset, EnumeratingDelegates.toPair(indexDeligate));
    super.setComparator(Comparators.fallback(classComparator, nearnessComparator));
    setK(k);
  }

  public KnnSimsCommand() {
    setK(100);
  }

  public Comparator<Weighted<TokenPair>> getCombinedComparator() {
    return Comparators.fallback(getClassComparator(), getNearnessComparator());
  }

  @Override
  public Comparator<Weighted<TokenPair>> getComparator() {
    return isReverse() ? Comparators.reverse(getCombinedComparator()) : getCombinedComparator();
  }

  @Override
  @Deprecated
  public void setComparator(Comparator<Weighted<TokenPair>> comparator) {
    throw new UnsupportedOperationException(
        "Class and nearness comparators should be set instead.");
  }

  public Comparator<Weighted<TokenPair>> getClassComparator() {
    return classComparator;
  }

  public void setClassComparator(Comparator<Weighted<TokenPair>> classComparator) {
    this.classComparator = classComparator;
  }

  public Comparator<Weighted<TokenPair>> getNearnessComparator() {
    return nearnessComparator;
  }

  public void setNearnessComparator(Comparator<Weighted<TokenPair>> nearnessComparator) {
    this.nearnessComparator = nearnessComparator;
  }

  public final int getK() {
    return k;
  }

  public final void setK(int k) {
    if (k < 1) throw new IllegalArgumentException("k < 1");
    this.k = k;
  }

  private boolean first = false;

  @Override
  protected ObjectSource<Weighted<TokenPair>> openSource(File file)
      throws FileNotFoundException, IOException {
    final ObjectSource<Weighted<TokenPair>> src =
        first
            ? BybloIO.openSimsSource(file, getCharset(), getIndexDelegate())
            : BybloIO.openNeighboursSource(file, getCharset(), getIndexDelegate());
    first = true;
    return src;
  }

  @Override
  protected ObjectSink<Weighted<TokenPair>> openSink(File file)
      throws FileNotFoundException, IOException {
    return new KFirstReducingObjectSink<Weighted<TokenPair>>(
        new WeightSumReducerObjectSink<TokenPair>(
            BybloIO.openNeighboursSink(file, getCharset(), getIndexDelegate())),
        classComparator,
        k);
  }

  @Override
  protected Objects.ToStringHelper toStringHelper() {
    return super.toStringHelper().add("k", k);
  }
}