Пример #1
0
  /**
   * Builds one signature per entry of {@code alignmentVectors} by looking each entry's offset up
   * again through {@link SemSigProcess#getSemSigFromOffset}, using {@code LKB.WordNetGloss} and
   * the {@code testedVectorSize} field.
   *
   * @param alignmentVectors signatures whose offsets are looked up again
   * @return a new list holding the looked-up signatures, in the same order as the input
   */
  private List<SemSig> getTestVectors(List<SemSig> alignmentVectors) {
    final List<SemSig> result = new ArrayList<SemSig>();

    for (final SemSig alignmentSig : alignmentVectors) {
      // Same offset, but fetched with the vector size used for testing.
      final SemSig fromOffset =
          SemSigProcess.getInstance()
              .getSemSigFromOffset(alignmentSig.getOffset(), LKB.WordNetGloss, testedVectorSize);
      result.add(fromOffset);
    }

    return result;
  }
Пример #2
0
  /**
   * Computes a similarity score between two lexical items.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Both texts are pre-processed with {@code cookLexicalItem}, passing the {@code
   *       discardStopwords} field.
   *   <li>If {@code mirrorPOStagging} is enabled and at least one side is {@code
   *       ItemType.SURFACE}, the two pre-processed lists are passed through {@code
   *       mirrorPosTags}.
   *   <li>Signatures are obtained for each side according to {@code disMethod}.
   *   <li>Each side is reduced to a single signature (the only element, or the result of {@code
   *       SemSigUtils.averageSemSigs}) and the two vectors are compared with {@code
   *       SemSigComparator.compare}.
   * </ol>
   *
   * @param text1 the source lexical item
   * @param text2 the target lexical item
   * @param disMethod how signatures are selected for the two sides
   * @param measure the comparison measure handed to {@code SemSigComparator.compare}
   * @param srcTextType the item type of {@code text1}
   * @param trgTextType the item type of {@code text2}
   * @return the value returned by {@code SemSigComparator.compare} for the two final signatures
   */
  public double getSimilarity(
      String text1,
      String text2,
      DisambiguationMethod disMethod,
      SignatureComparison measure,
      ItemType srcTextType,
      ItemType trgTextType) {

    // pre-process sentence pair
    List<String> cookedSentence1 = cookLexicalItem(text1, srcTextType, discardStopwords).first;
    List<String> cookedSentence2 = cookLexicalItem(text2, trgTextType, discardStopwords).first;

    // Mirror pos tagging.
    // The parentheses matter: '&&' binds tighter than '||', so without them the mirroring would
    // run whenever the target is SURFACE, even with mirrorPOStagging switched off.
    if (mirrorPOStagging
        && (srcTextType.equals(ItemType.SURFACE) || trgTextType.equals(ItemType.SURFACE))) {
      Pair<List<String>, List<String>> aPair = mirrorPosTags(cookedSentence1, cookedSentence2);

      cookedSentence1 = aPair.first;
      cookedSentence2 = aPair.second;
    }

    // Start from empty lists; they are kept as-is when no case below matches disMethod.
    List<SemSig> srcSemSigs = new ArrayList<SemSig>();
    List<SemSig> trgSemSigs = new ArrayList<SemSig>();

    switch (disMethod) {
      case NONE:
        // take all the synsets (or Semsigs to be consistent with others) of all the words in the
        // two sides
        srcSemSigs =
            SemSigProcess.getInstance()
                .getAllSemSigsFromWordPosList(cookedSentence1, srcTextType, testedVectorSize);
        trgSemSigs =
            SemSigProcess.getInstance()
                .getAllSemSigsFromWordPosList(cookedSentence2, trgTextType, testedVectorSize);
        break;

        // alignment-based disambiguation
        // should disambiguate the two texts and return the disambiguated SemSigs
      case ALIGNMENT_BASED:
        Pair<List<SemSig>, List<SemSig>> disambiguatedPair =
            DisambiguateCookedSentence(
                cookedSentence1,
                cookedSentence2,
                srcTextType,
                trgTextType,
                LKB.WordNetGloss,
                alignmentMeasure,
                alignmentVecSize,
                true,
                true);

        srcSemSigs = disambiguatedPair.first;
        trgSemSigs = disambiguatedPair.second;

        break;
    }

    // Collapse each side to one signature: a single element is used directly, anything else is
    // handed to averageSemSigs.
    SemSig srcSemSig =
        (srcSemSigs.size() == 1) ? srcSemSigs.get(0) : SemSigUtils.averageSemSigs(srcSemSigs);

    SemSig trgSemSig =
        (trgSemSigs.size() == 1) ? trgSemSigs.get(0) : SemSigUtils.averageSemSigs(trgSemSigs);

    return SemSigComparator.compare(
        srcSemSig.getVector(), trgSemSig.getVector(), measure, testedVectorSize, false, true);
  }