/**
 * Expands a query by appending, after each kept token, up to three word forms taken from the
 * first WordNet synset found for that token.
 *
 * <p>Every token is tagged and its tag is mapped to a WordNet synset type (noun, verb, adjective
 * or adverb). Tokens whose tag maps to none of these are left out of the result.
 *
 * @param originalQuery the raw query text to tokenize, tag and expand
 * @return the kept tokens, each followed by its appended word forms, joined with spaces and
 *     trimmed
 */
public String expandQuery(String originalQuery) {
    StringBuilder result = new StringBuilder();

    for (TaggedToken token : tagger.tokenizeAndTag(originalQuery)) {
      String tag = token.tag;
      SynsetType type;
      // "^" must be escaped: unescaped it is the regex start-of-input anchor, so a token tagged
      // with the literal "^" could never match and was silently dropped from the expansion.
      if (tag.matches("N|O|\\^|S|Z|L|M|Y|X|!")) {
        type = SynsetType.NOUN;
      } else if (tag.matches("V|T")) {
        type = SynsetType.VERB;
      } else if (tag.matches("A")) {
        type = SynsetType.ADJECTIVE;
      } else if (tag.matches("R")) {
        type = SynsetType.ADVERB;
      } else {
        // Any other tag contributes nothing to the expanded query.
        continue;
      }

      result.append(" ").append(token.token);

      // Only the first synset is consulted, and at most three of its word forms are appended.
      Synset[] synsets = database.getSynsets(token.token, type);
      if (synsets.length > 0) {
        String[] forms = synsets[0].getWordForms();
        for (int j = 0; j < Math.min(forms.length, 3); ++j) {
          result.append(" ").append(forms[j]);
        }
      }
    }

    String expanded = result.toString().trim();
    logger.info(
        "Before expansion, query = "
            + originalQuery
            + ", after expansion, query = "
            + expanded);
    return expanded;
  }
Пример #2
0
  /**
   * Tells whether a keyword is present in the WordNet dictionary as a noun or as a verb.
   *
   * @param word - the keyword
   * @return <tt>true</tt> - if at least one noun or verb synset exists for the keyword,
   *     <tt>false</tt> - otherwise
   */
  public static boolean isAWord(String word) {
    WordNetDatabase wordNet = WordNetDatabase.getFileInstance();

    // Look up nouns first, then verbs; only the number of hits matters.
    int nounHits = wordNet.getSynsets(word, SynsetType.NOUN).length;
    int verbHits = wordNet.getSynsets(word, SynsetType.VERB).length;

    return nounHits + verbHits > 0;
  }
Пример #3
0
  /**
   * Reports whether a word has at least one noun synset.
   *
   * <p>When the word itself has no noun synset but {@code LexicalInflector} considers it plural,
   * the check is repeated on its singularized form.
   *
   * @param word the word to look up
   * @return {@code true} if the word, or its singular form, has a noun synset
   */
  public boolean isNoun(String word) {
    Synset[] senses = database.getSynsets(word);
    for (int i = 0; i < senses.length; i++) {
      if (senses[i].getType().equals(SynsetType.NOUN)) {
        return true;
      }
    }

    if (!LexicalInflector.isPlural(word)) {
      return false;
    }
    return isNoun(LexicalInflector.singularize(word));
  }
Пример #4
0
  /**
   * Picks a replacement for the word at {@code indexToReplace} from the WordNet word forms of
   * that word.
   *
   * <p>Single-word forms that differ from the original word are collected into the shared
   * {@code synonyms} list (cleared first), sorted with {@code myLengthComparator}, and offered
   * one at a time to {@code LexicalAnalyzer}; the first accepted candidate is returned. The
   * original word is returned when there are no synsets, no candidates, no accepted candidate,
   * or when the next candidate to try is not longer than the original word.
   *
   * @param sentence the words of the sentence being processed
   * @param indexToReplace position in {@code sentence} of the word to find a synonym for
   * @return the selected synonym, or the original word when no replacement is selected
   */
  private String findSynonym(ArrayList<Word> sentence, int indexToReplace /*<Word wordIn>*/) {
    /*
     * Looks up the word stored at indexToReplace and searches its synsets for a suitable
     * synonym.
     */
    String output = "";
    // Earlier variant took the Word directly:
    // String word;
    // word=wordIn.getString();
    String word = sentence.get(indexToReplace).getValue();
    Synset[] synsets = database.getSynsets(word);

    // synonyms is shared state outside this method; start from an empty candidate list.
    synonyms.clear();

    if (synsets.length > 0) {

      for (int i = 0; i < synsets.length; i++) {

        String[] wordForms = synsets[i].getWordForms();
        for (int j = 0; j < wordForms.length; j++) {
          // Multi-word forms are only printed, never used as candidates; forms equal to the
          // original word are skipped.
          if (wordForms[j].contains(" ")) {
            System.out.println(wordForms[j]);
          } else if (!wordForms[j].equals(word)) synonyms.add(wordForms[j]);
        }
      }

      // TODO: determine here whether the word is plural.

      /////////////////////////// ###########################////////////////////

      Collections.sort(synonyms, myLengthComparator);
      // Presumably sorted longest first (depends on myLengthComparator, not visible here).

      // synonyms now holds every term that could replace the word; each one still has to be
      // accepted by the lexical analyzer.
      //			System.out.println("kkkk");
      if (synonyms.size() == 0) return word;
      // Drop rejected candidates from the front until one is accepted.
      while (!LexicalAnalyzer(sentence, indexToReplace, synonyms.get(0))) {
        //			System.out.println("1");
        synonyms.remove(0);
        if (synonyms.size() == 0) return word; // did not find synonym

        // This length check only runs after a rejection, so the very first candidate is
        // offered to the analyzer regardless of its length.
        if (synonyms.get(0).length() <= word.length()) {
          return word;
          // do not replace word, remaining synonyms are not longer than it
        }
      }

      // Placeholder selection: take the accepted candidate at the head of the list.
      if (synonyms.size() > 0) output = synonyms.get(0);
      else output = word;
    } else { // There are no known synsets for this word in the WordNet database.
      output = word;
    }

    return output;
  }
Пример #5
0
  /**
   * Creates the essay parser: sets the {@code wordnet.database.dir} system property to
   * {@code ../war/dict}, then creates the synonym list, obtains the WordNet database handle,
   * creates the output buffer and loads the English PCFG lexicalized parser model.
   */
  public ParseEssay() {
    // Set before the WordNet database instance is requested below.
    // NOTE(review): relative path — presumably resolved against the process working directory;
    // confirm against the deployment layout.
    System.setProperty("wordnet.database.dir", "../war/dict");
    synonyms = new ArrayList<String>();
    database = WordNetDatabase.getFileInstance();
    baos = new ByteArrayOutputStream();
    lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");

    // ??
  }
  /**
   * Collects the word forms of every synset found for a lemma under the given synset type.
   *
   * @param lemma the word to look up
   * @param type the synset type the lookup is restricted to
   * @return the distinct word forms across all matching synsets; empty when nothing matches
   */
  public Set<String> getSynonym(String lemma, SynsetType type) {
    Set<String> collected = new HashSet<String>();
    for (Synset entry : wordnet.getSynsets(lemma, type)) {
      for (String form : entry.getWordForms()) {
        collected.add(form);
      }
    }
    return collected;
  }
Пример #7
0
  /**
   * Gathers the usage examples of every synset found for a concept.
   *
   * @param concept the word to look up
   * @return all usage examples in synset order; empty when there are none
   */
  public List<String> getExamples(String concept) {
    List<String> collected = new ArrayList<String>();

    Synset[] senses = database.getSynsets(concept);
    for (int i = 0; i < senses.length; i++) {
      String[] usages = senses[i].getUsageExamples();
      for (int j = 0; j < usages.length; j++) {
        collected.add(usages[j]);
      }
    }

    return collected;
  }
Пример #8
0
  /**
   * Creates the wrapper: sets the {@code wordnet.database.dir} system property to a fixed
   * dictionary directory and then obtains the WordNet database handle.
   */
  public Wordnet() {
    super();

    // Earlier variant resolved the dictionary location dynamically:
    // String wordnet_location = getPathDict();
    // System.setProperty("wordnet.database.dir", wordnet_location);
    // NOTE(review): absolute, user-specific path — this will only resolve on a machine with this
    // exact directory; confirm whether the getPathDict() variant above should be restored.
    System.setProperty(
        "wordnet.database.dir", "/Users/angel/Desktop/workspace-miso/metaRDF.core/dict");

    database = WordNetDatabase.getFileInstance();
  }
Пример #9
0
  /**
   * Lists the definition of every synset found for a concept.
   *
   * @param concept the word to look up
   * @return one definition per synset, in synset order; empty when there are no synsets
   */
  public List<String> getMeanings(String concept) {
    Synset[] senses = database.getSynsets(concept);

    List<String> result = new ArrayList<String>();
    for (int i = 0; i < senses.length; i++) {
      result.add(senses[i].getDefinition());
    }
    return result;
  }
Пример #10
0
  /**
   * Lists the definition of every synset found for a concept.
   *
   * @param concept the word to look up
   * @return one definition per synset, in synset order, or {@code null} when the concept has no
   *     synsets
   */
  public List<String> getDefinitions(String concept) {
    Synset[] senses = database.getSynsets(concept);

    // Callers receive null, not an empty list, for an unknown concept.
    if (senses.length == 0) {
      return null;
    }

    List<String> result = new ArrayList<String>();
    for (Synset sense : senses) {
      result.add(sense.getDefinition());
    }
    return result;
  }
Пример #11
0
  /**
   * Collects the word forms that the synsets of {@code concept1} report as derivationally
   * related to {@code concept2}.
   *
   * @param concept1 the word whose synsets are searched
   * @param concept2 the word form passed to each synset's derivational lookup
   * @return the related word forms in encounter order; empty when there are none
   */
  public List<String> getDerivation(String concept1, String concept2) {
    List<String> related = new ArrayList<String>();

    Synset[] senses = database.getSynsets(concept1);
    for (int i = 0; i < senses.length; i++) {
      WordSense[] links = senses[i].getDerivationallyRelatedForms(concept2);
      for (int j = 0; j < links.length; j++) {
        related.add(links[j].getWordForm());
      }
    }

    return related;
  }
  /**
   * Collects the word forms derivationally related to a lemma, across all of its synsets of the
   * given type.
   *
   * @param lemma the word to look up; also the word form passed to each synset's derivational
   *     lookup
   * @param type the synset type the lookup is restricted to
   * @return the distinct related word forms; empty when there are none
   */
  public Set<String> getDerivationallyRelatedForms(String lemma, SynsetType type) {
    Set<String> related = new HashSet<String>();

    Synset[] matches = wordnet.getSynsets(lemma, type);
    for (int i = 0; i < matches.length; i++) {
      for (WordSense link : matches[i].getDerivationallyRelatedForms(lemma)) {
        related.add(link.getWordForm());
      }
    }

    return related;
  }
  /**
   * Collects the word forms of all hypernyms of the noun synsets found for a lemma.
   *
   * @param lemma the word to look up as a noun
   * @return the distinct word forms of every hypernym synset; empty when there are none
   */
  public Set<String> getNounHypernym(String lemma) {
    Set<String> collected = new HashSet<String>();

    for (Synset match : wordnet.getSynsets(lemma, SynsetType.NOUN)) {
      // The lookup is restricted to nouns, so each result is cast to NounSynset.
      NounSynset[] parents = ((NounSynset) match).getHypernyms();
      for (int i = 0; i < parents.length; i++) {
        collected.addAll(Arrays.asList(parents[i].getWordForms()));
      }
    }

    return collected;
  }
  /**
   * Collects the word forms of all hypernyms of the verb synsets found for a lemma.
   *
   * @param lemma the word to look up as a verb
   * @return the distinct word forms of every hypernym synset; empty when there are none
   */
  public Set<String> getVerbHypernym(String lemma) {
    Set<String> collected = new HashSet<String>();

    for (Synset match : wordnet.getSynsets(lemma, SynsetType.VERB)) {
      // The lookup is restricted to verbs, so each result is cast to VerbSynset.
      VerbSynset[] parents = ((VerbSynset) match).getHypernyms();
      for (int i = 0; i < parents.length; i++) {
        collected.addAll(Arrays.asList(parents[i].getWordForms()));
      }
    }

    return collected;
  }
Пример #15
0
 /**
  * Returns the word forms of the first synset of a concept whose definition equals the given
  * definition.
  *
  * @param concept the word to look up
  * @param definition the exact definition text to match
  * @return the word forms of the first matching synset; an empty list when no synset has that
  *     definition; {@code null} when the concept has no synsets at all
  */
 public List<String> getSynonymsByDefition(String concept, String definition) {
   Synset[] candidates = database.getSynsets(concept);
   if (candidates.length == 0) {
     return null;
   }

   List<String> matches = new ArrayList<String>();
   for (Synset candidate : candidates) {
     if (!candidate.getDefinition().equals(definition)) {
       continue;
     }
     for (String form : candidate.getWordForms()) {
       matches.add(form);
     }
     // Only the first synset with this definition is used.
     break;
   }
   return matches;
 }
Пример #16
0
  /**
   * Builds a map from synset definition to that synset's word forms, usage examples and tag
   * count, for every synset found for a concept.
   *
   * <p>Entries are keyed by definition text, so a later synset with the same definition replaces
   * an earlier one.
   *
   * @param concept the word to look up
   * @return the proposals keyed by definition; empty when the concept has no synsets
   */
  public Map<String, Tuple<String[], String[]>> getSynonymsProposal(String concept) {
    Map<String, Tuple<String[], String[]>> proposals =
        new HashMap<String, Tuple<String[], String[]>>();

    for (Synset sense : database.getSynsets(concept)) {
      int frequency;
      try {
        frequency = sense.getTagCount(concept);
      } catch (edu.smu.tspell.wordnet.WordNetException e) {
        // Fall back to the tag count of the synset's first word form.
        frequency = sense.getTagCount(sense.getWordForms()[0]);
      }

      Tuple<String[], String[]> entry =
          new Tuple<String[], String[]>(sense.getWordForms(), sense.getUsageExamples(), frequency);
      proposals.put(sense.getDefinition(), entry);
    }

    return proposals;
  }
Пример #17
0
  /**
   * Grades a student's model against a reference solution and a list of evaluation criteria.
   *
   * <p>Marks start at {@code totalMarks} and are reduced through {@code deductMarks} in three
   * passes over {@code markers}: criteria of type "LifeLine", criteria of type "Operation", and
   * criteria of type "Mistake" (matched against the entries of {@code mistakes}). Matched
   * mistakes are grouped by error name and added to the evaluation result together with the
   * total and student marks.
   *
   * <p>Any exception raised inside the grading passes is caught and its stack trace printed; the
   * method then still returns a result, which in that case may lack marks and errors.
   *
   * @param solution the reference model; its lifelines and operations are read through
   *     {@code SystemSequenceDiagramReader}
   * @param studentModel the student's model (not referenced anywhere in this method body)
   * @param mistakes previously detected errors and warnings to match against "Mistake" criteria
   * @param markers the evaluation criteria to apply
   * @param totalMarks the maximum marks, used as the starting value before deductions
   * @return a new {@code Result} carrying the populated {@code EvaluationResult}
   * @throws Exception declared by the signature; only the statements before the {@code try}
   *     block can propagate one
   */
  public Result evaluateModel(
      ModelFile solution,
      ModelFile studentModel,
      Result mistakes,
      ArrayList<EvaluationCriteria> markers,
      double totalMarks)
      throws Exception {
    // The WordNet dictionary directory is derived from the class loader's root resource path.
    String wordNetpath = this.getClass().getClassLoader().getResource("").getPath() + "/WordNetDic";
    System.setProperty("wordnet.database.dir", wordNetpath);
    SystemSequenceDiagramReader reader = new SystemSequenceDiagramReader();
    SSD solutionDetails = reader.getRefModelDetails(solution);

    EvaluationResult evaluationResult = new EvaluationResult();

    Double studentMarks = totalMarks;

    try {
      /*
       * Pass 1: compare each "LifeLine" criterion with the reference model's lifelines.
       */
      for (EvaluationCriteria marker : markers) {
        if (marker.getType().equals("LifeLine")) {

          for (String lifeline : solutionDetails.getLifelines()) {
            /*
             * If the lifeline/class name does not match the criterion's element name
             * exactly (ignoring case), look for the element name among the lifeline's
             * WordNet word forms.
             */

            // NOTE(review): equals() implies contains(), so this disjunction reduces to
            // "names differ ignoring case"; the contains() operand has no effect.
            if (!lifeline.toLowerCase().contains(marker.getElementName().toLowerCase())
                || !lifeline.toLowerCase().equals(marker.getElementName().toLowerCase())) {
              boolean variationFound = false;
              WordNetDatabase database = WordNetDatabase.getFileInstance();
              Synset[] synsets = database.getSynsets(lifeline.toLowerCase());
              for (int k = 0; k < synsets.length; k++) {
                String[] wordForms = synsets[k].getWordForms();
                for (int j = 0; j < wordForms.length; j++) {
                  if (wordForms[j].toLowerCase().equals(marker.getElementName().toLowerCase())) {
                    variationFound = true;
                    // Leaves only the inner word-form loop; remaining synsets are still scanned.
                    break;
                  }
                }
              }
              // NOTE(review): a lifeline that differs and has no synonym match deducts marks and
              // stops the scan for this criterion, so later lifelines are never compared with
              // it. A synonym match deducts nothing and moves on to the next lifeline.
              if (!variationFound) {
                studentMarks = deductMarks(studentMarks, marker.isEssential(), marker.getMarks());
                break;
              }
            } else {
              // Exact match: consume the lifeline so it cannot match another criterion. The
              // list is modified during iteration, but the loop is left immediately afterwards.
              solutionDetails.getLifelines().remove(lifeline);
              break;
            }
          }
        }
      }
      /*
       * Pass 2: compare each "Operation" criterion with the reference model's operations.
       */
      for (EvaluationCriteria marker : markers) {
        if (marker.getType().equals("Operation")) {
          for (String operation : solutionDetails.getOperations()) {
            boolean found = false;
            // NOTE(review): intended as the exact-match test, but the condition sets found when
            // the names DIFFER (ignoring case) — looks inverted; confirm the intended rule.
            if (!found && !operation.toLowerCase().equals(marker.getElementName().toLowerCase())) {
              found = true;
            }
            /*
             * Otherwise, when the element name contains "(", search the operation for the
             * text preceding the first "(".
             */
            else if (!found && marker.getElementName().contains("(")) {
              String[] values = marker.getElementName().split("\\(");
              // NOTE(review): the name prefix is compiled as a regular expression without
              // quoting, so regex metacharacters in it are interpreted, not matched literally.
              Pattern pattern = Pattern.compile(values[0]);
              Matcher matcher = pattern.matcher(operation);
              while (matcher.find()) {
                found = true;
                break;
              }
            }
            // NOTE(review): the deduction branch does not leave the loop, so marks can be
            // deducted once per operation for the same criterion.
            if (!found)
              studentMarks = deductMarks(studentMarks, marker.isEssential(), marker.getMarks());
            else {
              // Consume the operation; the list is modified during iteration, but the loop is
              // left immediately afterwards.
              solutionDetails.getOperations().remove(operation);
              break;
            }
          }
        }
      }
      /*
       * Pass 3: match "Mistake" criteria against the detected mistakes, deduct marks for
       * essential criteria, and group the matched mistakes by error name.
       */
      HashMap<String, ArrayList<String>> mistakesResult = new HashMap<String, ArrayList<String>>();

      for (EvaluationCriteria marker : markers) {

        if (marker.getType().equals("Mistake")) {
          for (Errors mistake : mistakes.getErrors()) {

            if (mistake.getErrorDiscrption().contains(marker.getElementName())) {
              // Non-essential criteria are neither deducted nor recorded.
              if (marker.isEssential() == true) {
                studentMarks = deductMarks(studentMarks, marker.isEssential(), marker.getMarks());

                String error = mistake.getErrorName();

                // Errors: record "elementName%description" under the error name.
                if (mistake.getType().equals("Error")) {

                  if (mistakesResult.containsKey(error)) {
                    mistakesResult
                        .get(error)
                        .add(mistake.getElementName() + "%" + mistake.getErrorDiscrption());
                  } else {
                    ArrayList<String> list = new ArrayList<>();
                    list.add(mistake.getElementName() + "%" + mistake.getErrorDiscrption());
                    mistakesResult.put(error, list);
                  }
                }

                // Warnings: recorded exactly the same way as errors.
                if (mistake.getType().equals("Warning")) {

                  if (mistakesResult.containsKey(error)) {
                    mistakesResult
                        .get(error)
                        .add(mistake.getElementName() + "%" + mistake.getErrorDiscrption());
                  } else {
                    ArrayList<String> list = new ArrayList<>();
                    list.add(mistake.getElementName() + "%" + mistake.getErrorDiscrption());
                    mistakesResult.put(error, list);
                  }
                }
              }
            }
          }
        }
      }
      // Convert the grouped "elementName%description" strings into result objects.
      Set<String> keySet = mistakesResult.keySet();
      for (String error : keySet) {

        int count = 0;
        EvaluationResultError resultError = new EvaluationResultError();
        for (String string : mistakesResult.get(error)) {
          // NOTE(review): assumes neither the element name nor the description contains "%";
          // otherwise token[1] holds only part of the description.
          String[] token = string.split("%");
          EvaluationResultErrorsDetail detail = new EvaluationResultErrorsDetail();
          detail.setElementName(token[0]);
          detail.setErrorDiscption(token[1]);
          resultError.addDetail(detail);
          count++;
        }

        resultError.setErrorName(error);
        resultError.setErrorCount(count);
        evaluationResult.addErrors(resultError);
      }

      /*
       * Record the maximum marks and the student's marks after deductions.
       */

      evaluationResult.setTotalMarks(totalMarks);
      evaluationResult.setStudentMarks(studentMarks);

    } catch (Exception e) {
      // Failures are only printed; the possibly incomplete evaluationResult is still returned.
      e.printStackTrace();
    }
    Result result = new Result();
    result.setEvaluationResult(evaluationResult);
    return result;
  }
Пример #18
0
  /**
   * Reports whether a word has at least one verb synset.
   *
   * @param word the word to look up
   * @return {@code true} if any synset found for the word is of verb type
   */
  public boolean isVerb(String word) {
    Synset[] senses = database.getSynsets(word);
    for (int i = 0; i < senses.length; i++) {
      if (senses[i].getType().equals(SynsetType.VERB)) {
        return true;
      }
    }
    return false;
  }
/**
 * Expands free-text queries with WordNet word forms.
 *
 * <p>A query is tokenized and tagged by the tagger loaded from
 * {@code /cmu/arktweetnlp/model.20120919}; each token whose tag maps to a WordNet synset type is
 * kept and followed by up to {@link #MAX_WORD_FORMS} word forms of its first synset.
 */
public class QueryExpansion {

  /** Maximum number of word forms appended after each kept token. */
  private static final int MAX_WORD_FORMS = 3;

  private static final Logger logger = Logger.getLogger(QueryExpansion.class);

  private Tagger tagger = new Tagger();
  private WordNetDatabase database;

  /**
   * Loads the tagger model, configures the WordNet dictionary directory and obtains the WordNet
   * database handle.
   *
   * @throws IOException declared by the original constructor; presumably raised when the tagger
   *     model cannot be loaded
   */
  public QueryExpansion() throws IOException {
    tagger.loadModel("/cmu/arktweetnlp/model.20120919");
    System.setProperty("wordnet.database.dir", GlobalProperty.getInstance().getWordNetPath());
    // Requested only after wordnet.database.dir has been set (a field initializer would run
    // before this constructor body), so the directory is configured whenever the library reads
    // the property.
    database = WordNetDatabase.getFileInstance();
    logger.info("Done initializing QueryExpansion");
  }

  /**
   * Expands a query by appending, after each kept token, up to three word forms taken from the
   * first WordNet synset found for that token.
   *
   * <p>Tokens whose tag maps to no synset type are left out of the result.
   *
   * @param originalQuery the raw query text to tokenize, tag and expand
   * @return the kept tokens, each followed by its appended word forms, joined with spaces and
   *     trimmed
   */
  public String expandQuery(String originalQuery) {
    StringBuilder result = new StringBuilder();

    for (TaggedToken token : tagger.tokenizeAndTag(originalQuery)) {
      SynsetType type = synsetTypeFor(token.tag);
      if (type == null) {
        continue;
      }
      result.append(" ").append(token.token);
      appendWordForms(result, token.token, type);
    }

    String expanded = result.toString().trim();
    logger.info(
        "Before expansion, query = "
            + originalQuery
            + ", after expansion, query = "
            + expanded);
    return expanded;
  }

  /** Maps a tagger tag to the WordNet synset type used for lookup, or null when none applies. */
  private static SynsetType synsetTypeFor(String tag) {
    // "^" must be escaped: unescaped it is the regex start-of-input anchor, so a token tagged
    // with the literal "^" (presumably the proper-noun tag of this tagset — confirm against the
    // tagger documentation) could never match and was silently dropped.
    if (tag.matches("N|O|\\^|S|Z|L|M|Y|X|!")) {
      return SynsetType.NOUN;
    }
    if (tag.matches("V|T")) {
      return SynsetType.VERB;
    }
    if (tag.matches("A")) {
      return SynsetType.ADJECTIVE;
    }
    if (tag.matches("R")) {
      return SynsetType.ADVERB;
    }
    return null;
  }

  /** Appends up to MAX_WORD_FORMS word forms of the word's first synset of the given type. */
  private void appendWordForms(StringBuilder out, String word, SynsetType type) {
    Synset[] synsets = database.getSynsets(word, type);
    if (synsets.length == 0) {
      return;
    }
    String[] forms = synsets[0].getWordForms();
    for (int j = 0; j < Math.min(forms.length, MAX_WORD_FORMS); ++j) {
      out.append(" ").append(forms[j]);
    }
  }

  /**
   * Demo entry point: expands one sample query and prints the result.
   *
   * @param args unused
   * @throws IOException if constructing {@link QueryExpansion} fails
   */
  public static void main(String[] args) throws IOException {
    QueryExpansion qe = new QueryExpansion();
    System.out.println(
        qe.expandQuery(
            "The Political Power of Social Media | Foreign Affairs: http://fam.ag/i5A7Av"));
  }
}
Пример #20
0
  /**
   * Reports whether a word has at least one adjective synset.
   *
   * @param word the word to look up
   * @return {@code true} if any synset found for the word is of adjective type
   */
  public boolean isAdjective(String word) {
    Synset[] senses = database.getSynsets(word);
    for (int i = 0; i < senses.length; i++) {
      if (senses[i].getType().equals(SynsetType.ADJECTIVE)) {
        return true;
      }
    }
    return false;
  }
 /**
  * Creates the wrapper and obtains the WordNet database handle.
  *
  * <p>NOTE(review): this constructor does not set {@code wordnet.database.dir}; presumably the
  * property is configured elsewhere before use — confirm.
  */
 public Wordnet() {
   wordnet = WordNetDatabase.getFileInstance();
 }