  // Populates keywords from ground-truth mentions, used for collective
  // training where gold mentions are already available: first from the
  // Wikipedia ground map, then (optionally) from the manually annotated one.
 public void setKeywordsTraining(
     HashMap<String, ArrayList<XMLTagInfo>> groundMapWiki,
     HashMap<String, ArrayList<XMLTagInfo>> groundMapManual,
     String file) {
    ArrayList<XMLTagInfo> wikiMentions = groundMapWiki.get(file);
    for (int i = 0; i < wikiMentions.size(); i++) {
      XMLTagInfo tag = wikiMentions.get(i);
      Mention mention = new Mention();
      mention.key = tag.mention;
      mention.name = tag.mention;
      mention.length = tag.length;
      mention.offset = tag.offset;
      if (null == mention.name)
        mention.name = document.substring(mention.offset, mention.offset + mention.length);
      if (mention.offset < document.length() - 1) {
        // substring's end index is exclusive, so clamp to document.length()
        // rather than document.length() - 1 to keep the final character.
        int context_lo = Math.max(0, mention.offset - contextSize);
        int context_hi = Math.min(document.length(), mention.offset + contextSize);
        String contextString = document.substring(context_lo, context_hi);
        mention.context = contextString.replaceAll("[^0-9a-z\\sA-Z/\\-]", "");
        int con_lo = Math.max(0, mention.offset - 10);
        int con_hi = Math.min(document.length(), mention.offset + 10);
        mention.contextAroundMention =
            document.substring(con_lo, con_hi).replaceAll("[^0-9a-z\\sA-Z]", " ");
        mention.contextAroundMention += " " + mention.name.replaceAll("[^0-9a-z\\sA-Z]", " ");
        keywords.add(mention);
      }
    }
    if (groundMapManual != null) {
      ArrayList<XMLTagInfo> manualMentions = groundMapManual.get(file);
      for (int i = 0; i < manualMentions.size(); i++) {
        XMLTagInfo tag = manualMentions.get(i);
        Mention mention = new Mention();
        mention.key = tag.mention;
        mention.name = tag.mention;
        mention.length = tag.mention.length();
        mention.offset = tag.offset;
        int context_lo = Math.max(0, mention.offset - contextSize);
        int context_hi = Math.min(document.length(), mention.offset + contextSize);
        String contextString = document.substring(context_lo, context_hi);
        mention.context = contextString.replaceAll("[^0-9a-z\\sA-Z/\\-]", "");
        int con_lo = Math.max(0, mention.offset - 10);
        int con_hi = Math.min(document.length(), mention.offset + 10);
        mention.contextAroundMention =
            document.substring(con_lo, con_hi).replaceAll("[^0-9a-z\\sA-Z]", " ");
        mention.contextAroundMention += " " + mention.name.replaceAll("[^0-9a-z\\sA-Z]", " ");
        keywords.add(mention);
      }
    }
 }
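
  // The two loops above are near-duplicates. A minimal refactor sketch (a
  // suggestion only, assuming Mention, contextSize, and the existing
  // getContext helper keep their current shapes) could share a builder:
  //
  //   private Mention buildGroundMention(String name, int offset, int length) {
  //     Mention m = new Mention();
  //     m.key = name;
  //     m.name = name;
  //     m.offset = offset;
  //     m.length = length;
  //     m.context = getContext(offset, length, contextSize);
  //     m.contextAroundMention = getContext(offset, length, 10);
  //     return m;
  //   }
  //
  // Note the training path above applies slightly different character cleanup
  // to the context strings, so the sketch is not a drop-in replacement.

  // Detects mentions by running the Wikipedia Miner annotator over the whole
  // document and attaches the candidate senses it returns to each mention.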
  public void setKeywordsWikiMiner() {
    try {
      WikipediaAnnotator annotator = new WikipediaAnnotator();
      long annStartTime = System.currentTimeMillis();

      HashMap<String, Label.Sense[]> ment2ent = annotator.annotate(document);

      long annEndTime = System.currentTimeMillis();
      System.out.println(
          "Time taken by annotator: " + (annEndTime - annStartTime) + " milliseconds");

      for (String key : ment2ent.keySet()) {
        // Disabled alternative: filter the senses down to those present in the
        // Freebase dataset (via WikiToFreebaseIDMap) before building LabelSense.
        LabelSense senses = new LabelSense(ment2ent.get(key));
        Mention mention = new Mention();
        // Keys are encoded as "<mention>_<offset>".
        String[] parts = key.split("_");
        String ment = parts[0];
        int off = Integer.parseInt(parts[1]);
        mention.key = ment;
        mention.name = ment;
        mention.length = ment.length();
        mention.offset = off;
        mention.context = getContext(off, mention.length, contextSize);
        mention.contextAroundMention = getContext(off, mention.length, 10);

        mention.senses = senses;
        keywords.add(mention);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
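
  // Consolidates overlapping single-token mentions into the longest n-gram (up
  // to maxLength tokens) that has at least one known sense, preferring longer
  // phrases; tokens covered by a consolidated mention are not emitted again.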
  public void consolidateMentions(int maxLength) {

    if (!Config.Server && thesaurus == null) {
      thesaurus = new Wikisaurus();
    }

    ClientWikisauras wikisaurusClient = new ClientWikisauras();

    ArrayList<Mention> mentions = new ArrayList<Mention>();
    mentions.addAll(keywords);
    keywords = new ArrayList<Mention>();
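    // token_type[i]: 0 = not yet consumed, 1 = covered by a consolidated
    // mention, 2 = article token (skipped, never merged).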
    Integer[] token_type = new Integer[mentions.size()];
    for (int i = 0; i < token_type.length; i++) token_type[i] = 0;

    int curr_offset = 0;
    String curr_mention = "";
    for (int i = 0; i < mentions.size(); i++) {
      if (token_type[i] != 0) {
        continue;
      }
      curr_offset = mentions.get(i).offset;
      curr_mention = mentions.get(i).name;

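      // Build candidate phrases starting at this mention: allWords[k] spans
      // k + 1 whitespace-delimited tokens beginning at curr_offset.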
      String[] allWords = new String[maxLength];
      Integer[] allOffset = new Integer[maxLength];
      String currWord = curr_mention;
      Integer currWordEnd = curr_offset + curr_mention.length() + 1;

      allWords[0] = currWord;
      allOffset[0] = curr_offset;
      int k = 1;
      for (; k < maxLength; k++) {
        currWordEnd = document.indexOf(" ", currWordEnd + 1);
        if (currWordEnd == -1) currWordEnd = document.length();
        if (curr_offset < 0 || curr_offset >= document.length()) {
          k--;
          break;
        }
        currWord = document.substring(curr_offset, currWordEnd);
        allWords[k] = currWord;
        allOffset[k] = currWordEnd;
        if (currWordEnd >= document.length()) break;
      }
      if (k == maxLength) k--;

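      // Try the longest candidate first and back off to shorter phrases until
      // one with known senses is found.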
      for (; k >= 0; k--) {
        LabelSense senses = null;
        try {
          if (Config.Server) senses = wikisaurusClient.getSenses(allWords[k]);
          else {
            Label.Sense[] temp = thesaurus.getSenses(allWords[k]);
            if (temp != null) senses = new LabelSense(temp);

            // Disabled alternatives: a Freebase-backed lookup
            // (WikiToFreebaseIDMap) that merged Freebase titles into the
            // candidate list (too slow for long text), and a Lucene phrase
            // search (LuceneIndexWrapper) over page titles for short n-grams
            // containing no stopwords.
          }
        } catch (Exception e) {
          e.printStackTrace();
          System.exit(1);
        }
        if (null != senses) {

          // Disabled alternative: filter the candidates down to those with a
          // Freebase id before attaching them to the mention.

          Mention new_mention = new Mention();
          new_mention.name = allWords[k];
          new_mention.length = new_mention.name.length();
          new_mention.offset = curr_offset;
          new_mention.context = getContext(curr_offset, new_mention.length, contextSize);
          new_mention.contextAroundMention = getContext(curr_offset, new_mention.length, 10);
          if (k == 0) new_mention.key = mentions.get(i).key;
          new_mention.senses = senses;

          System.out.println("wikiminer candidate for : " + new_mention.name);
          for (int ic = 0; ic < senses.wikiMinerCandidate.length; ++ic) {
            System.out.println(
                "\t" + senses.wikiMinerCandidate[ic] + "  " + senses.wikiMinerProbability[ic]);
          }

          keywords.add(new_mention);
          if (!isArticleToken(curr_mention)) {
            // Mark every original mention covered by the consolidated span so
            // it is not processed again.
            for (int j = i;
                j < mentions.size()
                    && mentions.get(j).offset < (new_mention.offset + new_mention.length);
                j++) token_type[j] = 1;
          } else {
            token_type[i] = 2;
          }
          break;
        }
      }

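      // No phrase starting here had any senses: keep the original single-token
      // mention unless it is an article or otherwise invalid token.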
      if (token_type[i] == 0 && !isArticleToken(curr_mention) && isValidToken(curr_mention)) {
        keywords.add(mentions.get(i));
      }
    }
  }
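
  // Extracts candidate mentions from the raw document: POS-tags the text,
  // keeps non-stopwords and tokens with whitelisted noun/adjective/extra tags,
  // attaches context windows, and finally merges adjacent tokens into longer
  // mentions via consolidateMentions.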
  public void setKeywords(boolean stem) throws Exception {
    tagged_document = tagger.tagString(document);
    ArrayList<String> tokens = new ArrayList<String>();

    StringTokenizer str = new StringTokenizer(tagged_document);

    while (str.hasMoreTokens()) {
      String token = str.nextToken();
      if (token == null || "".equals(token) || " ".equals(token)) continue;
      // Keep non-stopwords and any token whose POS tag is whitelisted; each
      // qualifying token is added exactly once.
      if (!Stopwords.isStopword(token.split("_")[0])
          || noun_tags.contains(token.split("_")[1])
          || adj_tags.contains(token.split("_")[1])
          || extra_tags.contains(token.split("_")[1])) tokens.add(token);
    }

    // Walk the tagged tokens, building a Mention for each token whose POS tag
    // is whitelisted.
    int curr_offset = 0;
    for (int i = 0; i < tokens.size(); i++) {
      if (tokens.get(i) == null) continue;
      Matcher matcher = pattern.matcher(tokens.get(i));
      // Guard the match: calling group() after a failed find() would throw.
      if (!matcher.find()) continue;
      String word = matcher.group(1);
      String tag = matcher.group(2);

      if (word == null || "".equals(word)) continue;
      String token = word.replaceAll("[^0-9a-z\\sA-Z/\\-]", "");
      if ("".equals(token) || "/".equals(token)) continue;
      if (!(noun_tags.contains(tag) || adj_tags.contains(tag) || extra_tags.contains(tag)))
        continue;
      Mention mention = new Mention();

      if (tag.equals("JJ")) {
        // For adjectives, prefer the WordNet noun form as the lookup key.
        String nounForm = TestJAWS.getNounForm(token);
        mention.key = (nounForm != null && !"".equals(nounForm)) ? nounForm : token;
      } else {
        mention.key = token;
      }

      mention.name = word;
      mention.length = word.length();
      int found = document.indexOf(word, curr_offset);
      if (found < 0) continue; // tagger output not found verbatim in the document
      curr_offset = found;

      mention.offset = curr_offset;
      mention.context = getContext(curr_offset, mention.length, contextSize);
      mention.contextAroundMention = getContext(curr_offset, mention.length, 10);
      keywords.add(mention);
    }
    consolidateMentions(6);
  }