/**
 * Ranks the candidate Wikipedia senses of an entity against a set of context phrases.
 *
 * <p>Each context phrase other than {@code entity} itself is quoted and joined into one
 * search query. Every sense returned by {@code Wikiminer.getWikipediaSenses} is then scored
 * as {@code hits(sense + context) / hits(context)} using {@code YahooBOSS.makeQuery}, and
 * pushed onto the priority queue with that score.
 *
 * <p>The caller's {@code contextPhrases} list is not modified: callers typically reuse the
 * same list for several entities, and removing entries in place would silently shrink the
 * context on every call.
 *
 * @param entity the surface form to disambiguate; it is excluded from the context query
 * @param contextPhrases the phrases used as disambiguation context (may contain {@code entity})
 * @return the sense arrays in the order the priority queue yields them (presumably best
 *     score first — confirm against the project's {@code PriorityQueue}); empty when
 *     {@code Wikiminer.getXML} returns {@code null} for the entity
 */
public List<String[]> getRankedEntities(String entity, List<String> contextPhrases) {
    // Defensive copy so that dropping the entity does not mutate the caller's list.
    List<String> context = new ArrayList<String>(contextPhrases);
    context.remove(entity);

    // Build the quoted context query once; it is reused for every sense below.
    StringBuilder contextBuilder = new StringBuilder();
    for (String phrase : context) {
        contextBuilder.append('"').append(phrase).append("\" ");
    }
    String contextQuery = contextBuilder.toString();
    int contextCount = YahooBOSS.makeQuery(contextQuery);

    PriorityQueue<String[]> queue = new PriorityQueue<String[]>();
    String xml = Wikiminer.getXML(entity, false);
    if (xml != null) {
        List<String[]> senses = Wikiminer.getWikipediaSenses(xml, true);
        for (String[] senseArr : senses) {
            // senseArr[0] is used as the search term for this sense.
            int senseCount = YahooBOSS.makeQuery("\"" + senseArr[0] + "\" " + contextQuery);
            // A zero context count would yield NaN/Infinity, which cannot be ordered
            // meaningfully; score such senses as 0 instead.
            double score = (contextCount == 0)
                    ? 0.0
                    : (double) senseCount / (double) contextCount;
            queue.add(senseArr, score);
        }
    }

    // Drain the queue to obtain the senses in ranked order.
    List<String[]> rankedEntities = new ArrayList<String[]>();
    while (queue.hasNext()) {
        rankedEntities.add(queue.next());
    }
    return rankedEntities;
}
public static void main(String[] args) { ExtractEntities ee = new ExtractEntities(); DocumentBuilder db = null; DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); BufferedWriter bw = null; BufferedReader br = null; try { br = new BufferedReader(new FileReader("data/" + args[0] + ".txt")); bw = new BufferedWriter(new FileWriter("data/" + args[0] + "_wiki_entities.txt")); } catch (Exception e) { e.printStackTrace(); } try { db = dbf.newDocumentBuilder(); } catch (Exception e) { e.printStackTrace(); } String line = ""; try { while ((line = br.readLine()) != null) { bw.write("=====================================================\n"); bw.write("Tweet: " + line + "\n"); bw.write("=====================================================\n"); HashSet<String> entities = ee.getEntitiesinTweet(line); List<String> contextPhrases = new ArrayList<String>(entities); for (String entity : entities) { bw.write("query: " + entity + "\n"); List<String[]> rankedEntities = ee.getRankedEntities(entity, contextPhrases); // bw.write("RankedEntities: "+rankedEntities+"\n"); for (String[] entityArr : rankedEntities) { bw.write("Entity: " + entityArr[0] + "\n"); String xml = Wikiminer.getXML(entityArr[1], true); bw.write( "RankedTypes: " + Wikiminer.getRankedTypes(entityArr[0], xml, contextPhrases, 5) + "\n"); } bw.write("--------------------------------------------------\n"); } bw.write("\n"); } bw.flush(); } catch (Exception e) { e.printStackTrace(); } }