private List<Instance> myExtractKeyphrases(String document, int numOfPhrases) throws Exception { // Check whether there is actually any data // if (document.length() == 0 || document.equals("")) { throw new Exception("Couldn't find any data!"); } FastVector atts = new FastVector(3); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); List<Instance> myInstances = new ArrayList<Instance>(); double[] newInst = new double[2]; newInst[0] = (double) data.attribute(0).addStringValue(document); newInst[1] = Instance.missingValue(); data.add(new Instance(1.0, newInst)); m_KEAFilter.input(data.instance(0)); data = data.stringFreeStructure(); ke.setNumPhrases(numOfPhrases); int numPhrases = numOfPhrases; // ke.getNumPhrases(); Instance[] topRankedInstances = new Instance[numPhrases]; Instance inst; // Iterating over all extracted keyphrases (inst) while ((inst = m_KEAFilter.output()) != null) { int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1; if (index < numPhrases) { topRankedInstances[index] = inst; } } double numExtracted = 0, numCorrect = 0; for (int i = 0; i < numPhrases; i++) { if (topRankedInstances[i] != null) { if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) { numExtracted += 1.0; } if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) { numCorrect += 1.0; } myInstances.add(topRankedInstances[i]); } } return myInstances; }
/**
 * Creates a server for the given model.
 *
 * <p>Calls {@code setOptions(model)}, then {@code loadModel(model)} and switches the KEA filter
 * into debug mode. A failure while loading is logged at SEVERE level and not rethrown, so the
 * constructor always completes; in that case the filter may not be usable.
 *
 * @param model the model identifier handed to {@code setOptions} and {@code loadModel}
 *     (presumably a model file name; confirm against those methods)
 */
public KEAServer(String model) {
    setOptions(model);
    try {
        loadModel(model);
        m_KEAFilter.setDebug(true);
        myLogger.info("Loaded model " + model);
    } catch (Exception e) {
        // Construction is deliberately best-effort, so the failure is only logged. Pass the
        // exception itself as the record's Throwable so the stack trace and cause chain are
        // kept; the message text is the same as before.
        myLogger.log(Level.SEVERE, "Failed loading model " + model + " , exception: " + e, e);
    }
}