/**
 * Guesses the {@link ItemType} of a lexical item from its first space-delimited token.
 *
 * <p>The checks run in this order and the first match wins:
 * <ol>
 *   <li>token matches {@code [0-9]*\-[anvr]} &rarr; {@code SENSE_OFFSETS}</li>
 *   <li>token matches {@code [^ ]*%[0-9]*:[^ ]*} &rarr; {@code SENSE_KEYS}</li>
 *   <li>{@code WordNetUtils.mapWordSenseToIWord(token)} returns non-null &rarr; {@code WORD_SENSE}</li>
 *   <li>token matches {@code [^ ]*#[nvra]} &rarr; {@code SURFACE_TAGGED}</li>
 *   <li>otherwise &rarr; {@code SURFACE}</li>
 * </ol>
 *
 * <p>Only the first token is inspected. If the input starts with a space, that token is the
 * empty string. Input consisting solely of spaces is handled the same way as the empty string.
 *
 * @param input the lexical item text; must not be {@code null}
 * @return the guessed item type
 */
public ItemType guessLexicalItemType(String input) {
    // String.split drops trailing empty strings, so an input made up only of spaces yields a
    // zero-length array. Fall back to the empty token rather than failing on index 0.
    String[] tokens = input.split(" ");
    String firstWord = tokens.length > 0 ? tokens[0] : "";

    if (firstWord.matches("[0-9]*\\-[anvr]")) {
        return ItemType.SENSE_OFFSETS;
    }
    if (firstWord.matches("[^ ]*%[0-9]*:[^ ]*")) {
        return ItemType.SENSE_KEYS;
    }
    if (WordNetUtils.getInstance().mapWordSenseToIWord(firstWord) != null) {
        return ItemType.WORD_SENSE;
    }
    if (firstWord.matches("[^ ]*#[nvra]")) {
        return ItemType.SURFACE_TAGGED;
    }
    return ItemType.SURFACE;
}
public Pair<List<String>, List<String>> cookLexicalItem( String text, ItemType textType, boolean discardStopwords) { try { List<String> cookedSentence = new ArrayList<String>(); Pair<List<String>, List<String>> out = new Pair<List<String>, List<String>>(null, null); for (String item : Arrays.asList(text.split(" "))) { if (item.trim().length() == 0) continue; switch (textType) { case SENSE_OFFSETS: cookedSentence.add(item); break; case SENSE_KEYS: IWord sense = WordNetUtils.getInstance().getSenseFromSenseKey(item); cookedSentence.add( GeneralUtils.fixOffset(sense.getSynset().getOffset(), sense.getPOS())); break; case WORD_SENSE: IWord snse = WordNetUtils.getInstance().mapWordSenseToIWord(item); cookedSentence.add(GeneralUtils.fixOffset(snse.getSynset().getOffset(), snse.getPOS())); break; case SURFACE_TAGGED: cookedSentence.add(item); break; } } if (textType.equals(ItemType.SURFACE)) { out = TextualSimilarity.getInstance().cookSentence(text); cookedSentence = out.first; } if (cookedSentence == null) cookedSentence = new ArrayList<String>(); List<String> newCS = new ArrayList<String>(); for (String s : cookedSentence) { // if it is a synset if (s.matches("[0-9]*\\-[anvr]")) { newCS.add(s); continue; } String comps[] = s.split("#"); String word = comps[0]; String ps = comps[1]; // otherwise check word exists in WordNet if (!TextualSimilarity.getInstance().isOOV(word, ps)) newCS.add(word + "#" + ps); } cookedSentence = newCS; return new Pair<List<String>, List<String>>(cookedSentence, out.second); } catch (Exception e) { e.printStackTrace(); } return null; }