public ParseEssay() { System.setProperty("wordnet.database.dir", "../war/dict"); synonyms = new ArrayList<String>(); database = WordNetDatabase.getFileInstance(); baos = new ByteArrayOutputStream(); lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); // ?? }
public class Parser { private String grammar = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"; private String[] options = {"-maxLength", "80", "-retainTmpSubcategories"}; private LexicalizedParser lp = LexicalizedParser.loadModel(grammar, options); private TreebankLanguagePack tlp = lp.getOp().langpack(); private GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); public Parser() {} public LinkedList<String> getKeyWrodsFromSentence(String string) { LinkedList<String> list = new LinkedList<String>(); String[] sent = string.split(" "); List<HasWord> sentence = new ArrayList<HasWord>(); for (String word : sent) sentence.add(new Word(word)); Tree parse = lp.parse(sentence); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); List<TypedDependency> tdl = gs.typedDependenciesCCprocessed(); String[] current; String type, key; List<CoreLabel> labelsList = parse.taggedLabeledYield(); for (Label l : labelsList) { current = l.toString().split("-"); type = current[0]; if (type.equals("NN") || type.equals("NNS")) { key = sent[Integer.parseInt(current[1])]; list.add(key); } } return list; } public LinkedList<String> getKeyWrodsFromSentenceTest(String string) { LinkedList<String> list = new LinkedList<String>(); String[] sent = string.split(" "); List<HasWord> sentence = new ArrayList<HasWord>(); for (String word : sent) { sentence.add(new Word(word)); } Tree parse = lp.parse(sentence); parse.pennPrint(); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); List<TypedDependency> tdl = gs.typedDependenciesCCprocessed(); System.out.println(tdl); System.out.println(); System.out.println("The words of the sentence:"); for (Label lab : parse.yield()) { if (lab instanceof CoreLabel) { System.out.println(((CoreLabel) lab).toString(CoreLabel.OutputFormat.VALUE_MAP)); } else { System.out.println(lab); } } System.out.println(); System.out.println("tagged"); System.out.println(parse.taggedYield()); List<CoreLabel> temp = parse.taggedLabeledYield(); 
for (Label l : temp) { String[] sss = l.toString().split("-"); String type = sss[0]; System.out.println(sss[0] + " " + sss[1] + " " + sent[Integer.parseInt(sss[1])]); } for (Iterator<String> ite = list.iterator(); ite.hasNext(); ) System.out.println(ite.next()); return list; } public static void main(String[] args) throws IOException { Parser parser = new Parser(); parser.getKeyWrodsFromSentence( "When athletes begin to exercise, their heart rates and respiration rates increase. At what level of organization does the human body coordinate these functions?"); parser.getKeyWrodsFromSentenceTest( "When athletes begin to exercise, their heart rates and respiration rates increase. At what level of organization does the human body coordinate these functions?"); // main2(); } }
/** * parse sentence and generate .trees file * * @param en * @param align * @param out */ public static void parse(String en, String align, String out, boolean verbose) { // use alignments? boolean use_alignments = true; if (align.startsWith("no_align")) { use_alignments = false; System.err.println("Not using alignments."); } else { System.err.println("Using alignments from " + align); } // setup stanfordparser String grammar = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"; String[] options = {"-outputFormat", "wordsAndTags, typedDependencies"}; LexicalizedParser lp = LexicalizedParser.loadModel(grammar, options); TreebankLanguagePack tlp = lp.getOp().langpack(); java.util.function.Predicate<java.lang.String> punctuationFilter = x -> true; GrammaticalStructureFactory gsf = new edu.stanford.nlp.trees.EnglishGrammaticalStructureFactory(punctuationFilter); // read document Iterable<List<? extends HasWord>> sentences; Reader r = new Reader(en); String line = null; List<List<? extends HasWord>> tmp = new ArrayList<List<? extends HasWord>>(); while ((line = r.getNext()) != null) { Tokenizer<? extends HasWord> token = tlp.getTokenizerFactory().getTokenizer(new StringReader(line)); List<? extends HasWord> sentence = token.tokenize(); tmp.add(sentence); } sentences = tmp; // set up alignment file reader Reader alignment = new Reader(); if (use_alignments) { alignment = new Reader(align); } // set up tree file writer Writer treeWriter = new Writer(out); // parse long start = System.currentTimeMillis(); // System.err.print("Parsing sentences "); int sentID = 0; for (List<? 
extends HasWord> sentence : sentences) { Tree t = new Tree(); // t.setSentID(++sentID); System.err.println("parse Sentence :" + sentence + "..."); // System.err.print("."); System.err.println("-----------------------------------------------------------------------"); edu.stanford.nlp.trees.Tree parse = lp.parse(sentence); // parse.pennPrint(); // List for root node and lexical nodes List<Node> loneNodes = new LinkedList<Node>(); List<Node> governingNodes = new LinkedList<Node>(); // ROOT node Node root = new Node(true, true); root.setTag("ROOT"); t.setRoot(root); loneNodes.add(root); governingNodes.add(root); // tagging int counter = 0; String surface = ""; String tag = ""; for (TaggedWord tw : parse.taggedYield()) { Node n = new Node(); Node governingNode = new Node(); n.setNodeID(++counter); surface = tw.value(); tag = tw.tag(); if (surface.startsWith("-LRB-")) { surface = "("; } else if (surface.startsWith("-RRB-")) { surface = ")"; // } else if (surface.startsWith("-LSB-")){ // surface = "["; // } else if (surface.startsWith("-RSB-")){ // surface = "]"; // } else if (surface.startsWith("-LCB-")){ // surface = "{"; // } else if (surface.startsWith("-RCB-")){ // surface = "}"; } else if (surface.startsWith("''")) { surface = "\""; } tag = tag.replaceAll("#", "-NUM-"); surface = surface.replaceAll("&", "-AMP-"); surface = surface.replaceAll("#", "-NUM-"); surface = surface.replaceAll(">", "-GRE-"); surface = surface.replaceAll("=", "-EQU-"); n.setInitialLexicalIndex(counter); governingNode.setInitialLexicalIndex(counter); n.setSurface(surface); // System.out.print("("+tw.value()+" : "); n.setTag(tag); governingNode.setTag("_" + tag); governingNode.setLabel("_gov"); // System.out.print(tw.tag()+")"); loneNodes.add(n); governingNodes.add(governingNode); governingNode.setChild(n); } // System.out.println(""); // t.setSentLength(t.getNodes().size() - 1); // List<Node> loneNodes = new LinkedList<Node>(); Node[] nodes = new Node[2000]; // labeling int depIndex; int 
govIndex; String[] depInfo; String[] govInfo; GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); List<TypedDependency> tdl = gs.typedDependencies(false); // List<TypedDependency> tdl = gs.typedDependenciesCCprocessed(); for (TypedDependency td : tdl) { depIndex = td.dep().index(); govIndex = td.gov().index(); // System.out.println("Index1:"+depIndex); // System.out.println("Index2:"+govIndex); // if (nodes[depIndex] == null){ // System.out.println("Making node!"); // nodes[depIndex] = new Node(); // } // if (nodes[govIndex] == null){ // System.out.println("Making node!"); // nodes[govIndex] = new Node(); // } Node dep = loneNodes.get((depIndex)); Node gov = governingNodes.get((govIndex)); Node depcopy = governingNodes.get((depIndex)); Node govcopy = loneNodes.get((govIndex)); dep.setLabel(td.reln().toString()); depcopy.setLabel(td.reln().toString()); govcopy.setLabel("head"); // System.out.println(td.toString()); govInfo = td.gov().toString().split("/"); depInfo = td.dep().toString().split("/"); // System.out.println(td.gov().toString()); // System.out.println(td.dep().toString()); // dep.setSurface(depInfo[0]); // dep.setTag(depInfo[1]); gov.setChild(governingNodes.get(depIndex)); governingNodes.get(depIndex).setParent(gov); // gov.setChild(dep); dep.setParent(governingNodes.get(depIndex)); } // t.setRoot(nodes[0]); // Collapse tree to remove unneeded governing nodes: Node gov; Node dep; Node parent; List<Node> children; for (int i = 1; i < governingNodes.size(); i++) { // start with index 1 to skip root gov = governingNodes.get(i); dep = loneNodes.get(i); if (gov.getChildren().size() <= 1) { int k = 0; parent = gov.getParent(); children = parent.getChildren(); for (Node n : children) { if (n == gov) { gov.getParent().replaceChild(k, dep); dep.setParent(gov.getParent()); } k++; } } } // Mark head nodes with appropriate label: int k = 0; for (Node n : loneNodes) { if (k != 0) { if (n.getLabel() == n.getParent().getLabel()) { n.setLabel("head"); } } else 
{ n.setLabel("null"); } k++; } // Sort lexical children of each governing node in lexical order for (Node n : governingNodes) { n.sortChildrenByInitialIndex(); } // combine with alignment if (use_alignments) { t.initialize(alignment.readNextAlign()); } else { t.initializeUnaligned(); } // write tree to file treeWriter.write(t); // print tree to console System.out.println(t.toSentence()); if (verbose) { System.err.println(t.toString()); // t.recursivePrint(); } System.err.println("#######################################################################"); } long stop = System.currentTimeMillis(); System.err.println("...done! [" + (stop - start) / 1000 + " sec]."); treeWriter.close(); }
/**
 * Bundles a Stanford PCFG parser, a PTB tokenizer and a CRF named-entity classifier behind a
 * small sentence-level API. All models are loaded when an instance is created.
 */
class StanfordParser {
  private final String PCG_MODEL = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
  private final TokenizerFactory<CoreLabel> tokenizerFactory =
      PTBTokenizer.factory(new CoreLabelTokenFactory(), "invertible=true");
  private final LexicalizedParser parser = LexicalizedParser.loadModel(PCG_MODEL);
  private final String serializedClassifier =
      "edu/stanford/nlp/models/ner/english.muc.7class.distsim.crf" + ".ser.gz";
  private final AbstractSequenceClassifier<CoreLabel> classifier =
      CRFClassifier.getClassifierNoExceptions(serializedClassifier);

  /**
   * Parses a sentence into its tree, collapsed typed dependencies and named entities.
   *
   * @param sentence the raw sentence
   * @param removePunctuation when {@code true}, punctuation is stripped (and runs of spaces are
   *     squeezed) before any analysis
   * @return the combined parse result
   */
  public ParsedSentence parseSentence(String sentence, boolean removePunctuation) {
    final String input = removePunctuation ? cleanSentence(sentence) : sentence;
    final Tree tree = getPosTree(input);
    final Collection<TypedDependency> dependencies = getDependencies(tree);
    final Map<String, NamedEntity> entities = findNamedEntities(input);
    return new ParsedSentence(tree, dependencies, entities);
  }

  /**
   * Derives a tense from the part-of-speech tag of the first word of {@code clause}: {@code MD}
   * gives FUTURE, {@code VBD}/{@code VBN} give PAST, anything else PRESENT.
   */
  public Tense calculateTense(String clause) {
    final Tree tree = getPosTree(clause);
    final Tree firstLeaf = tree.getLeaves().get(0);
    final String firstTag = firstLeaf.parent(tree).label().value().toLowerCase();
    switch (firstTag) {
      case "md":
        return Tense.FUTURE;
      case "vbd":
      case "vbn":
        return Tense.PAST;
      default:
        return Tense.PRESENT;
    }
  }

  /**
   * Maps every substring of {@code sentence} that the classifier marks as an entity to its
   * {@code NamedEntity} value; a later span with the same text overwrites an earlier one.
   */
  public Map<String, NamedEntity> findNamedEntities(String sentence) {
    final Map<String, NamedEntity> result = new HashMap<>();
    for (final Triple<String, Integer, Integer> span : findNerSubstrings(sentence)) {
      final String entityText = sentence.substring(span.second(), span.third());
      final NamedEntity entityType = NamedEntity.getNamedEntity(span.first());
      result.put(entityText, entityType);
    }
    return result;
  }

  /** Returns (label, begin offset, end offset) triples produced by the classifier. */
  private List<Triple<String, Integer, Integer>> findNerSubstrings(String sentence) {
    final List<Triple<String, Integer, Integer>> spans =
        classifier.classifyToCharacterOffsets(sentence);
    return spans;
  }

  /** Removes all punctuation characters, then collapses runs of spaces into one space. */
  private String cleanSentence(String sentence) {
    final String withoutPunctuation = sentence.replaceAll("\\p{Punct}", "");
    return withoutPunctuation.replaceAll("[ ]+", " ");
  }

  /** Tokenizes {@code sentence} and applies the parser to the tokens. */
  private Tree getPosTree(String sentence) {
    final List<CoreLabel> tokens =
        tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
    return parser.apply(tokens);
  }

  /** Returns the collapsed typed dependencies of a parse tree. */
  private Collection<TypedDependency> getDependencies(Tree sentenceParseTree) {
    final GrammaticalStructureFactory factory =
        new PennTreebankLanguagePack().grammaticalStructureFactory();
    return factory.newGrammaticalStructure(sentenceParseTree).typedDependenciesCollapsed();
  }
}
/** * for testing -- CURRENTLY BROKEN!!! * * @param args input dir and output filename * @throws IOException */ public static void main(String[] args) throws IOException { if (args.length != 3) { throw new RuntimeException("args: treebankPath trainNums testNums"); } ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams(); ctpp.charTags = true; // TODO: these options are getting clobbered by reading in the // parser object (unless it's a text file parser?) Options op = new Options(ctpp); op.doDep = false; op.testOptions.maxLength = 90; LexicalizedParser lp; try { FileFilter trainFilt = new NumberRangesFileFilter(args[1], false); lp = LexicalizedParser.trainFromTreebank(args[0], trainFilt, op); try { String filename = "chineseCharTagPCFG.ser.gz"; System.err.println("Writing parser in serialized format to file " + filename + ' '); System.err.flush(); ObjectOutputStream out = IOUtils.writeStreamFromString(filename); out.writeObject(lp); out.close(); System.err.println("done."); } catch (IOException ioe) { ioe.printStackTrace(); } } catch (IllegalArgumentException e) { lp = LexicalizedParser.loadModel(args[1], op); } FileFilter testFilt = new NumberRangesFileFilter(args[2], false); MemoryTreebank testTreebank = ctpp.memoryTreebank(); testTreebank.loadPath(new File(args[0]), testFilt); PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true); WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser(); WordCatEqualityChecker eqcheck = new WordCatEqualityChecker(); EquivalenceClassEval eval = new EquivalenceClassEval(eqclass, eqcheck); // System.out.println("Preterminals:" + preterminals); System.out.println("Testing..."); for (Tree gold : testTreebank) { Tree tree; try { tree = lp.parseTree(gold.yieldHasWord()); if (tree == null) { System.out.println("Failed to parse " + gold.yieldHasWord()); continue; } } catch (Exception e) { e.printStackTrace(); continue; } gold = gold.firstChild(); 
pw.println(Sentence.listToString(gold.preTerminalYield())); pw.println(Sentence.listToString(gold.yield())); gold.pennPrint(pw); pw.println(tree.preTerminalYield()); pw.println(tree.yield()); tree.pennPrint(pw); // Collection allBrackets = WordCatConstituent.allBrackets(tree); // Collection goldBrackets = WordCatConstituent.allBrackets(gold); // eval.eval(allBrackets, goldBrackets); eval.displayLast(); } System.out.println(); System.out.println(); eval.display(); }
public ParseResult parseSentence(String sentence) { String result = ""; // see if a parser socket server is available int port = new Integer(ARKref.getProperties().getProperty("parserServerPort", "5556")); String host = "127.0.0.1"; Socket client; PrintWriter pw; BufferedReader br; String line; try { client = new Socket(host, port); pw = new PrintWriter(client.getOutputStream()); br = new BufferedReader(new InputStreamReader(client.getInputStream())); pw.println(sentence); pw.flush(); // flush to complete the transmission while ((line = br.readLine()) != null) { // if(!line.matches(".*\\S.*")){ // System.out.println(); // } if (br.ready()) { line = line.replaceAll("\n", ""); line = line.replaceAll("\\s+", " "); result += line + " "; } else { lastParseScore = new Double(line); } } br.close(); pw.close(); client.close(); System.err.println("parser output:" + result); lastParse = readTreeFromString(result); boolean success = !Strings.normalizeWhitespace(result).equals("(ROOT (. .))"); return new ParseResult(success, lastParse, lastParseScore); } catch (Exception ex) { // ex.printStackTrace(); } // if socket server not available, then use a local parser object if (parser == null) { if (DEBUG) System.err.println("Could not connect to parser server. 
Loading parser..."); try { Options op = new Options(); String serializedInputFileOrUrl = ClassLoader.getSystemResource( ARKref.getProperties() .getProperty("parserGrammarFile", "lib/englishPCFG.ser.gz")) .toExternalForm(); parser = LexicalizedParser.loadModel(serializedInputFileOrUrl, op); // int maxLength = new Integer(ARKref.getProperties().getProperty("parserMaxLength", // "40")).intValue(); // parser.setMaxLength(maxLength); parser.setOptionFlags("-outputFormat", "oneline"); } catch (Exception e) { e.printStackTrace(); } } try { DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(sentence)); LexicalizedParserQuery query = parser.parserQuery(); if (query.parse(dp.iterator().next())) { lastParse = query.getBestParse(); lastParseScore = query.getPCFGScore(); TreePrint tp = new TreePrint("penn", "", new PennTreebankLanguagePack()); StringWriter sb = new StringWriter(); pw = new PrintWriter(sb); tp.printTree(lastParse, pw); pw.flush(); lastParse = readTreeFromString(sb.getBuffer().toString()); return new ParseResult(true, lastParse, lastParseScore); } } catch (Exception e) { } lastParse = readTreeFromString("(ROOT (. .))"); lastParseScore = -99999.0; return new ParseResult(false, lastParse, lastParseScore); }
public LexicalParsingEngine(String parserModel) throws FileNotFoundException, UnsupportedEncodingException { System.out.println("Initializing Lexical Parser..."); lp = LexicalizedParser.loadModel(parserModel); }
@SuppressWarnings("serial") public class TextSimplification { public static List<String> replacementList = new ArrayList<String>() { { add("he"); add("him"); add("his"); add("she"); add("her"); add("they"); add("them"); add("their"); add("i"); add("her's"); add("you"); add("your"); add("your's"); add("mine"); add("my"); add("us"); add("we"); // add("it"); // add("its"); // add("this"); // add("that"); } }; public static String resolvedSentences = ""; private static final String PCG_MODEL = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"; private static final TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "invertible=true"); private static final LexicalizedParser parser = LexicalizedParser.loadModel(PCG_MODEL); public static void main(String[] args) throws IOException { // :TODO // * Do not consider roots with more than 2 words // * Root should not be he, she her, his, him etc... // * If it is, den take the last known gender noun and make it the root. 
String text = new String(Files.readAllBytes(Paths.get(args[0])), StandardCharsets.UTF_8); text = text.replace("\n", " "); // Resolve Anaphora System.out.println("Anaphora Resolution..."); resolveAnaphora(text); System.out.println( "Anaphora Resolution Completed!\nIntermediate Output in \"AnaphoraResolved.txt\""); writeToFile(resolvedSentences, "AnaphoraResolved.txt"); // Create ParseTrees System.out.println("Parse Tree Generation..."); startParsing((resolvedSentences)); System.out.println("Parse Tree Generation Completed!\nIntermediate Output in \"Tree.txt\""); } public static void resolveAnaphora(String text) { RedwoodConfiguration.empty().capture(System.err).apply(); Annotation document = new Annotation(text); Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); props.put("dcoref.female", "female.unigram.txt"); props.put("dcoref.male", "male.unigram.txt"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); pipeline.annotate(document); RedwoodConfiguration.current().clear().apply(); Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class); List<CoreMap> stnfrdSentences = document.get(SentencesAnnotation.class); ImmutableMultimap.Builder<Integer, Pair<CorefChain, CorefMention>> records = ImmutableMultimap.builder(); ImmutableMultimap.Builder<Integer, Pair<CorefChain, CorefMention>> recordsOrdered = ImmutableMultimap.builder(); graph.forEach( (key, value) -> { value .getMentionMap() .forEach( (intPair, corefSet) -> { corefSet.forEach( mention -> records.put(mention.sentNum, Pair.of(value, mention))); }); }); recordsOrdered = records.orderKeysBy( new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { return o1 - o2; } }); recordsOrdered .build() .asMap() .forEach( (sentNum, mentionList) -> { CoreMap sentence = stnfrdSentences.get(sentNum - 1); List<CoreLabel> stnfrdtokens = sentence.get(TokensAnnotation.class); mentionList.forEach( pair -> { CorefChain chain 
= pair.getLeft(); CorefMention mention = pair.getRight(); String root = chain.getRepresentativeMention().mentionSpan; if (!mention.mentionSpan.equalsIgnoreCase(root) && (!root.contains(mention.mentionSpan) && !mention.mentionSpan.contains(root)) && (!replacementList.contains(root.toLowerCase())) && (root.split("\\s").length < 3) && (replacementList.contains(mention.mentionSpan.toLowerCase()))) { if (mention.mentionSpan.equalsIgnoreCase("her") || mention.mentionSpan.equalsIgnoreCase("his")) { root += "'s"; } stnfrdtokens.get(mention.startIndex - 1).setOriginalText(root); } }); String sent = ""; for (CoreLabel token : stnfrdtokens) { sent += token.originalText() + " "; } ; resolvedSentences += sent + "\n"; }); } public static Tree parse(String str) { List<CoreLabel> tokens = tokenize(str); Tree tree = parser.apply(tokens); return tree; } private static List<CoreLabel> tokenize(String str) { Tokenizer<CoreLabel> tokenizer = tokenizerFactory.getTokenizer(new StringReader(str)); return tokenizer.tokenize(); } public static void startParsing(String paragraph) throws FileNotFoundException, IOException { String parseTrees = ""; // Can we just split on new line as paragraph is already sentence splitted. 
Reader reader = new StringReader(paragraph); DocumentPreprocessor dp = new DocumentPreprocessor(reader); List<String> sentenceList = new ArrayList<String>(); for (List<HasWord> sentence : dp) { String sentenceString = Sentence.listToString(sentence); sentenceList.add(sentenceString); } for (String sentence : sentenceList) { // System.out.println(sentence); parseTrees += createParseTree(sentence); } writeToFile(parseTrees, "trees.txt"); } public static void writeToFile(String content, String filename) throws IOException { File file = new File(filename); file.delete(); FileWriter fout = new FileWriter(filename); fout.write(content); fout.close(); } public static String createParseTree(String sentence) { Tree tree = parse(sentence); // System.out.println(tree.toString()); return (tree.toString() + "\n"); } }
public ArrayList<String> getKeyWordsDependency(String sentence, String keyword) { LexicalizedParser lp = LexicalizedParser.loadModel( "/home/mingrui/Desktop/englishPCFG.ser.gz", "-maxLength", "80", "-retainTmpSubcategories"); TreebankLanguagePack tlp = new PennTreebankLanguagePack(); // Uncomment the following line to obtain original Stanford Dependencies // tlp.setGenerateOriginalDependencies(true); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); String[] array = sentence.split("\\s+"); Tree parse = lp.apply(Sentence.toWordList(array)); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(); ArrayList<String> keywordsDependency = new ArrayList<String>(); ArrayList<String> keywordsDependencyWithLemmatization = new ArrayList<String>(); // String lemmatizedKeyword = lemmatize(keyword); for (TypedDependency t : tdl) { String d = t.toString(); String dependencyType = d.substring(0, d.indexOf("(")); String pair = d.substring(d.indexOf("(") + 1, d.indexOf("(")); String[] terms = pair.split(","); String term1 = terms[0].trim(); String term2 = terms[1].trim(); // Match keywords with the terms in the tuples, if matched, add the // tuple into the arraylist String[] wordsplitted = keyword.split(" "); for (String key : wordsplitted) { if (term1.equals(key)) { keywordsDependency.add(t.toString()); } if (term2.equals(key)) { keywordsDependency.add(t.toString()); } } } String lemmatizedKeywords = lemmatize(keyword); int lbefore = keyword.split(" ").length; int lafter = lemmatizedKeywords.split(" ").length; if (lbefore == lafter) { return keywordsDependency; } else { String[] split = keyword.split(" "); for (String s : split) { String[] lemmas = lemmatize(s).split(" "); boolean sameLength = lemmas.length == s.split(" ").length; if (sameLength) { // Compare the length of one key_word or key_phrase before and after // lemmatization continue; } else { for (String tuple : 
keywordsDependency) { if (getTupleTerms(tuple)[0].equals( s)) { // Find the tuple that contains the original keyword/key_phrase String dependent = getTupleTerms(tuple)[1]; // String[] } } // for(String l : lemma) } } return keywordsDependencyWithLemmatization; } }