/**
 * Initializes shared static parser resources. Subsequent calls are no-ops.
 *
 * @throws Exception if the required 'modelFile' property is undefined
 */
public static void initialize() throws Exception {
  if (parser != null) return;
  Properties properties = Properties.loadFromClassName(StanfordParser.class.getName());
  tlp = new PennTreebankLanguagePack();
  gsf = tlp.grammaticalStructureFactory();
  String modelFile = properties.getProperty("modelFile");
  if (modelFile == null) throw new Exception("Required property 'modelFile' is undefined");
  // Old constructor API; newer CoreNLP releases use LexicalizedParser.loadModel(modelFile).
  parser = new LexicalizedParser(modelFile);
}
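// A minimal usage sketch (hypothetical caller): initialize() is idempotent
// thanks to the null check, so it can be invoked defensively before each use.
//
//   StanfordParser.initialize(); // first call loads the model
//   StanfordParser.initialize(); // subsequent calls return immediately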
private Collection<TypedDependency> parseSentenceTDL(String text) {
  System.out.println("Parsing sentence...");

  Collection<TypedDependency> tdl = null;
  TreebankLanguagePack tlp = lp.treebankLanguagePack();
  GrammaticalStructureFactory gsf = null;
  if (tlp.supportsGrammaticalStructures()) {
    gsf = tlp.grammaticalStructureFactory();
  }

  Reader reader = new StringReader(text);

  // Note: if the text contains several sentences, only the dependencies of the
  // last one survive the loop; the method returns null when the language pack
  // does not support grammatical structures.
  for (List<HasWord> sentence : new DocumentPreprocessor(reader)) {
    Tree parse = lp.apply(sentence);
    if (gsf != null) {
      GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
      tdl = gs.allTypedDependencies();
    }
  }
  return tdl;
}
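Each TypedDependency pairs a grammatical relation with its governor and dependent; a minimal sketch of consuming the returned collection (reln(), gov(), and dep() are the standard TypedDependency accessors):

  Collection<TypedDependency> deps = parseSentenceTDL("The cat sat on the mat.");
  if (deps != null) {
    for (TypedDependency td : deps) {
      // e.g. nsubj(sat-3, cat-2): relation(governor, dependent)
      System.out.println(td.reln() + "(" + td.gov() + ", " + td.dep() + ")");
    }
  }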
Example #3
  private static List<TypedDependency> getDependencies(String sentence) {

    if (pipeline == null) {
      loadModels();
    }

    TokenizerFactory<CoreLabel> tokenizerFactory =
        PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    Tokenizer<CoreLabel> tok = tokenizerFactory.getTokenizer(new StringReader(sentence));
    List<CoreLabel> rawWords = tok.tokenize();
    Tree parse = lp.apply(rawWords);

    TreebankLanguagePack tlp = lp.treebankLanguagePack(); // PennTreebankLanguagePack for English
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();

    return tdl;
  }
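Unlike allTypedDependencies() in the previous example, typedDependenciesCCprocessed() collapses dependencies and propagates them across coordination. A quick sketch (the example sentence follows the Stanford Dependencies manual; exact indices may vary):

  List<TypedDependency> deps = getDependencies("Bell makes and distributes electronic products.");
  // With CC processing the shared subject appears under both conjuncts:
  // nsubj(makes, Bell) as well as nsubj(distributes, Bell).
  for (TypedDependency td : deps) {
    System.out.println(td);
  }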
Example #4
public class Parser {

  private String grammar = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
  private String[] options = {"-maxLength", "80", "-retainTmpSubcategories"};
  private LexicalizedParser lp = LexicalizedParser.loadModel(grammar, options);
  private TreebankLanguagePack tlp = lp.getOp().langpack();
  private GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();

  public Parser() {}

  public LinkedList<String> getKeyWordsFromSentence(String string) {
    LinkedList<String> list = new LinkedList<String>();

    String[] sent = string.split(" ");
    List<HasWord> sentence = new ArrayList<HasWord>();
    for (String word : sent) sentence.add(new Word(word));

    Tree parse = lp.parse(sentence);
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);

    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed(); // computed but unused here

    String[] current;
    String type, key;
    List<CoreLabel> labelsList = parse.taggedLabeledYield();
    // taggedLabeledYield() labels print as "TAG-index" (e.g. "NN-3"), so the
    // numeric part recovers the position of the word in sent[].
    for (Label l : labelsList) {
      current = l.toString().split("-");
      type = current[0];
      if (type.equals("NN") || type.equals("NNS")) {
        key = sent[Integer.parseInt(current[1])];
        list.add(key);
      }
    }
    return list;
  }

  public LinkedList<String> getKeyWordsFromSentenceTest(String string) {

    LinkedList<String> list = new LinkedList<String>();

    String[] sent = string.split(" ");
    List<HasWord> sentence = new ArrayList<HasWord>();
    for (String word : sent) {
      sentence.add(new Word(word));
    }

    Tree parse = lp.parse(sentence);
    parse.pennPrint();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);

    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);

    System.out.println();

    System.out.println("The words of the sentence:");
    for (Label lab : parse.yield()) {
      if (lab instanceof CoreLabel) {
        System.out.println(((CoreLabel) lab).toString(CoreLabel.OutputFormat.VALUE_MAP));
      } else {
        System.out.println(lab);
      }
    }
    System.out.println();
    System.out.println("tagged");
    System.out.println(parse.taggedYield());

    List<CoreLabel> temp = parse.taggedLabeledYield();
    for (Label l : temp) {
      String[] sss = l.toString().split("-");
      String type = sss[0];
      System.out.println(type + "  " + sss[1] + "    " + sent[Integer.parseInt(sss[1])]);
      // Collect the nouns here as well, so the returned list mirrors
      // getKeyWordsFromSentence instead of always being empty.
      if (type.equals("NN") || type.equals("NNS")) {
        list.add(sent[Integer.parseInt(sss[1])]);
      }
    }

    for (String keyword : list) System.out.println(keyword);
    return list;
  }

  public static void main(String[] args) throws IOException {
    Parser parser = new Parser();
    parser.getKeyWordsFromSentence(
        "When athletes begin to exercise, their heart rates and respiration rates increase.  At what level of organization does the human body coordinate these functions?");
    parser.getKeyWordsFromSentenceTest(
        "When athletes begin to exercise, their heart rates and respiration rates increase.  At what level of organization does the human body coordinate these functions?");
  }
}
Example #5
 private Collection<TypedDependency> getDependencies(Tree sentenceParseTree) {
   final TreebankLanguagePack tlp = new PennTreebankLanguagePack();
   final GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
   final GrammaticalStructure gs = gsf.newGrammaticalStructure(sentenceParseTree);
   return gs.typedDependenciesCollapsed();
 }
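 // Unlike the CCprocessed variants above, typedDependenciesCollapsed() folds
 // prepositions into the relation name (a single prep_on edge rather than
 // separate prep and pobj edges). A minimal sketch, assuming an lp parser is
 // in scope as in the neighboring examples:
 //
 //   Tree tree = lp.apply(Sentence.toWordList("The cat sat on the mat .".split(" ")));
 //   for (TypedDependency td : getDependencies(tree)) {
 //     System.out.println(td); // e.g. prep_on(sat-3, mat-6)
 //   }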
  public static void main(String[] args) throws IOException {
    long startTime = System.currentTimeMillis();

    // Old constructor API; newer CoreNLP releases load this with
    // LexicalizedParser.loadModel("englishPCFG.ser.gz").
    LexicalizedParser lp = new LexicalizedParser("englishPCFG.ser.gz");
    TokenizerFactory tf = PTBTokenizer.factory(false, new WordTokenFactory());
    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
    String sentence = "Where did the first President die ?";

    System.out.println("Enter the question or press enter for default : ");
    String tempInput;
    BufferedReader b1 = new BufferedReader(new InputStreamReader(System.in));
    tempInput = b1.readLine();
    if (tempInput.length() == 0)
      System.out.println("The question is the default one : " + sentence);
    else {
      sentence = tempInput;
      System.out.println("The question entered is : " + sentence);
    }

    String sentence1 = PreProcess.removeStopWords1(sentence);

    System.out.println(sentence1);
    StringTokenizer st1 = new StringTokenizer(sentence1, " ");
    int n = 0;
    while (st1.hasMoreTokens()) {
      String temp1 = st1.nextToken();
      // Strip possessives and non-alphabetic characters before storing the token.
      map.put(n, temp1.replaceAll("'s", "").replaceAll("[^A-Za-z]", ""));
      n++;
    }
    List tokens = tf.getTokenizer(new StringReader(sentence)).tokenize();
    lp.parse(tokens); // parse the tokens
    Tree t = lp.getBestParse(); // get the best parse tree

    tp.printTree(t); // print the tree
    System.out.println("\nPROCESSED:\n\n");
    // print only the dependencies
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(t);

    // Collapsed typed dependencies rendered as a single printable string.
    String dependency = gs.typedDependenciesCollapsed().toString();
    System.out.println("Dependencies :" + dependency);

    String wordForm = "yes";
    int i = -1;
    String s[][] = new String[20][3];

    if (wordForm.equals("yes")) {
      StringTokenizer st = new StringTokenizer(dependency, " ([)],");
      while (st.hasMoreTokens()) {
        String as = st.nextToken();
        System.out.println(as);
        if (!as.contains("-")) {
          i++;
          s[i][0] = as;
        } else {
          s[i][1] = as;
          s[i][2] = st.nextToken();
        }
      }
    }

    length = i + 1;
    interchange1(s);
    System.out.println("The sorted version is ");
    for (i = 0; i < length; i++) {
      for (int j = 0; j < 3; j++) {
        System.out.print(s[i][j] + " ");
      }
      System.out.println();
    }

    System.out.println("What answer type is required: ");
    BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));

    String answtype = reader.readLine();
    String[] temp;
    temp = sentence.split(" ", 2);
    int g = 0;
    int h = 0;
    String secque = null;

    // Dijkstra implementation
    int[][] adjmatrix = new int[length][length];
    int j = 0;
    for (i = 0; i < length; i++) for (j = 0; j < length; j++) adjmatrix[i][j] = 100;
    formadj(adjmatrix, s);
    print(adjmatrix);
    if (Dijikstraalgo.dijikstra(adjmatrix, length - 1)
            - Dijikstraalgo.dijikstra(adjmatrix, length - 2)
        >= 0) {
      System.out.println("Type 1");
      if (makesentence(s, length - 1) == null) {
        secque = s[length - 1][2] + " " + s[length - 1][1];
        System.out.println(answtype + " is " + s[length - 1][2] + " " + s[length - 1][1] + " ?");

      } else {
        secque = makesentence(s, length - 1);
        System.out.println(answtype + " is " + secque + " ?");
      }
    } else {
      System.out.println("Type 2");
      System.out.println(
          "Before entering makesentence (previously the source of a null pointer exception): "
              + s[length - 2][0]
              + " "
              + s[length - 2][1]);
      if (makesentence(s, length - 2) == null) {

        secque = s[length - 2][2] + " " + s[length - 2][1];
        System.out.println(answtype + " is " + s[length - 2][2] + " " + s[length - 2][1] + " ?");
      } else {
        secque = makesentence(s, length - 2);

        System.out.println(answtype + " is " + secque + " ?");
      }
    }
    System.out.println(sentence.replace(secque.replaceAll("[^A-Za-z ]", ""), ""));

    long endTime = System.currentTimeMillis();
    System.out.println("The time elapsed is : " + (endTime - startTime) / 1000 + " s");
    System.out.println("The end");
  }
Example #7
		public HashMap<String,ArrayList<TreeData>> parseAllDocs() throws IOException{
			String grammar = "./jsan_resources/englishPCFG.ser.gz";
			String[] options = { "-maxLength", "120", "-retainTmpSubcategories" };
			// The no-arg constructor call (with its missing semicolon) did not compile;
			// the grammar/options constructor is what this example intends.
			LexicalizedParser lp = new LexicalizedParser(grammar, options);
			TreebankLanguagePack tlp = new PennTreebankLanguagePack();

			GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
			Iterable<List<? extends HasWord>> sentences;
			ArrayList<HashMap<String,ArrayList<String>>> everything = new ArrayList<HashMap<String,ArrayList<String>>>(3); 
			everything.add(0,otherSampleStrings);
			everything.add(1,authorSampleStrings);
			everything.add(2,toModifyStrings);
			Iterator<HashMap<String,ArrayList<String>>> everythingIter = everything.iterator();
			int docTypeNumber = -1; // 0 for otherSampleStrings, 1 for authorSampleStrings, 2 for toModifyStrings
			int numLoaded = 0;
			while(everythingIter.hasNext()){
				docTypeNumber++;
				// everythingIter must actually be consumed here; the original call to
				// docPathFinder() never advanced it, so hasNext() could never become false.
				HashMap<String,ArrayList<String>> currentSampleStrings = everythingIter.next();
				Set<String> currentDocStrings = currentSampleStrings.keySet();
				Iterator<String> docStrIter = currentDocStrings.iterator();
				String docID;
				ArrayList<String> sentenceTokens;
				allTreeProcessors[docTypeNumber]  = new TreeProcessor();
				allTreeProcessors[docTypeNumber].clearLoadedTreeDataMaps();
				numLoaded=0;
				while(docStrIter.hasNext()){
					docID = docStrIter.next();
					sentenceTokens = currentSampleStrings.get(docID);
					if(sentenceTokens == null){
						allTreeProcessors[docTypeNumber].loadTreeDataMap(docID, GRAMMAR_DIR, false);
						numLoaded++;
						continue;
					}
					numSentences = sentenceTokens.size();
					Iterator<String> sentIter = sentenceTokens.iterator();
					List<List<? extends HasWord>> tmp = new ArrayList<List<? extends HasWord>>();
					String tempSent;
					while(sentIter.hasNext()){
						tempSent = sentIter.next();
						Tokenizer<? extends HasWord> toke = tlp.getTokenizerFactory().getTokenizer(new StringReader(tempSent));
						List<? extends HasWord> sentenceTokenized = toke.tokenize();
						tmp.add(sentenceTokenized);
					}
					
					sentences = tmp;
					TreeProcessor.singleDocMap.clear();
					boolean willSaveResults = true;
					for (List<? extends HasWord> sentence : sentences) {
						Tree parse = lp.apply(sentence);
						allTreeProcessors[docTypeNumber].processTree(parse, 0, willSaveResults);
					}
					if(willSaveResults)
						ObjectIO.writeObject(TreeProcessor.singleDocMap,docID, GRAMMAR_DIR);

					//System.out.println("After all sents: ");
					//System.out.println(tc.processedTrees.toString().replaceAll("\\]\\], \\(","\\]\\]\n\\("));
					//String sent3 = "This is one last test!";
					//Tree parse3 = lp.apply(sent3);
					//parse3.pennPrint();
					//System.out.println("After sorting and writing:");
					//System.out.println(tc.processedTrees.toString().replaceAll("\\]\\], \\(","\\]\\]\n\\("));
					//Scanner in = new Scanner(System.in);
					//System.out.println("First one done.");
					//in.nextLine();
					//viewTrees();
				}
				
				allTreeProcessors[docTypeNumber].unmergedMaps = new ArrayList<HashMap<String,TreeData>>(numLoaded+1);
				
			}	
			
			
			int i = 0;
			allParsedAndOrdered.clear();
			String[] docTypes = new String[]{"otherSample","authorSample","toModify"};
			for(i=0; i < 3; i++){
				allTreeProcessors[i].unmergedMaps.add(allTreeProcessors[i].processedTrees);
				allTreeProcessors[i].unmergedMaps.addAll(allTreeProcessors[i].loadedTreeDataMaps);
				allTreeProcessors[i].mergeTreeDataLists(allTreeProcessors[i].unmergedMaps);
				allParsedAndOrdered.put(docTypes[i],allTreeProcessors[i].sortTreeData(allTreeProcessors[i].mergedMap));
				
			}
			
			
			return allParsedAndOrdered;
		}
Example #8
  public ArrayList<String> getKeyWordsDependency(String sentence, String keyword) {
    LexicalizedParser lp =
        LexicalizedParser.loadModel(
            "/home/mingrui/Desktop/englishPCFG.ser.gz",
            "-maxLength",
            "80",
            "-retainTmpSubcategories");
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    // Uncomment the following line to obtain original Stanford Dependencies
    // tlp.setGenerateOriginalDependencies(true);
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    String[] array = sentence.split("\\s+");
    Tree parse = lp.apply(Sentence.toWordList(array));
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    ArrayList<String> keywordsDependency = new ArrayList<String>();
    ArrayList<String> keywordsDependencyWithLemmatization = new ArrayList<String>();
    // String lemmatizedKeyword = lemmatize(keyword);
    for (TypedDependency t : tdl) {
      String d = t.toString();
      String dependencyType = d.substring(0, d.indexOf("("));
      // The second index must be the closing parenthesis; using indexOf("(")
      // twice made substring() throw, so no tuple was ever extracted.
      String pair = d.substring(d.indexOf("(") + 1, d.lastIndexOf(")"));
      String[] terms = pair.split(",");
      // Terms print as "word-index" (e.g. "President-4"); strip the index so
      // they can be compared against the raw keywords.
      String term1 = terms[0].trim().replaceAll("-\\d+$", "");
      String term2 = terms[1].trim().replaceAll("-\\d+$", "");

      // Match keywords with the terms in the tuples, if matched, add the
      // tuple into the arraylist
      String[] wordsplitted = keyword.split(" ");
      for (String key : wordsplitted) {
        if (term1.equals(key)) {
          keywordsDependency.add(t.toString());
        }
        if (term2.equals(key)) {
          keywordsDependency.add(t.toString());
        }
      }
    }

    String lemmatizedKeywords = lemmatize(keyword);
    int lbefore = keyword.split(" ").length;
    int lafter = lemmatizedKeywords.split(" ").length;
    if (lbefore == lafter) {
      return keywordsDependency;
    } else {
      String[] split = keyword.split(" ");
      for (String s : split) {
        String[] lemmas = lemmatize(s).split(" ");
        // Compare the length of one keyword or key phrase before and after lemmatization.
        boolean sameLength = lemmas.length == s.split(" ").length;
        if (sameLength) {
          continue;
        } else {
          for (String tuple : keywordsDependency) {
            // Find the tuple whose governor is the original keyword/key phrase.
            if (getTupleTerms(tuple)[0].equals(s)) {
              String dependent = getTupleTerms(tuple)[1];
              // TODO: the example stops here; nothing is ever added to
              // keywordsDependencyWithLemmatization before it is returned.
            }
          }
        }
      }
      return keywordsDependencyWithLemmatization;
    }
  }
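The lemmatize(...) helper used above (and the pipeline/loadModels() pair referenced in an earlier example) is not shown on this page. A minimal sketch of what it might look like with a CoreNLP pipeline; the field and property choices here are assumptions, not the original author's code:

  private static StanfordCoreNLP pipeline;

  private static void loadModels() {
    // Hypothetical helper: a pipeline with just enough annotators for lemmas.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
    pipeline = new StanfordCoreNLP(props);
  }

  private static String lemmatize(String text) {
    // Hypothetical helper: join the lemma of every token with single spaces.
    Annotation doc = new Annotation(text);
    pipeline.annotate(doc);
    StringBuilder sb = new StringBuilder();
    for (CoreLabel token : doc.get(CoreAnnotations.TokensAnnotation.class)) {
      if (sb.length() > 0) sb.append(' ');
      sb.append(token.get(CoreAnnotations.LemmaAnnotation.class));
    }
    return sb.toString();
  }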