Ejemplo n.º 1
0
 /**
  * Writes a tree to the given writer, or the literal text "null" when no tree is supplied.
  *
  * @param t the tree to print; may be {@code null}
  * @param tp the printer used to render a non-null tree
  * @param pw the writer that receives the output
  */
 private static void displayTree(Tree t, TreePrint tp, PrintWriter pw) {
   if (t != null) {
     tp.printTree(t, pw);
     return;
   }
   pw.println("null");
 }
  /**
   * Reads a text file line by line, parses each line, and prints the resulting tree.
   *
   * <p>The file path is taken from {@code args[0]}. Each line is echoed under an "ORIGINAL"
   * heading, tokenized, parsed, and its best parse is printed under a "PROCESSED" heading
   * using the "penn,typedDependenciesCollapsed" output format. Any exception is reported on
   * standard error as "ERROR: ..." and the closing "THE END" banner is still printed.
   *
   * @param args command-line arguments; the first element is the path of the input file
   */
  public static void main(String[] args) {
    System.out.println("\n\n\nSTART\n\n\n");
    BufferedReader br = null;
    try {
      // Open the file whose path is passed as the first argument.
      FileInputStream fis = new FileInputStream(args[0]);
      DataInputStream dis = new DataInputStream(fis);
      br = new BufferedReader(new InputStreamReader(dis));

      // Prepare parser, tokenizer and tree printer.
      LexicalizedParser lp = new LexicalizedParser("englishPCFG.ser.gz");
      TokenizerFactory tf = PTBTokenizer.factory(false, new WordTokenFactory());
      TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");

      String sentence;
      while ((sentence = br.readLine()) != null) {
        System.out.println("\n\n\n\nORIGINAL:\n\n" + sentence);
        List tokens = tf.getTokenizer(new StringReader(sentence)).tokenize();
        lp.parse(tokens);
        Tree t = lp.getBestParse();
        System.out.println("\nPROCESSED:\n\n");
        tp.printTree(t);
      }
    } catch (Exception e) {
      System.err.println("ERROR: " + e.getMessage());
    } finally {
      // Close the input even when parsing or reading failed part-way through;
      // closing the outermost reader also closes the streams it wraps.
      if (br != null) {
        try {
          br.close();
        } catch (Exception e) {
          System.err.println("ERROR: " + e.getMessage());
        }
      }
    }
    System.out.println("\n\n\nTHE END\n\n\n");
  }
  /**
   * Parses a sentence, preferring a parser socket server and falling back to a local parser.
   *
   * <p>The method first connects to 127.0.0.1 on the port named by the "parserServerPort"
   * property (default "5556"), sends the sentence, and reads the reply line by line. Every
   * line read while more input is ready is appended to the tree text; the line read when no
   * further input is ready is parsed as the score. If anything on that path throws, a local
   * parser is used instead, loaded on first use from the resource named by the
   * "parserGrammarFile" property (default "lib/englishPCFG.ser.gz"). If the local parse also
   * fails, a placeholder tree "(ROOT (. .))" with score -99999.0 is returned.
   *
   * <p>Side effects: updates {@code lastParse} and {@code lastParseScore}, may initialize
   * {@code parser}, and writes the raw server output to standard error.
   *
   * @param sentence the sentence to parse
   * @return the parse result; its success flag is false for the placeholder tree
   * @throws NumberFormatException if the configured port is not a valid integer
   */
  public ParseResult parseSentence(String sentence) {
    // See if a parser socket server is available.
    int port = Integer.parseInt(ARKref.getProperties().getProperty("parserServerPort", "5556"));
    String host = "127.0.0.1";
    Socket client = null;
    try {
      client = new Socket(host, port);

      PrintWriter serverOut = new PrintWriter(client.getOutputStream());
      BufferedReader serverIn = new BufferedReader(new InputStreamReader(client.getInputStream()));
      serverOut.println(sentence);
      serverOut.flush(); // flush to complete the transmission

      StringBuilder treeText = new StringBuilder();
      String line;
      while ((line = serverIn.readLine()) != null) {
        if (serverIn.ready()) {
          // readLine() never returns line terminators, so only whitespace runs need collapsing.
          treeText.append(line.replaceAll("\\s+", " ")).append(' ');
        } else {
          // Nothing further is ready: this line is treated as the parse score.
          lastParseScore = Double.parseDouble(line);
        }
      }

      String result = treeText.toString();
      System.err.println("parser output:" + result);

      lastParse = readTreeFromString(result);
      boolean success = !Strings.normalizeWhitespace(result).equals("(ROOT (. .))");
      return new ParseResult(success, lastParse, lastParseScore);
    } catch (Exception ex) {
      // Best effort: any failure here simply means the local parser is used below.
      if (DEBUG) System.err.println("Parser server request failed: " + ex);
    } finally {
      // Closing the socket also closes its input and output streams.
      if (client != null) {
        try {
          client.close();
        } catch (Exception ignored) {
          // Nothing useful can be done if closing fails.
        }
      }
    }

    // If the socket server is not available, use a local parser object.
    if (parser == null) {
      if (DEBUG) System.err.println("Could not connect to parser server.  Loading parser...");
      try {
        Options op = new Options();
        String serializedInputFileOrUrl =
            ClassLoader.getSystemResource(
                    ARKref.getProperties()
                        .getProperty("parserGrammarFile", "lib/englishPCFG.ser.gz"))
                .toExternalForm();
        parser = LexicalizedParser.loadModel(serializedInputFileOrUrl, op);
        //				int maxLength = new Integer(ARKref.getProperties().getProperty("parserMaxLength",
        // "40")).intValue();
        //				parser.setMaxLength(maxLength);
        parser.setOptionFlags("-outputFormat", "oneline");
      } catch (Exception e) {
        e.printStackTrace();
      }
    }

    try {
      DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(sentence));

      LexicalizedParserQuery query = parser.parserQuery();

      if (query.parse(dp.iterator().next())) {
        lastParse = query.getBestParse();
        lastParseScore = query.getPCFGScore();
        // Print the tree in "penn" format and read it back through readTreeFromString.
        TreePrint tp = new TreePrint("penn", "", new PennTreebankLanguagePack());
        StringWriter sb = new StringWriter();
        PrintWriter treeOut = new PrintWriter(sb);
        tp.printTree(lastParse, treeOut);
        treeOut.flush();
        lastParse = readTreeFromString(sb.getBuffer().toString());

        return new ParseResult(true, lastParse, lastParseScore);
      }
    } catch (Exception e) {
      // Covers a parser that failed to load as well as parse errors; fall through to the
      // placeholder result below.
      if (DEBUG) System.err.println("Local parse failed: " + e);
    }

    lastParse = readTreeFromString("(ROOT (. .))");
    lastParseScore = -99999.0;
    return new ParseResult(false, lastParse, lastParseScore);
  }
Ejemplo n.º 4
0
 // todo: add an option to only print each tree once, regardless.  Most useful in conjunction
 // with -w
 /**
  * Processes one tree: counts it, runs the pattern against it, and prints according to the
  * configured output flags.
  *
  * <p>When {@code printNonMatchingTrees} is set, only the first match attempt is made: a
  * matching tree increments {@code numMatches}, a non-matching tree is printed, and nothing
  * else happens. Otherwise every match is counted and reported; with
  * {@code oneMatchPerRootNode}, consecutive matches on the same node are skipped.
  *
  * @param t the tree to examine
  */
 public void visitTree(Tree t) {
   treeNumber++;
   if (printTree) {
     pw.print(treeNumber + ":");
     pw.println("Next tree read:");
     tp.printTree(t, pw);
   }

   TregexMatcher matcher = p.matcher(t);

   if (printNonMatchingTrees) {
     if (!matcher.find()) {
       tp.printTree(t, pw);
     } else {
       numMatches++;
     }
     return;
   }

   Tree previousRoot = null;
   while (matcher.find()) {
     if (oneMatchPerRootNode) {
       // Identity comparison: skip a match that lands on the very same node as the last one.
       if (previousRoot == matcher.getMatch()) {
         continue;
       }
       previousRoot = matcher.getMatch();
     }
     numMatches++;

     if (printFilename && treebank instanceof DiskTreebank) {
       pw.print("# ");
       pw.println(((DiskTreebank) treebank).getCurrentFilename());
     }

     if (printSubtreeCode) {
       pw.print(treeNumber);
       pw.print(':');
       pw.println(matcher.getMatch().nodeNumber(t));
     }

     if (!printMatches) {
       continue;
     }

     if (reportTreeNumbers) {
       pw.print(treeNumber);
       pw.print(": ");
     }
     if (printTree) {
       pw.println("Found a full match:");
     }

     // TreePrint already emits a trailing blank line, so none is added after any branch.
     if (printWholeTree) {
       tp.printTree(t, pw);
       continue;
     }
     if (handles == null) {
       tp.printTree(matcher.getMatch(), pw);
       continue;
     }

     if (printTree) {
       pw.println("Here's the node you were interested in:");
     }
     for (String handle : handles) {
       Tree labeledNode = matcher.getNode(handle);
       if (labeledNode != null) {
         tp.printTree(labeledNode, pw);
       } else {
         System.err.println(
             "Error!!  There is no matched node \""
                 + handle
                 + "\"!  Did you specify such a label in the pattern?");
       }
     }
   }
 }
  /**
   * Interactive driver: reads a question, parses it, and prints a derived follow-up question.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Read a question from standard input; an empty line or end of input keeps the
   *       built-in default question.
   *   <li>Pass the question through {@code PreProcess.removeStopWords1}, and store each
   *       resulting word (with "'s" and non-letters stripped) in {@code map} by position.
   *   <li>Parse the original question, print the tree and its collapsed typed dependencies.
   *   <li>Split the dependency string into rows of up to three tokens, store the row count in
   *       {@code length}, and reorder the rows with {@code interchange1}.
   *   <li>Read the required answer type from standard input.
   *   <li>Build an adjacency matrix pre-filled with 100, fill it via {@code formadj}, and
   *       compare {@code Dijikstraalgo.dijikstra} for the last two rows to pick the row used
   *       to build the follow-up phrase.
   *   <li>Print the follow-up question, the original question with that phrase removed, and
   *       the elapsed time in seconds.
   * </ol>
   *
   * @param args unused
   * @throws IOException if reading from standard input fails
   */
  public static void main(String args[]) throws IOException {
    long startTime = System.currentTimeMillis();

    LexicalizedParser lp = new LexicalizedParser("englishPCFG.ser.gz");
    TokenizerFactory tf = PTBTokenizer.factory(false, new WordTokenFactory());
    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
    String sentence = "Where did the first President die ?";

    System.out.println("Enter the question or press enter for default : ");
    // A single reader is shared by both prompts: a second BufferedReader on System.in can
    // miss input that the first one has already buffered (e.g. when input is piped).
    BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
    String tempInput = stdin.readLine();
    // readLine() returns null at end of input; treat that like an empty line.
    if (tempInput == null || tempInput.length() == 0) {
      System.out.println("The question is the default one : " + sentence);
    } else {
      sentence = tempInput;
      System.out.println("The question entered is : " + sentence);
    }

    String sentence1 = PreProcess.removeStopWords1(sentence);

    System.out.println(sentence1);
    StringTokenizer st1 = new StringTokenizer(sentence1, " ");
    int n = 0;
    while (st1.hasMoreTokens()) {
      String temp1 = st1.nextToken();
      map.put(n, temp1.replaceAll("'s", "").replaceAll("[^A-Za-z]", ""));
      n++;
    }

    List tokens = tf.getTokenizer(new StringReader(sentence)).tokenize();
    lp.parse(tokens); // parse the tokens
    Tree t = lp.getBestParse(); // get the best parse tree

    tp.printTree(t);
    System.out.println("\nPROCESSED:\n\n");
    // dependencies only print
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(t);

    String dependency = gs.typedDependenciesCollapsed().toString();
    System.out.println("Dependencies :" + dependency);
    String wordForm = "yes";
    int i = -1;
    String[][] s = new String[20][3];

    if (wordForm.equals("yes")) {
      StringTokenizer st = new StringTokenizer(dependency, " ([)],");
      while (st.hasMoreTokens()) {
        String as = st.nextToken();
        System.out.println(as);
        if (!as.contains("-")) {
          // A token without '-' starts a new row.
          i++;
          if (i == s.length) {
            // Grow the table instead of overflowing the initial 20 rows.
            String[][] grown = new String[s.length * 2][3];
            System.arraycopy(s, 0, grown, 0, s.length);
            s = grown;
          }
          s[i][0] = as;
        } else if (i >= 0) {
          // A token with '-' fills column 1 and the next token fills column 2;
          // tokens seen before any row has been started are skipped.
          s[i][1] = as;
          if (st.hasMoreTokens()) {
            s[i][2] = st.nextToken();
          }
        }
      }
    }

    length = i + 1;
    interchange1(s);
    System.out.println("The sorted version is ");
    for (int row = 0; row < length; row++) {
      for (int col = 0; col < 3; col++) {
        System.out.print(s[row][col] + " ");
      }
      System.out.println();
    }

    System.out.println("What answer type is required: ");
    String answtype = stdin.readLine();

    // dijikstra implementation
    int[][] adjmatrix = new int[length][length];
    for (int row = 0; row < length; row++) {
      for (int col = 0; col < length; col++) {
        adjmatrix[row][col] = 100;
      }
    }
    formadj(adjmatrix, s);
    print(adjmatrix);

    boolean typeOne =
        Dijikstraalgo.dijikstra(adjmatrix, length - 1)
                - Dijikstraalgo.dijikstra(adjmatrix, length - 2)
            >= 0;
    // Type 1 builds the phrase from the last row, Type 2 from the one before it.
    int chosen = typeOne ? length - 1 : length - 2;
    if (typeOne) {
      System.out.println("Type 1");
    } else {
      System.out.println("Type 2");
      System.out.println(
          "Before entering the makesentence function(the cause of the null pointer exception) "
              + s[chosen][0]
              + " "
              + s[chosen][1]);
    }

    String secque;
    if (makesentence(s, chosen) == null) {
      // No sentence could be built: fall back to columns 2 and 1 of the chosen row.
      secque = s[chosen][2] + " " + s[chosen][1];
      System.out.println(answtype + " is " + s[chosen][2] + " " + s[chosen][1] + " ?");
    } else {
      secque = makesentence(s, chosen);
      System.out.println(answtype + " is " + secque + " ?");
    }

    System.out.println(sentence.replace(secque.replaceAll("[^A-Za-z ]", ""), ""));

    long endTime = System.currentTimeMillis();
    System.out.println("The time elapsed is : " + (endTime - startTime) / 1000);
    System.out.println("The end");
  }