public static void main(String[] args) throws IOException { String parserModel = null; String sentimentModel = null; String filename = null; String fileList = null; boolean stdin = false; boolean filterUnknown = false; List<Output> outputFormats = Collections.singletonList(Output.ROOT); Input inputFormat = Input.TEXT; String tlppClass = DEFAULT_TLPP_CLASS; for (int argIndex = 0; argIndex < args.length; ) { if (args[argIndex].equalsIgnoreCase("-sentimentModel")) { sentimentModel = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-parserModel")) { parserModel = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-file")) { filename = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-fileList")) { fileList = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-stdin")) { stdin = true; argIndex++; } else if (args[argIndex].equalsIgnoreCase("-input")) { inputFormat = Input.valueOf(args[argIndex + 1].toUpperCase()); argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-output")) { String[] formats = args[argIndex + 1].split(","); outputFormats = new ArrayList<Output>(); for (String format : formats) { outputFormats.add(Output.valueOf(format.toUpperCase())); } argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-filterUnknown")) { filterUnknown = true; argIndex++; } else if (args[argIndex].equalsIgnoreCase("-tlppClass")) { tlppClass = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-help")) { help(); System.exit(0); } else { System.err.println("Unknown argument " + args[argIndex + 1]); help(); throw new IllegalArgumentException("Unknown argument " + args[argIndex + 1]); } } // We construct two pipelines. One handles tokenization, if // necessary. The other takes tokenized sentences and converts // them to sentiment trees. Properties pipelineProps = new Properties(); Properties tokenizerProps = null; if (sentimentModel != null) { pipelineProps.setProperty("sentiment.model", sentimentModel); } if (parserModel != null) { pipelineProps.setProperty("parse.model", parserModel); } if (stdin) { pipelineProps.setProperty("ssplit.eolonly", "true"); } if (inputFormat == Input.TREES) { pipelineProps.setProperty("annotators", "binarizer, sentiment"); pipelineProps.setProperty( "customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator"); pipelineProps.setProperty("binarizer.tlppClass", tlppClass); pipelineProps.setProperty("enforceRequirements", "false"); } else { pipelineProps.setProperty("annotators", "parse, sentiment"); pipelineProps.setProperty("enforceRequirements", "false"); tokenizerProps = new Properties(); tokenizerProps.setProperty("annotators", "tokenize, ssplit"); } int count = 0; if (filename != null) count++; if (fileList != null) count++; if (stdin) count++; if (count > 1) { throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin"); } if (count == 0) { throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin"); } StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps); StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps); if (filename != null) { // Process a file. The pipeline will do tokenization, which // means it will split it into sentences as best as possible // with the tokenizer. List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown); for (Annotation annotation : annotations) { pipeline.annotate(annotation); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { System.out.println(sentence); outputTree(System.out, sentence, outputFormats); } } } else if (fileList != null) { // Process multiple files. The pipeline will do tokenization, // which means it will split it into sentences as best as // possible with the tokenizer. Output will go to filename.out // for each file. for (String file : fileList.split(",")) { List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown); FileOutputStream fout = new FileOutputStream(file + ".out"); PrintStream pout = new PrintStream(fout); for (Annotation annotation : annotations) { pipeline.annotate(annotation); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { pout.println(sentence); outputTree(pout, sentence, outputFormats); } } pout.flush(); fout.close(); } } else { // Process stdin. Each line will be treated as a single sentence. System.err.println("Reading in text from stdin."); System.err.println("Please enter one sentence per line."); System.err.println("Processing will end when EOF is reached."); BufferedReader reader = IOUtils.readerFromStdin("utf-8"); for (String line; (line = reader.readLine()) != null; ) { line = line.trim(); if (line.length() > 0) { Annotation annotation = tokenizer.process(line); pipeline.annotate(annotation); for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { outputTree(System.out, sentence, outputFormats); } } else { // Output blank lines for blank lines so the tool can be // used for line-by-line text processing System.out.println(""); } } } }