예제 #1
0
  /**
   * Runs the ranking step on the document and returns its words.
   *
   * <p>The ranking algorithm is looked up in {@code RankingAlgorithmRegistry} by the id taken
   * from {@code cmd}, and handed to {@code document.weightFilter} together with the maximum
   * word count from {@code cmd}. The words are then read back from the document.
   *
   * @param document the document to rank; its {@code weightFilter} method is invoked
   * @param cmd supplies the ranking algorithm id and the maximum number of words
   * @return the list returned by {@code document.getWords()} after the filter call
   * @throws RuntimeException if the document yields fewer than 10 words
   */
  private List<Word> ranking(SWCDocument document, CommandLineArguments cmd) {
    // Resolve the algorithm before reading the word limit, then apply both to the document.
    RankingAlgo rankingAlgo = RankingAlgorithmRegistry.getById(cmd.getRankAlgorithm());
    document.weightFilter(cmd.getMaxWords(), rankingAlgo);

    List<Word> rankedWords = document.getWords();
    int wordCount = rankedWords.size();
    if (wordCount >= 10) {
      return rankedWords;
    }

    // Fewer than 10 words is treated as unusable input.
    throw new RuntimeException("The input text is too short (" + wordCount + " words)");
  }
예제 #2
0
  /**
   * Reads the complete input text and parses it into a document.
   *
   * <p>The text is read line by line from the file named by {@code cmd.getInputFile()} when
   * that value is non-null, and from standard input otherwise. Every line is terminated with
   * a single {@code '\n'} in the collected text, regardless of the original line separator.
   *
   * @param cmd supplies the optional input file name and the parse options
   * @return a new {@code SWCDocument} built from the text, after {@code parse} has been called
   *     on it with {@code cmd.getParseOptions()}
   * @throws FileNotFoundException if the named input file cannot be opened
   */
  private SWCDocument readDocument(CommandLineArguments cmd) throws FileNotFoundException {
    String inputFile = cmd.getInputFile();
    StringBuilder text = new StringBuilder();

    // try-with-resources guarantees the scanner (and the file handle behind it) is released
    // even if reading fails part-way. Note that closing the scanner also closes its underlying
    // stream, which is System.in when no input file was given.
    try (Scanner scanner =
        inputFile != null ? new Scanner(new File(inputFile)) : new Scanner(System.in)) {
      while (scanner.hasNextLine()) {
        // Chained appends avoid building a temporary String per line.
        text.append(scanner.nextLine()).append('\n');
      }
    }

    SWCDocument doc = new SWCDocument(text.toString());
    doc.parse(cmd.getParseOptions());

    return doc;
  }