Beispiel #1
0
  /**
   * Runs an interactive console session: builds an analyzer for the requested locale, then
   * repeatedly reads a line from standard input, analyzes it and prints the result.
   *
   * <p>The session ends when the user enters {@code q} or when standard input reaches
   * end-of-file (for example when input is piped or redirected).
   *
   * @param args command line arguments, parsed into {@code Params} (language and country)
   * @throws TerminateToolException if the country is empty or if no component factory can be
   *     created for the requested locale
   */
  public void run(String[] args) {
    Params params = validateAndParseParams(args, Params.class);

    String lang = params.getLang();
    CmdLineUtil.checkLanguageCode(lang);

    String country = params.getCountry();
    if (StringsUtil.isNullOrEmpty(country)) {
      throw new TerminateToolException(1, "Country cannot be empty. Example country: BR");
    }

    long start = System.nanoTime();

    Locale locale = new Locale(lang, country);
    ComponentFactory factory;
    try {
      factory = ComponentFactory.create(locale);
    } catch (InitializationException e) {
      // The exception thrown below carries only a message, so the stack trace is printed here
      // to keep the underlying cause visible to the user.
      e.printStackTrace();
      // Report the full locale (language and country): that is what the lookup was made with.
      throw new TerminateToolException(
          1,
          "Could not find configuration for "
              + locale
              + ". Only "
              + new Locale("pt", "BR")
              + " might be supported for now.");
    }
    Analyzer cogroo = factory.createPipe();

    System.out.println("Loading time [" + ((System.nanoTime() - start) / 1000000) + "ms]");

    final String prompt = "Enter the sentence or 'q' to quit: ";

    // Intentionally not closed: closing the Scanner would also close System.in.
    Scanner kb = new Scanner(System.in);
    System.out.print(prompt);

    // hasNextLine() guards against end-of-input; calling nextLine() blindly would throw
    // NoSuchElementException when stdin is exhausted.
    while (kb.hasNextLine()) {
      String input = kb.nextLine();
      if ("q".equals(input)) {
        break;
      }

      CheckDocument document = new CheckDocument();
      document.setText(input);
      cogroo.analyze(document);

      System.out.println(TextUtils.nicePrint(document));

      System.out.print(prompt);
    }
  }
Beispiel #2
0
  /**
   * Expands the contractions of every sentence in the document.
   *
   * <p>For each span reported by the contraction finder, the token at the span start is replaced
   * by one new token per expanded part. Every new token keeps the character offsets of the
   * original token and is tagged, under {@code Analyzers.CONTRACTION_FINDER}, with {@code "B"}
   * (first part), {@code "I"} (inner part) or {@code "E"} (last part).
   *
   * <p>If no expansion is known for a lexeme, the original token is left in place and the miss
   * is logged at debug level.
   *
   * @param document the document whose sentences are processed; their token lists are updated
   */
  public void analyze(Document document) {
    List<Sentence> sentences = document.getSentences();

    for (Sentence sentence : sentences) {
      Span[] contractionsSpan;

      synchronized (this.contractionFinder) {
        contractionsSpan = contractionFinder.find(TextUtils.tokensToString(sentence.getTokens()));
      }

      List<Token> newTokens = sentence.getTokens();

      // Walk the spans from last to first so that inserting expanded tokens does not shift the
      // indices of spans still to be processed.
      // NOTE(review): this relies on the finder returning spans in ascending order - confirm.
      for (int i = contractionsSpan.length - 1; i >= 0; i--) {

        int start = contractionsSpan[i].getStart();

        Token original = newTokens.get(start);
        String lexeme = original.getLexeme();
        String[] contractions = ContractionUtility.expand(lexeme);

        if (contractions == null) {
          // Unknown contraction: keep the original token instead of dropping it from the
          // sentence.
          LOGGER.debug("Missing contraction: " + lexeme);
          continue;
        }

        newTokens.remove(start);

        // Insert the parts in reverse at the same index so they end up in natural order.
        for (int j = contractions.length - 1; j >= 0; j--) {
          Token token = new TokenImpl(original.getStart(), original.getEnd(), contractions[j]);
          newTokens.add(start, token);

          String caze;
          if (j == 0) {
            caze = "B";
          } else if (j == contractions.length - 1) {
            caze = "E";
          } else {
            caze = "I";
          }

          token.addContext(Analyzers.CONTRACTION_FINDER, caze);
        }
      }
      sentence.setTokens(newTokens);
    }
  }