/**
 * Runs an interactive console session: loads the analysis pipeline for the
 * requested locale, then repeatedly reads a sentence from standard input,
 * analyzes it and prints the result.
 *
 * <p>The session ends when the user enters {@code q} or when standard input
 * reaches end-of-file (for example piped input, or Ctrl-D / Ctrl-Z).
 *
 * @param args command line arguments; parsed into {@code Params} by
 *             {@code validateAndParseParams}
 * @throws TerminateToolException with code 1 if the country is null or empty,
 *         or if the component factory cannot be initialized for the locale
 */
public void run(String[] args) {
  Params params = validateAndParseParams(args, Params.class);

  String lang = params.getLang();
  // Presumably rejects unsupported/invalid language codes -- see CmdLineUtil.
  CmdLineUtil.checkLanguageCode(lang);

  String country = params.getCountry();
  if (StringsUtil.isNullOrEmpty(country)) {
    throw new TerminateToolException(1, "Country cannot be empty. Example country: BR");
  }

  long start = System.nanoTime();

  ComponentFactory factory;
  try {
    factory = ComponentFactory.create(new Locale(lang, country));
  } catch (InitializationException e) {
    // The stack trace is printed here because the exception thrown below
    // carries only a message, not the cause.
    e.printStackTrace();
    throw new TerminateToolException(
        1,
        "Could not find configuration for " + lang + ". Only " + new Locale("pt", "BR")
            + " might be supported for now.");
  }

  Analyzer cogroo = factory.createPipe();
  System.out.println("Loading time [" + ((System.nanoTime() - start) / 1000000) + "ms]");

  final String prompt = "Enter the sentence or 'q' to quit: ";

  // Intentionally not closed: closing the Scanner would also close System.in
  // for the rest of the JVM.
  Scanner kb = new Scanner(System.in);
  System.out.print(prompt);

  // hasNextLine() guards against end-of-file; an unconditional nextLine()
  // would throw NoSuchElementException when stdin is exhausted.
  while (kb.hasNextLine()) {
    String input = kb.nextLine();
    if (input.equals("q")) {
      break;
    }

    CheckDocument document = new CheckDocument();
    document.setText(input);
    cogroo.analyze(document);
    System.out.println(TextUtils.nicePrint(document));

    System.out.print(prompt);
  }
}
/**
 * Expands contractions in every sentence of the document.
 *
 * <p>For each sentence the contraction finder is run over the token strings.
 * Every token it flags is replaced, in place, by one new token per expanded
 * part. Each new token reuses the start/end offsets of the original token and
 * is tagged under {@code Analyzers.CONTRACTION_FINDER} with {@code "B"} for the
 * first part, {@code "E"} for the last part and {@code "I"} for any part in
 * between (a single-part expansion is tagged {@code "B"}).
 *
 * <p>If {@code ContractionUtility.expand} has no expansion for a flagged token,
 * the token is left untouched and a debug message is logged, so the sentence
 * never loses a token.
 *
 * @param document the document whose sentences are updated in place
 */
public void analyze(Document document) {
  List<Sentence> sentences = document.getSentences();

  for (Sentence sentence : sentences) {
    Span[] contractionsSpan;
    // Access to the shared finder is serialized on the finder instance.
    synchronized (this.contractionFinder) {
      contractionsSpan = contractionFinder.find(TextUtils.tokensToString(sentence.getTokens()));
    }

    List<Token> newTokens = sentence.getTokens();

    // Walk the spans from last to first so that inserting extra tokens does
    // not shift the indices of spans that are still to be processed.
    for (int i = contractionsSpan.length - 1; i >= 0; i--) {
      int start = contractionsSpan[i].getStart();

      Token original = newTokens.get(start);
      String lexeme = original.getLexeme();
      String[] contractions = ContractionUtility.expand(lexeme);

      if (contractions == null || contractions.length == 0) {
        // No usable expansion: keep the original token instead of dropping it.
        LOGGER.debug("Missing contraction: " + lexeme);
        continue;
      }

      newTokens.remove(start);

      // Insert the parts back to front at the same index so they end up in
      // their natural order.
      for (int j = contractions.length - 1; j >= 0; j--) {
        Token token = new TokenImpl(original.getStart(), original.getEnd(), contractions[j]);
        newTokens.add(start, token);

        String caze;
        if (j == 0) {
          caze = "B";
        } else if (j == contractions.length - 1) {
          caze = "E";
        } else {
          caze = "I";
        }
        token.addContext(Analyzers.CONTRACTION_FINDER, caze);
      }
    }

    sentence.setTokens(newTokens);
  }
}