public Pipe buildPipe() { ArrayList<Pipe> pipeList = new ArrayList<Pipe>(); pipeList.add(new Input2CharSequence("UTF-8")); Pattern tokenPattern = Pattern.compile("[\\p{L}\\p{N}_]+"); pipeList.add(new CharSequence2TokenSequence(tokenPattern)); pipeList.add(new TokenSequenceRemoveStopwords(false, false)); pipeList.add(new TokenSequence2FeatureSequence()); // pipeList.add(new FeatureSequence2FeatureVector()); // pipeList.add(new PrintInputAndTarget()); SerialPipes serialPipeList = new SerialPipes(pipeList); serialPipeList.setDataAlphabet(alphabet); return serialPipeList; }
/**
 * Hands the given pre-tagger to the pipeline stage at index 2.
 *
 * <p>The stage is cast to {@code Pretagger}; if the stage at that index is of a
 * different type the cast fails with a {@link ClassCastException}.
 *
 * @param preTagger the tagger to install on the {@code Pretagger} stage
 */
public void setPreTagger(banner.tagging.Tagger preTagger) {
    Pretagger pretaggerStage = (Pretagger) pipe.getPipe(2);
    pretaggerStage.setPreTagger(preTagger);
}
/**
 * Hands the given part-of-speech tagger to the pipeline stage at index 1.
 *
 * <p>The stage is cast to {@code LemmaPOS}; if the stage at that index is of a
 * different type the cast fails with a {@link ClassCastException}.
 *
 * @param posTagger the tagger to install on the {@code LemmaPOS} stage
 */
public void setPosTagger(dragon.nlp.tool.Tagger posTagger) {
    LemmaPOS lemmaPosStage = (LemmaPOS) pipe.getPipe(1);
    lemmaPosStage.setPosTagger(posTagger);
}
/**
 * Hands the given lemmatiser to the pipeline stage at index 1.
 *
 * <p>The stage is cast to {@code LemmaPOS}; if the stage at that index is of a
 * different type the cast fails with a {@link ClassCastException}.
 *
 * @param lemmatiser the lemmatiser to install on the {@code LemmaPOS} stage
 */
public void setLemmatiser(Lemmatiser lemmatiser) {
    LemmaPOS lemmaPosStage = (LemmaPOS) pipe.getPipe(1);
    lemmaPosStage.setLemmatiser(lemmatiser);
}