/**
 * Assembles the preprocessing pipeline as a single {@link SerialPipes}.
 *
 * <p>Stages, in the order they are added:
 * <ol>
 *   <li>{@code Input2CharSequence("UTF-8")}</li>
 *   <li>{@code CharSequence2TokenSequence} using a pattern that matches runs of
 *       Unicode letters, Unicode digits and underscores</li>
 *   <li>{@code TokenSequenceRemoveStopwords(false, false)}</li>
 *   <li>{@code TokenSequence2FeatureSequence}</li>
 * </ol>
 * Not enabled here: {@code FeatureSequence2FeatureVector} and
 * {@code PrintInputAndTarget}.
 *
 * @return the serial pipe, with its data alphabet set to {@code alphabet}
 */
public Pipe buildPipe() {
   // A token is one or more letters (\p{L}), digits (\p{N}) or underscores.
   Pattern wordPattern = Pattern.compile("[\\p{L}\\p{N}_]+");

   ArrayList<Pipe> stages = new ArrayList<Pipe>();
   stages.add(new Input2CharSequence("UTF-8"));
   stages.add(new CharSequence2TokenSequence(wordPattern));
   // NOTE(review): the two flags are presumably (caseSensitive, markDeletions) - confirm
   // against the TokenSequenceRemoveStopwords constructor.
   stages.add(new TokenSequenceRemoveStopwords(false, false));
   stages.add(new TokenSequence2FeatureSequence());

   SerialPipes pipeline = new SerialPipes(stages);
   // 'alphabet' is defined outside this method.
   pipeline.setDataAlphabet(alphabet);
   return pipeline;
 }
Example #2
0
 /**
  * Hands the given pre-tagger to the pipe stage at index 2.
  *
  * <p>The stage returned by {@code pipe.getPipe(2)} is cast to {@code Pretagger};
  * the cast fails at runtime if a different pipe type sits at that index.
  *
  * @param preTagger the tagger passed on to the {@code Pretagger} stage
  */
 public void setPreTagger(banner.tagging.Tagger preTagger) {
   Pretagger pretaggerStage = (Pretagger) pipe.getPipe(2);
   pretaggerStage.setPreTagger(preTagger);
 }
Example #3
0
 /**
  * Hands the given part-of-speech tagger to the pipe stage at index 1.
  *
  * <p>The stage returned by {@code pipe.getPipe(1)} is cast to {@code LemmaPOS};
  * the cast fails at runtime if a different pipe type sits at that index.
  *
  * @param posTagger the tagger passed on to the {@code LemmaPOS} stage
  */
 public void setPosTagger(dragon.nlp.tool.Tagger posTagger) {
   LemmaPOS lemmaPosStage = (LemmaPOS) pipe.getPipe(1);
   lemmaPosStage.setPosTagger(posTagger);
 }
Example #4
0
 /**
  * Hands the given lemmatiser to the pipe stage at index 1.
  *
  * <p>The stage returned by {@code pipe.getPipe(1)} is cast to {@code LemmaPOS};
  * the cast fails at runtime if a different pipe type sits at that index.
  *
  * @param lemmatiser the lemmatiser passed on to the {@code LemmaPOS} stage
  */
 public void setLemmatiser(Lemmatiser lemmatiser) {
   LemmaPOS lemmaPosStage = (LemmaPOS) pipe.getPipe(1);
   lemmaPosStage.setLemmatiser(lemmatiser);
 }