/**
 * Creates a sentence sample stream that is derived from a POS sample stream.
 *
 * <p>The POS samples come from the factory registered for {@code POSSample} under
 * {@code StreamFactoryRegistry.DEFAULT_FORMAT}. That stream is wrapped in a
 * {@code POSToSentenceSampleStream} together with the detokenizer built from the
 * parsed parameters.
 *
 * @param args the command line arguments
 * @return a sentence sample stream backed by the POS sample stream
 */
public ObjectStream<SentenceSample> create(String[] args) {
    Parameters cliParams = ArgumentParser.parse(args, Parameters.class);

    // The POS factory is handed the arguments after they were filtered against
    // WordTagSampleStreamFactory.Parameters, not the raw argument array.
    ObjectStream<POSSample> taggedSamples = StreamFactoryRegistry
        .getFactory(POSSample.class, StreamFactoryRegistry.DEFAULT_FORMAT)
        .create(ArgumentParser.filter(args, WordTagSampleStreamFactory.Parameters.class));

    // NOTE(review): 30 is a fixed constructor argument; presumably a chunk size — confirm
    // against POSToSentenceSampleStream.
    return new POSToSentenceSampleStream(createDetokenizer(cliParams), taggedSamples, 30);
  }
  /**
   * Creates a chunk sample stream that reads the data file named by the parsed parameters.
   *
   * <p>As a side effect the {@code language} field is set from the parsed parameters.
   * The optional start and end values are applied to the stream only when they are
   * present and greater than -1.
   *
   * @param args the command line arguments
   * @return the configured chunk sample stream
   */
  public ObjectStream<ChunkSample> create(String[] args) {

    Parameters cliParams = ArgumentParser.parse(args, Parameters.class);

    language = cliParams.getLang();

    FileInputStream dataIn = CmdLineUtil.openInFile(cliParams.getData());

    // Lines are read through the file channel using the encoding given in the parameters.
    ObjectStream<String> lines =
        new PlainTextByLineStream(dataIn.getChannel(), cliParams.getEncoding());

    ADChunkBasedShallowParserSampleStream chunkSamples =
        new ADChunkBasedShallowParserSampleStream(
            lines,
            cliParams.getFunctTags(),
            cliParams.getIsIncludePOSTags(),
            cliParams.getUseCGTags(),
            cliParams.getExpandME());

    // A missing (null) or negative start leaves the stream's start untouched.
    if (cliParams.getStart() != null) {
      if (cliParams.getStart() > -1) {
        chunkSamples.setStart(cliParams.getStart());
      }
    }

    // Same rule for the end value.
    if (cliParams.getEnd() != null) {
      if (cliParams.getEnd() > -1) {
        chunkSamples.setEnd(cliParams.getEnd());
      }
    }

    return chunkSamples;
  }
コード例 #3
0
  /**
   * Creates a name sample stream over the data file named by the parsed parameters.
   *
   * <p>The requested entity types are translated into a bit mask built from the
   * {@code BioNLP2004NameSampleStream.GENERATE_*} constants. Every type found in the
   * types parameter contributes its own flag, so several types can be requested at once.
   * If none of the known type names is found, the mask stays 0.
   *
   * @param args the command line arguments
   * @return a name sample stream reading the data file with the selected entity types
   */
  public ObjectStream<NameSample> create(String[] args) {

    Parameters params = ArgumentParser.parse(args, Parameters.class);
    int typesToGenerate = 0;

    // Each check is independent on purpose: the flags are OR-ed into one mask, and an
    // else-if chain would stop at the first match and drop every other requested type.
    if (params.getTypes().contains("DNA")) {
      typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_DNA_ENTITIES;
    }
    if (params.getTypes().contains("protein")) {
      typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_PROTEIN_ENTITIES;
    }
    if (params.getTypes().contains("cell_type")) {
      typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_CELLTYPE_ENTITIES;
    }
    if (params.getTypes().contains("cell_line")) {
      typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_CELLLINE_ENTITIES;
    }
    if (params.getTypes().contains("RNA")) {
      typesToGenerate |= BioNLP2004NameSampleStream.GENERATE_RNA_ENTITIES;
    }

    return new BioNLP2004NameSampleStream(
        CmdLineUtil.openInFile(new File(params.getData())), typesToGenerate);
  }
コード例 #4
0
 /**
  * Checks the given command line arguments against this factory's {@code Parameters}.
  *
  * @param args the command line arguments to check
  * @return the result reported by {@code ArgumentParser.validateArguments}
  */
 public boolean validateArguments(String[] args) {
   boolean argumentsValid = ArgumentParser.validateArguments(args, Parameters.class);
   return argumentsValid;
 }
コード例 #5
0
 /**
  * Returns the usage text for this factory's {@code Parameters}.
  *
  * @return the usage string produced by {@code ArgumentParser.createUsage}
  */
 public String getUsage() {
   String usageText = ArgumentParser.createUsage(Parameters.class);
   return usageText;
 }