/**
   * Extracts key terms from a question by running a serialized chunker over it.
   *
   * <p>The chunker is deserialized from {@code modelFile} on every call. Each chunk it reports
   * becomes one {@link Keyterm} holding the corresponding substring of the question.
   *
   * <p>NOTE(review): re-reading the model per call is likely expensive; consider loading it once
   * during initialization if the enclosing class allows it.
   *
   * @param question the text to chunk
   * @return one keyterm per chunk in the chunker's chunk set; empty if there are no chunks
   * @throws IllegalStateException if the chunker cannot be deserialized from {@code modelFile}
   */
  @Override
  protected List<Keyterm> getKeyterms(String question) {
    // Debug trace of the incoming question.
    System.out.println("CHECK " + question);

    // Fail with the real cause instead of continuing with a null chunker and hitting an
    // uninformative NullPointerException further down.
    Chunker extractor;
    try {
      extractor = (Chunker) AbstractExternalizable.readObject(modelFile);
    } catch (IOException e) {
      throw new IllegalStateException("Unable to read chunker model " + modelFile, e);
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException("Unable to deserialize chunker model " + modelFile, e);
    }

    List<Keyterm> keyterms = new ArrayList<Keyterm>();
    Chunking entities = extractor.chunk(question);
    for (Chunk ne : entities.chunkSet()) {
      // Chunk offsets index into the text that was chunked, i.e. the question itself.
      String keyWord = question.substring(ne.start(), ne.end());
      keyterms.add(new Keyterm(keyWord));
    }
    return keyterms;
  }
// Example #2
// 0
  /**
   * Initializes this resource by loading the classifier from the configured model URL.
   *
   * <p>The URL in {@code modelFileUrl} is converted to a local {@link File}, checked for
   * existence, and deserialized into {@code classifier}.
   *
   * @return this resource
   * @throws ResourceInstantiationException if no model URL is set, the URL cannot be converted to
   *     a local file, the file does not exist, or the classifier cannot be deserialized
   */
  @SuppressWarnings("rawtypes")
  public Resource init() throws ResourceInstantiationException {
    if (modelFileUrl == null) throw new ResourceInstantiationException("No model file provided!");

    try {
      modelFile = new File(modelFileUrl.toURI());
    } catch (URISyntaxException e) {
      throw new ResourceInstantiationException(e);
    } catch (IllegalArgumentException e) {
      // File(URI) rejects URIs that are not absolute, hierarchical file: URIs (e.g. jar: or
      // http: URLs). Report that through the declared exception type instead of letting an
      // unchecked exception escape.
      throw new ResourceInstantiationException(e);
    }

    if (!modelFile.exists()) {
      throw new ResourceInstantiationException(
          "modelFile:" + modelFileUrl.toString() + " does not exists");
    }

    try {
      classifier = (LMClassifier) AbstractExternalizable.readObject(modelFile);
    } catch (IOException e) {
      throw new ResourceInstantiationException(e);
    } catch (ClassNotFoundException e) {
      throw new ResourceInstantiationException(e);
    }

    return this;
  }
  /**
   * Loads the NER chunker model and the gene dictionary from classpath resources.
   *
   * <p>The chunker is deserialized from {@code /ne-en-bio-genetag.HmmChunker} into {@code
   * chunker}. The dictionary is read from {@code /ref.dic}, one entry per line, into {@code
   * geneDictionary}.
   *
   * <p>NOTE(review): both resources are resolved to {@link File}s, so this presumably only works
   * when they are plain files on disk rather than entries inside a jar. Confirm the deployment.
   *
   * <p>NOTE(review): in UIMA the first argument of the four-argument {@code
   * ResourceInitializationException} constructor appears to be a resource bundle name; confirm
   * that passing a message there is intended.
   *
   * @param c the UIMA context passed on to the superclass
   * @throws ResourceInitializationException if either resource is missing or cannot be loaded
   */
  @Override
  public void initialize(UimaContext c) throws ResourceInitializationException {
    super.initialize(c);

    // A missing classpath resource yields null. Report it through the declared exception type
    // rather than letting a NullPointerException escape.
    java.net.URL nerModelUrl =
        AustinMaKeytermExtractor.class.getResource("/ne-en-bio-genetag.HmmChunker");
    if (nerModelUrl == null) {
      throw new ResourceInitializationException(
          "Unable to load NER model file",
          "load_ner_model_error",
          new Object[] {nerModelFile},
          new FileNotFoundException("/ne-en-bio-genetag.HmmChunker"));
    }

    try {
      nerModelFile = new File(new URI(nerModelUrl.toString()));
      chunker = (Chunker) AbstractExternalizable.readObject(nerModelFile);
    } catch (IOException e) {
      System.err.println("IOException in creating chunker");
      throw new ResourceInitializationException(
          "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e);
    } catch (ClassNotFoundException e) {
      System.err.println("ClassNotFoundException in creating chunker");
      throw new ResourceInitializationException(
          "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e);
    } catch (URISyntaxException e) {
      e.printStackTrace();
      throw new ResourceInitializationException(
          "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e);
    }

    // Read in the geneDictionary
    geneDictionary = new HashSet<String>();
    java.net.URL geneDictionaryUrl = AustinMaKeytermExtractor.class.getResource("/ref.dic");
    if (geneDictionaryUrl == null) {
      throw new ResourceInitializationException(
          "Unable to find gene dictionary file",
          "load_gene_dic_error",
          new Object[] {},
          new FileNotFoundException("/ref.dic"));
    }

    try {
      File geneDictionaryFile = new File(new URI(geneDictionaryUrl.toString()));
      BufferedReader reader = new BufferedReader(new FileReader(geneDictionaryFile));
      try {
        String line;
        while ((line = reader.readLine()) != null) {
          geneDictionary.add(line);
        }
      } catch (IOException e) {
        System.err.println("IOException in reading gene dictionary file");
        throw new ResourceInitializationException(
            "Unable to load gene dictionary file", "load_gene_dic_error", new Object[] {}, e);
      } finally {
        // Always release the file handle; the original never closed the reader.
        try {
          reader.close();
        } catch (IOException ignored) {
          // Best effort: a failure to close must not mask the outcome of the read itself.
        }
      }
    } catch (FileNotFoundException e) {
      System.err.println("FileNotFoundException in reading gene dictionary file");
      throw new ResourceInitializationException(
          "Unable to find gene dictionary file", "load_gene_dic_error", new Object[] {}, e);
    } catch (URISyntaxException e1) {
      e1.printStackTrace();
      throw new ResourceInitializationException(
          "Unable to find gene dictionary file", "load_gene_dic_error", new Object[] {}, e1);
    }
  }
 /**
  * Manual smoke test: loads the chunker model from a fixed relative path, chunks a sample
  * question, and prints the text of every chunk to standard output.
  *
  * @param args ignored
  */
 public static void main(String[] args) {
   String modelFile = "src/main/resources/models/ne-en-bio-genetag.HmmChunker";
   String question =
       "What is the role of PrnP in mad cow disease (bovine spongiform encephalitis, BSE)?";
   try {
     Chunker achunker = (Chunker) AbstractExternalizable.readObject(new File(modelFile));
     Chunking chunking = achunker.chunk(question);
     for (Chunk chunk : chunking.chunkSet()) {
       System.out.println(question.substring(chunk.start(), chunk.end()));
     }
   } catch (Exception e) {
     System.err.println("oops");
     // Surface the actual cause (missing model file, incompatible class, ...) instead of
     // discarding it; "oops" alone gives nothing to diagnose.
     e.printStackTrace();
   }
 }
  /**
   * Creates the extractor by deserializing a {@code HiddenMarkovModel} from a path derived from
   * this class's code-source location and wrapping it in an {@code HmmDecoder} stored in {@code
   * mPosTagger}.
   *
   * @throws ClassNotFoundException if the serialized model's class cannot be resolved
   * @throws IOException if the model file cannot be read
   */
  public ChunkerFeatureExtractor() throws ClassNotFoundException, IOException {

    @SuppressWarnings("unchecked") // req for deserialize
    CodeSource codeSource = getClass().getProtectionDomain().getCodeSource();
    String location = codeSource.getLocation().toString();

    // Trim 5 leading and 10 trailing characters from the location string.
    // NOTE(review): presumably this strips a "file:" prefix and a fixed-length trailing path
    // segment to reach the project root; confirm against the actual deployment layout.
    int endOfBase = location.length() - 10;
    String baseDir = location.substring(5, endOfBase);
    String modelPath = baseDir + "/objects/pos-en-general-brown.HiddenMarkovModel";

    Object deserialized = AbstractExternalizable.readObject(new File(modelPath));
    HiddenMarkovModel taggerModel = (HiddenMarkovModel) deserialized;

    // No first cache is supplied (null); the second one is a FastCache constructed with 100000.
    FastCache<String, double[]> emissionCache = new FastCache<String, double[]>(100000);
    mPosTagger = new HmmDecoder(taggerModel, null, emissionCache);
  }
 /**
  * Initializes the component by loading the chunker named by the {@code TrainedModel}
  * configuration parameter.
  *
  * <p>The parameter value is used as a file path; the file is deserialized into {@code
  * extractor}, and the path itself is kept in {@code modelFile}.
  *
  * @param aContext the UIMA context supplying the {@code TrainedModel} parameter
  * @throws ResourceInitializationException if the parameter is not set or the model cannot be
  *     read or deserialized
  */
 @Override
 public void initialize(UimaContext aContext) throws ResourceInitializationException {
   super.initialize(aContext);

   String model = (String) aContext.getConfigParameterValue("TrainedModel");
   if (model == null) {
     // new File(null) would throw a bare NullPointerException; report the real problem.
     throw new ResourceInitializationException(
         new IllegalArgumentException("Configuration parameter \"TrainedModel\" is not set"));
   }
   modelFile = new File(model);

   // Propagate load failures: swallowing them would leave extractor null and defer the
   // error to first use, far from its cause.
   try {
     extractor = (Chunker) AbstractExternalizable.readObject(modelFile);
   } catch (IOException e) {
     throw new ResourceInitializationException(e);
   } catch (ClassNotFoundException e) {
     throw new ResourceInitializationException(e);
   }
 }
// Example #7
// 0
 /**
  * Deserializes a language-model classifier from a resource located through {@code
  * classLoader}.
  *
  * @param fileName name of the resource to look up with {@code classLoader.getResource}
  * @return the deserialized classifier
  * @throws IOException if the resource cannot be found or the file cannot be read
  * @throws ClassNotFoundException if the serialized classifier's class cannot be resolved
  */
 private LMClassifier<?, ?> deserialize(String fileName)
     throws ClassNotFoundException, IOException {
   java.net.URL resource = classLoader.getResource(fileName);
   if (resource == null) {
     // getResource signals "not found" with null; surface that as the declared IOException
     // rather than a NullPointerException.
     throw new java.io.FileNotFoundException("Classpath resource not found: " + fileName);
   }

   File resourceFile;
   try {
     // Going through a URI decodes percent-escapes (e.g. %20 for a space), which
     // URL.getFile() leaves in place and which then fail to match the path on disk.
     resourceFile = new File(resource.toURI());
   } catch (java.net.URISyntaxException e) {
     // URL is not a strictly valid URI: fall back to the raw path, as before.
     resourceFile = new File(resource.getFile());
   } catch (IllegalArgumentException e) {
     // Not a plain file: URI (e.g. jar:): fall back to the raw path, as before.
     resourceFile = new File(resource.getFile());
   }

   return (LMClassifier<?, ?>) AbstractExternalizable.readObject(resourceFile);
 }