/**
 * Writes the trainer's state to {@code objOut} in a fixed order: the training character count,
 * the language model, a flag saying whether a tokenizer factory is present, the tokenizer
 * factory and token counter (only when the flag is {@code true}), and finally the edit distance.
 *
 * @param objOut destination the state is written to
 * @throws IOException if any of the underlying writes fails
 */
public void writeExternal(ObjectOutput objOut) throws IOException {
  objOut.writeLong(this.mTrainer.mNumTrainingChars);
  objOut.writeObject(this.mTrainer.mLM);

  // The boolean records whether the tokenizer factory and token counter are written next.
  if (this.mTrainer.mTokenizerFactory == null) {
    objOut.writeBoolean(false);
  } else {
    objOut.writeBoolean(true);
    AbstractExternalizable.serializeOrCompile(this.mTrainer.mTokenizerFactory, objOut);
    objOut.writeObject(this.mTrainer.mTokenCounter);
  }

  AbstractExternalizable.serializeOrCompile(this.mTrainer.mEditDistance, objOut);
}
示例#2
0
 /**
  * Returns a sentiment classifier, either loaded from a previously stored model or trained from
  * the labelled tweets resource.
  *
  * <p>When training, each input line has its stop words removed and is labelled {@code POSITIVE}
  * if its first space-separated token equals {@code POSITIVE}; every other line is labelled
  * {@code NEGATIVE}. The trained classifier is then compiled to a file named
  * {@code naiveBayesModel}.
  *
  * @param readExistingModel if {@code true}, skip training and deserialize the stored model
  * @param trainingDataFile currently unused; training data is always read from the
  *     {@code INPUT_TWEETS_ORIG_TXT} classpath resource
  * @return the loaded or newly trained classifier
  * @throws IOException if the training resource is missing or unreadable, or the model cannot be
  *     read or written
  * @throws ClassNotFoundException if a class needed to deserialize the stored model is missing
  */
 private LMClassifier<?, ?> createModelInputAndTrain(
     boolean readExistingModel, String trainingDataFile)
     throws IOException, ClassNotFoundException {
   final String NAIVE_BAYES_SERIALIZED_MODEL = "naiveBayesModel";
   if (readExistingModel) {
     return deserialize(NAIVE_BAYES_SERIALIZED_MODEL);
   }

   // Fail with a descriptive IOException instead of a NullPointerException when the resource is
   // absent, and always release the stream once the lines have been read.
   java.io.InputStream tweets = classLoader.getResourceAsStream(INPUT_TWEETS_ORIG_TXT);
   if (tweets == null) {
     throw new IOException("Training resource not found on classpath: " + INPUT_TWEETS_ORIG_TXT);
   }
   List<String> lines;
   try {
     lines = IOUtils.readLines(tweets);
   } finally {
     tweets.close();
   }

   // NOTE(review): NEUTRAL is registered as a category but no line is ever labelled NEUTRAL
   // below - confirm whether that is intended.
   LMClassifier<?, ?> cls =
       NaiveBayesClassifier.createNGramProcess(new String[] {POSITIVE, NEGATIVE, NEUTRAL}, 7);
   Classification posClassification = new Classification(POSITIVE);
   Classification negClassification = new Classification(NEGATIVE);

   for (String line : lines) {
     String newLine = NLPUtils.removeStopWords(line);
     // The label is taken from the first space-separated token of the cleaned line.
     Classification label =
         POSITIVE.equals(newLine.split(" ")[0]) ? posClassification : negClassification;
     ((ObjectHandler) cls).handle(new Classified<String>(newLine, label));
   }

   AbstractExternalizable.compileTo((Compilable) cls, new File(NAIVE_BAYES_SERIALIZED_MODEL));
   return cls;
 }
  /**
   * Extracts keyterms from a question by running the chunker deserialized from
   * {@code modelFile} over it; the text covered by each chunk becomes one {@link Keyterm}.
   *
   * <p>NOTE(review): the model is deserialized on every call - consider loading it once.
   *
   * @param question the question text to chunk
   * @return one keyterm per chunk, in the iteration order of the chunk set
   * @throws IllegalStateException if the chunker model cannot be read or deserialized
   */
  @Override
  protected List<Keyterm> getKeyterms(String question) {
    System.out.println("CHECK " + question);

    // Fail loudly with the root cause; continuing with a null chunker would only produce an
    // uninformative NullPointerException a few lines further down.
    Chunker extractor;
    try {
      extractor = (Chunker) AbstractExternalizable.readObject(modelFile);
    } catch (IOException e) {
      throw new IllegalStateException("Unable to read chunker model from " + modelFile, e);
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException("Unable to deserialize chunker model from " + modelFile, e);
    }

    List<Keyterm> keyterms = new ArrayList<Keyterm>();
    Chunking entities = extractor.chunk(question);
    for (Chunk ne : entities.chunkSet()) {
      // Chunk offsets index into the text that was chunked, i.e. the question itself.
      keyterms.add(new Keyterm(question.substring(ne.start(), ne.end())));
    }
    return keyterms;
  }
示例#4
0
  /**
   * Initializes this resource by loading the serialized classifier referenced by
   * {@code modelFileUrl}.
   *
   * @return this resource, with {@code modelFile} and {@code classifier} populated
   * @throws ResourceInstantiationException if no model URL is configured, the URL cannot be
   *     converted to a local file, the file does not exist, or the model cannot be deserialized
   */
  @SuppressWarnings("rawtypes")
  public Resource init() throws ResourceInstantiationException {
    if (modelFileUrl == null) {
      throw new ResourceInstantiationException("No model file provided!");
    }

    try {
      modelFile = new File(modelFileUrl.toURI());
    } catch (URISyntaxException e) {
      throw new ResourceInstantiationException(e);
    } catch (IllegalArgumentException e) {
      // File(URI) rejects URIs that do not denote a local file (for example a non-"file" scheme);
      // report that through the declared exception instead of letting it escape unchecked.
      throw new ResourceInstantiationException(e);
    }

    if (!modelFile.exists()) {
      throw new ResourceInstantiationException(
          "modelFile:" + modelFileUrl.toString() + " does not exists");
    }

    try {
      classifier = (LMClassifier) AbstractExternalizable.readObject(modelFile);
    } catch (IOException e) {
      throw new ResourceInstantiationException(e);
    } catch (ClassNotFoundException e) {
      throw new ResourceInstantiationException(e);
    }

    return this;
  }
 /**
  * Writes the compiled form of the trainer's state to {@code objOut}: the compiled language
  * model, a flag saying whether a tokenizer factory is present, a copy of the token counter's
  * key set (only when the flag is {@code true}), and finally the edit distance.
  *
  * @param objOut destination the state is written to
  * @throws IOException if any of the underlying writes fails
  */
 public void writeExternal(ObjectOutput objOut) throws IOException {
   this.mTrainer.mLM.compileTo(objOut);

   if (this.mTrainer.mTokenizerFactory != null) {
     objOut.writeBoolean(true);
     // A fresh HashSet copy of the keys is written rather than the counter's own key-set view.
     objOut.writeObject(new HashSet(this.mTrainer.mTokenCounter.keySet()));
   } else {
     objOut.writeBoolean(false);
   }

   AbstractExternalizable.compileOrSerialize(this.mTrainer.mEditDistance, objOut);
 }
  /**
   * Initializes the extractor by loading the chunker model from the classpath resource
   * {@code /ne-en-bio-genetag.HmmChunker} and the gene dictionary from {@code /ref.dic}, one
   * dictionary entry per line.
   *
   * @param c the UIMA context passed on to the superclass
   * @throws ResourceInitializationException if either resource is missing or cannot be read
   */
  @Override
  public void initialize(UimaContext c) throws ResourceInitializationException {
    super.initialize(c);

    try {
      java.net.URL modelUrl =
          AustinMaKeytermExtractor.class.getResource("/ne-en-bio-genetag.HmmChunker");
      if (modelUrl == null) {
        // Routed through the IOException handler below instead of failing with a bare NPE.
        throw new FileNotFoundException("/ne-en-bio-genetag.HmmChunker not found on classpath");
      }
      nerModelFile = new File(new URI(modelUrl.toString()));
      chunker = (Chunker) AbstractExternalizable.readObject(nerModelFile);
    } catch (IOException e) {
      System.err.println("IOException in creating chunker");
      throw new ResourceInitializationException(
          "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e);
    } catch (ClassNotFoundException e) {
      System.err.println("ClassNotFoundException in creating chunker");
      throw new ResourceInitializationException(
          "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e);
    } catch (URISyntaxException e) {
      e.printStackTrace();
      throw new ResourceInitializationException(
          "Unable to load NER model file", "load_ner_model_error", new Object[] {nerModelFile}, e);
    }

    // Read in the geneDictionary
    geneDictionary = new HashSet<String>();
    try {
      java.net.URL dictionaryUrl = AustinMaKeytermExtractor.class.getResource("/ref.dic");
      if (dictionaryUrl == null) {
        // Routed through the FileNotFoundException handler below instead of a bare NPE.
        throw new FileNotFoundException("/ref.dic not found on classpath");
      }
      File geneDictionaryFile = new File(new URI(dictionaryUrl.toString()));
      // NOTE(review): FileReader decodes with the platform default charset - confirm that this
      // matches the encoding of the dictionary file.
      BufferedReader reader = new BufferedReader(new FileReader(geneDictionaryFile));
      try {
        String line;
        while ((line = reader.readLine()) != null) {
          geneDictionary.add(line);
        }
      } catch (IOException e) {
        System.err.println("IOException in reading gene dictionary file");
        throw new ResourceInitializationException(
            "Unable to load gene dictionary file", "load_gene_dic_error", new Object[] {}, e);
      } finally {
        // Always release the file handle, on success and on failure alike.
        try {
          reader.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing a reader fails.
        }
      }
    } catch (FileNotFoundException e) {
      System.err.println("FileNotFoundException in reading gene dictionary file");
      throw new ResourceInitializationException(
          "Unable to find gene dictionary file", "load_gene_dic_error", new Object[] {}, e);
    } catch (URISyntaxException e1) {
      e1.printStackTrace();
      throw new ResourceInitializationException(
          "Unable to find gene dictionary file", "load_gene_dic_error", new Object[] {}, e1);
    }
  }
 /**
  * Demo entry point: loads a serialized chunker, runs it over a fixed sample question and prints
  * the text covered by each chunk, one per line.
  *
  * @param args optional; when present, {@code args[0]} is used as the model file path instead of
  *     the built-in default
  */
 public static void main(String[] args) {
   String modelFile =
       (args != null && args.length > 0)
           ? args[0]
           : "src/main/resources/models/ne-en-bio-genetag.HmmChunker";
   String question =
       "What is the role of PrnP in mad cow disease (bovine spongiform encephalitis, BSE)?";
   try {
     Chunker achunker = (Chunker) AbstractExternalizable.readObject(new File(modelFile));
     Chunking chunking = achunker.chunk(question);
     for (Chunk chunk : chunking.chunkSet()) {
       System.out.println(question.substring(chunk.start(), chunk.end()));
     }
   } catch (Exception e) {
     // Top-level boundary: report what failed and why rather than an opaque marker.
     System.err.println("Failed to chunk question using model " + modelFile + ": " + e);
   }
 }
  /**
   * Creates the extractor by deserializing a {@link HiddenMarkovModel} from
   * {@code objects/pos-en-general-brown.HiddenMarkovModel}, resolved relative to this class's
   * code-source location, and wrapping it in an {@link HmmDecoder} with an emission cache.
   *
   * @throws ClassNotFoundException if a class needed to deserialize the model is missing
   * @throws IOException if the model file cannot be read
   */
  public ChunkerFeatureExtractor() throws ClassNotFoundException, IOException {
    @SuppressWarnings("unchecked")
    String codeLocation =
        this.getClass().getProtectionDomain().getCodeSource().getLocation().toString();

    // NOTE(review): presumably 5 drops a leading "file:" scheme and 10 drops a layout-specific
    // trailing path segment - confirm against the deployment layout.
    final int leadingCharsDropped = 5;
    final int trailingCharsDropped = 10;
    String baseDir =
        codeLocation.substring(leadingCharsDropped, codeLocation.length() - trailingCharsDropped);
    File modelLocation = new File(baseDir + "/objects/pos-en-general-brown.HiddenMarkovModel");

    Object deserialized = AbstractExternalizable.readObject(modelLocation);
    HiddenMarkovModel taggerModel = (HiddenMarkovModel) deserialized;

    FastCache<String, double[]> emissions = new FastCache<String, double[]>(100000);
    mPosTagger = new HmmDecoder(taggerModel, null, emissions);
  }
 /**
  * Initializes the component by deserializing the chunker stored in the file named by the
  * {@code TrainedModel} configuration parameter.
  *
  * @param aContext UIMA context supplying the configuration parameter
  * @throws ResourceInitializationException if the parameter is not set or the model cannot be
  *     read or deserialized
  */
 @Override
 public void initialize(UimaContext aContext) throws ResourceInitializationException {
   super.initialize(aContext);

   String model = (String) aContext.getConfigParameterValue("TrainedModel");
   if (model == null) {
     throw new ResourceInitializationException(
         new IllegalArgumentException("Configuration parameter \"TrainedModel\" is not set"));
   }
   modelFile = new File(model);

   // Surface load failures through the declared exception; swallowing them would leave
   // extractor unset.
   try {
     extractor = (Chunker) AbstractExternalizable.readObject(modelFile);
   } catch (IOException e) {
     throw new ResourceInitializationException(e);
   } catch (ClassNotFoundException e) {
     throw new ResourceInitializationException(e);
   }
 }
示例#10
0
 /**
  * Deserializes a classifier from a classpath resource that is backed by a plain file.
  *
  * @param fileName name of the resource, as passed to {@code classLoader.getResource}
  * @return the deserialized classifier
  * @throws IOException if the resource is missing, is not a plain file, or cannot be read
  * @throws ClassNotFoundException if a class needed to deserialize the model is missing
  */
 private LMClassifier<?, ?> deserialize(String fileName)
     throws ClassNotFoundException, IOException {
   java.net.URL resource = classLoader.getResource(fileName);
   if (resource == null) {
     throw new IOException("Model resource not found on classpath: " + fileName);
   }

   // Convert via URI rather than URL.getFile(): getFile() keeps percent-escapes such as %20,
   // which do not name an existing file when the path contains spaces.
   File modelFile;
   try {
     modelFile = new File(resource.toURI());
   } catch (java.net.URISyntaxException e) {
     throw new IOException("Model resource URL is not a valid URI: " + resource, e);
   } catch (IllegalArgumentException e) {
     throw new IOException("Model resource is not a plain file: " + resource, e);
   }

   return (LMClassifier<?, ?>) AbstractExternalizable.readObject(modelFile);
 }
示例#11
0
  /**
   * Checks the chunker's result for {@code in} via {@code assertChunkingResult}, then passes the
   * chunker through {@code AbstractExternalizable.serializeDeserialize} and casts the result to
   * {@link Chunker}.
   *
   * @param chunker chunker under test
   * @param in input text
   * @param types expected chunk types
   * @param starts expected chunk start offsets
   * @param ends expected chunk end offsets
   * @throws IOException if the serialization round trip fails
   */
  void assertChunking(TokenChunker chunker, String in, String[] types, int[] starts, int[] ends)
      throws IOException {
    assertChunkingResult(chunker, in, types, starts, ends);

    // NOTE(review): only the round trip and the cast are exercised here; the restored chunker's
    // output is never compared against the expectations - confirm whether that is intended.
    Object roundTripped = AbstractExternalizable.serializeDeserialize(chunker);
    Chunker restored = (Chunker) roundTripped;
  }