コード例 #1
0
ファイル: CoreDictionary.java プロジェクト: aunjgr/HanLP
 /**
  * Builds an {@code Attribute} from a string of single-space-separated tokens laid out as
  * {@code <nature> <frequency> <nature> <frequency> ...}.
  *
  * <p>Tokens are consumed in pairs; because the pair count is {@code tokens.length / 2}, an
  * unpaired trailing token is ignored. Each frequency is added to the attribute's
  * {@code totalFrequency}.
  *
  * @param natureWithFrequency the nature/frequency token string
  * @return the populated attribute, or {@code null} if any exception occurs while parsing
  *     (the failure is logged as a warning)
  */
 public static Attribute create(String natureWithFrequency) {
   try {
     String[] tokens = natureWithFrequency.split(" ");
     int pairCount = tokens.length / 2;
     Attribute result = new Attribute(pairCount);
     int index = 0;
     while (index < pairCount) {
       // Even positions hold the nature tag, the following odd position its frequency.
       int offset = index * 2;
       result.nature[index] = LexiconUtility.convertStringToNature(tokens[offset], null);
       result.frequency[index] = Integer.parseInt(tokens[offset + 1]);
       result.totalFrequency += result.frequency[index];
       index++;
     }
     return result;
   } catch (Exception e) {
     // Any failure (null input, non-numeric frequency, ...) is reported and mapped to null.
     String message =
         "使用字符串" + natureWithFrequency + "创建词条属性失败!" + TextUtility.exceptionToString(e);
     logger.warning(message);
     return null;
   }
 }
コード例 #2
0
ファイル: MaxEntModel.java プロジェクト: zhoufeng/HanLP
 /**
  * Loads a maximum-entropy model from a UTF-8 text file and, while parsing, writes a binary
  * copy of the same data to {@code path + Predefine.BIN_EXT}.
  *
  * <p>The text file is consumed line by line in this order: a type line (skipped), the
  * correction constant, the correction parameter, the outcome count followed by one label per
  * line, the pattern count followed by one space-separated integer pattern per line, the
  * predicate count followed by one predicate label per line, and finally one parameter value
  * per line.
  *
  * <p>Both the reader and the binary output stream are closed before this method returns,
  * whether loading succeeds or fails.
  *
  * @param path path of the text model file; the binary cache is written next to it
  * @return the loaded model, or {@code null} if any exception occurs (the failure is logged
  *     at severe level)
  */
 public static MaxEntModel create(String path) {
   MaxEntModel m = new MaxEntModel();
   // Declared outside the try so the finally block can release them on every path.
   BufferedReader br = null;
   DataOutputStream out = null;
   try {
     br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
     out = new DataOutputStream(new FileOutputStream(path + Predefine.BIN_EXT));
     br.readLine(); // type line, not used
     m.correctionConstant = Integer.parseInt(br.readLine()); // correction constant
     out.writeInt(m.correctionConstant);
     m.correctionParam = Double.parseDouble(br.readLine()); // correction parameter
     out.writeDouble(m.correctionParam);
     // outcome labels
     int numOutcomes = Integer.parseInt(br.readLine());
     out.writeInt(numOutcomes);
     String[] outcomeLabels = new String[numOutcomes];
     m.outcomeNames = outcomeLabels;
     for (int i = 0; i < numOutcomes; i++) {
       outcomeLabels[i] = br.readLine();
       TextUtility.writeString(outcomeLabels[i], out);
     }
     // outcome patterns: each line is a list of ints, written length-prefixed
     int numOCTypes = Integer.parseInt(br.readLine());
     out.writeInt(numOCTypes);
     int[][] outcomePatterns = new int[numOCTypes][];
     for (int i = 0; i < numOCTypes; i++) {
       StringTokenizer tok = new StringTokenizer(br.readLine(), " ");
       int[] infoInts = new int[tok.countTokens()];
       out.writeInt(infoInts.length);
       for (int j = 0; tok.hasMoreTokens(); j++) {
         infoInts[j] = Integer.parseInt(tok.nextToken());
         out.writeInt(infoInts[j]);
       }
       outcomePatterns[i] = infoInts;
     }
     // predicates (features)
     int NUM_PREDS = Integer.parseInt(br.readLine());
     out.writeInt(NUM_PREDS);
     String[] predLabels = new String[NUM_PREDS];
     m.pmap = new DoubleArrayTrie<Integer>();
     TreeMap<String, Integer> tmpMap = new TreeMap<String, Integer>();
     for (int i = 0; i < NUM_PREDS; i++) {
       predLabels[i] = br.readLine();
       TextUtility.writeString(predLabels[i], out);
       tmpMap.put(predLabels[i], i);
     }
     m.pmap.build(tmpMap);
     // The predicate ids are written in the TreeMap's sorted key order, then the trie itself.
     for (Map.Entry<String, Integer> entry : tmpMap.entrySet()) {
       out.writeInt(entry.getValue());
     }
     m.pmap.save(out);
     // parameters
     Context[] params = new Context[NUM_PREDS];
     int pid = 0;
     for (int i = 0; i < outcomePatterns.length; i++) {
       int[] pattern = outcomePatterns[i];
       // Element 0 is how many consecutive predicates use this pattern; the remaining
       // elements are handed to Context (presumably outcome indices -- confirm in Context).
       int[] outcomePattern = new int[pattern.length - 1];
       System.arraycopy(pattern, 1, outcomePattern, 0, outcomePattern.length);
       for (int j = 0; j < pattern[0]; j++) {
         // One parameter value per line, one per entry of the outcome pattern.
         double[] contextParameters = new double[pattern.length - 1];
         for (int k = 1; k < pattern.length; k++) {
           contextParameters[k - 1] = Double.parseDouble(br.readLine());
           out.writeDouble(contextParameters[k - 1]);
         }
         params[pid] = new Context(outcomePattern, contextParameters);
         pid++;
       }
     }
     // prior
     m.prior = new UniformPrior();
     m.prior.setLabels(outcomeLabels);
     // evaluation parameters
     m.evalParams =
         new EvalParameters(params, m.correctionParam, m.correctionConstant, outcomeLabels.length);
   } catch (Exception e) {
     logger.severe("从" + path + "加载最大熵模型失败!" + TextUtility.exceptionToString(e));
     return null;
   } finally {
     // Release both file handles on success and on failure; previously they were never closed.
     if (br != null) {
       try {
         br.close();
       } catch (java.io.IOException ignored) {
         // Nothing useful can be done if closing the reader fails; the outcome is unchanged.
       }
     }
     if (out != null) {
       try {
         out.close();
       } catch (java.io.IOException ignored) {
         // Best effort: every write above went straight to the file, so a close failure
         // does not change what this method returns.
       }
     }
   }
   return m;
 }