/**
 * Builds an {@code Attribute} from a single-space-separated string whose tokens alternate
 * between a nature name and its frequency ({@code nature1 freq1 nature2 freq2 ...}).
 *
 * <p>Tokens are consumed in pairs; a trailing token without a partner is ignored because the
 * pair count is the token count divided by two (integer division).
 *
 * @param natureWithFrequency the alternating nature / frequency string
 * @return the populated attribute, or {@code null} if anything goes wrong (for example a
 *         {@code null} argument or a frequency that is not an integer); the failure is logged
 *         as a warning
 */
public static Attribute create(String natureWithFrequency) {
    try {
        String[] tokens = natureWithFrequency.split(" ");
        int pairCount = tokens.length / 2;
        Attribute result = new Attribute(pairCount);
        // slot indexes the attribute arrays, offset points at the nature token of the pair.
        for (int slot = 0, offset = 0; slot < pairCount; ++slot, offset += 2) {
            String natureName = tokens[offset];
            String frequencyText = tokens[offset + 1];
            // Resolve the nature before parsing the number, same order as the pair is written.
            result.nature[slot] = LexiconUtility.convertStringToNature(natureName, null);
            result.frequency[slot] = Integer.parseInt(frequencyText);
            result.totalFrequency += result.frequency[slot];
        }
        return result;
    } catch (Exception e) {
        String message = "使用字符串" + natureWithFrequency + "创建词条属性失败!"
                + TextUtility.exceptionToString(e);
        logger.warning(message);
        return null;
    }
}
/** * 从文件加载,同时缓存为二进制文件 * * @param path * @return */ public static MaxEntModel create(String path) { MaxEntModel m = new MaxEntModel(); try { BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8")); DataOutputStream out = new DataOutputStream(new FileOutputStream(path + Predefine.BIN_EXT)); br.readLine(); // type m.correctionConstant = Integer.parseInt(br.readLine()); // correctionConstant out.writeInt(m.correctionConstant); m.correctionParam = Double.parseDouble(br.readLine()); // getCorrectionParameter out.writeDouble(m.correctionParam); // label int numOutcomes = Integer.parseInt(br.readLine()); out.writeInt(numOutcomes); String[] outcomeLabels = new String[numOutcomes]; m.outcomeNames = outcomeLabels; for (int i = 0; i < numOutcomes; i++) { outcomeLabels[i] = br.readLine(); TextUtility.writeString(outcomeLabels[i], out); } // pattern int numOCTypes = Integer.parseInt(br.readLine()); out.writeInt(numOCTypes); int[][] outcomePatterns = new int[numOCTypes][]; for (int i = 0; i < numOCTypes; i++) { StringTokenizer tok = new StringTokenizer(br.readLine(), " "); int[] infoInts = new int[tok.countTokens()]; out.writeInt(infoInts.length); for (int j = 0; tok.hasMoreTokens(); j++) { infoInts[j] = Integer.parseInt(tok.nextToken()); out.writeInt(infoInts[j]); } outcomePatterns[i] = infoInts; } // feature int NUM_PREDS = Integer.parseInt(br.readLine()); out.writeInt(NUM_PREDS); String[] predLabels = new String[NUM_PREDS]; m.pmap = new DoubleArrayTrie<Integer>(); TreeMap<String, Integer> tmpMap = new TreeMap<String, Integer>(); for (int i = 0; i < NUM_PREDS; i++) { predLabels[i] = br.readLine(); TextUtility.writeString(predLabels[i], out); tmpMap.put(predLabels[i], i); } m.pmap.build(tmpMap); for (Map.Entry<String, Integer> entry : tmpMap.entrySet()) { out.writeInt(entry.getValue()); } m.pmap.save(out); // params Context[] params = new Context[NUM_PREDS]; int pid = 0; for (int i = 0; i < outcomePatterns.length; i++) { int[] 
outcomePattern = new int[outcomePatterns[i].length - 1]; for (int k = 1; k < outcomePatterns[i].length; k++) { outcomePattern[k - 1] = outcomePatterns[i][k]; } for (int j = 0; j < outcomePatterns[i][0]; j++) { double[] contextParameters = new double[outcomePatterns[i].length - 1]; for (int k = 1; k < outcomePatterns[i].length; k++) { contextParameters[k - 1] = Double.parseDouble(br.readLine()); out.writeDouble(contextParameters[k - 1]); } params[pid] = new Context(outcomePattern, contextParameters); pid++; } } // prior m.prior = new UniformPrior(); m.prior.setLabels(outcomeLabels); // eval m.evalParams = new EvalParameters(params, m.correctionParam, m.correctionConstant, outcomeLabels.length); } catch (Exception e) { logger.severe("从" + path + "加载最大熵模型失败!" + TextUtility.exceptionToString(e)); return null; } return m; }