private void loadData(String filePath, final int[] inFactors, final int[] outFactors) throws IOException { FactorCollection factorCollection = FactorCollection.instance(); final String factorDelimiter = StaticData.instance().getFactorDelimiter(); VERBOSE.v(2, "Loading global lexical model from file " + filePath + "\n"); // m_inputFactors = new FactorMask(inFactors); // m_outputFactors = new FactorMask(outFactors); BufferedReader inFile = new BufferedReader(new FileReader(filePath)); // reading in data one line at a time int lineNum = 0; String line; while ((line = inFile.readLine()) != null) { ++lineNum; String token[] = Util.tokenize(line, " "); if (token.length != 3) // format checking { StringBuilder errorMessage = new StringBuilder(); errorMessage .append("Syntax error at ") .append(filePath) .append(":") .append(lineNum) .append('\n') .append(line) .append('\n'); UserMessage.add(errorMessage.toString()); System.exit(0); } // create the output word Word outWord = new Word(); String[] factorString = Util.tokenize(token[0], factorDelimiter); for (int i = 0; i < outFactors.length; i++) { final FactorDirection direction = FactorDirection.Output; final int factorType = outFactors[i]; final Factor factor = factorCollection.addFactor(direction, factorType, factorString[i]); outWord.setFactor(factorType, factor); } // create the input word Word inWord = new Word(); factorString = Util.tokenize(token[1], factorDelimiter); for (int i = 0; i < inFactors.length; i++) { final FactorDirection direction = FactorDirection.Input; final int factorType = inFactors[i]; final Factor factor = factorCollection.addFactor(direction, factorType, factorString[i]); inWord.setFactor(factorType, factor); } // maximum entropy feature score float score = Float.valueOf(token[2]); // std::cerr << "storing word " << *outWord << " " << *inWord << // " " << score << endl; // store feature in hash Map<Word, Float> dh = m_hash.get(outWord); if (dh == null) { dh = new HashMap<Word, Float>(); dh.put(inWord, score); m_hash.put(outWord, dh); } else { dh.put(inWord, score); outWord = null; } } }