private void processExamples(String group, List<Example> examples) { Evaluation evaluation = new Evaluation(); if (examples.isEmpty()) return; final String prefix = "iter=0." + group; Execution.putOutput("group", group); LogInfo.begin_track_printAll("Processing %s: %s examples", prefix, examples.size()); LogInfo.begin_track("Dumping metadata"); dumpMetadata(group, examples); LogInfo.end_track(); LogInfo.begin_track("Examples"); for (int e = 0; e < examples.size(); e++) { Example ex = examples.get(e); LogInfo.begin_track_printAll("%s: example %s/%s: %s", prefix, e, examples.size(), ex.id); ex.log(); Execution.putOutput("example", e); StopWatchSet.begin("Parser.parse"); ParserState state = builder.parser.parse(params, ex, false); StopWatchSet.end(); out.printf("########## Example %s ##########\n", ex.id); dumpExample(exampleToLispTree(state)); LogInfo.logs("Current: %s", ex.evaluation.summary()); evaluation.add(ex.evaluation); LogInfo.logs("Cumulative(%s): %s", prefix, evaluation.summary()); LogInfo.end_track(); ex.predDerivations.clear(); // To save memory } LogInfo.end_track(); LogInfo.logs("Stats for %s: %s", prefix, evaluation.summary()); evaluation.logStats(prefix); evaluation.putOutput(prefix); LogInfo.end_track(); }
public static void autoTest(int k, int cals) { int settingCnt = 0; R = 20; K = k; for (L = 200; L <= 800; L += 200) { for (double rate = 1.25; rate <= 2; rate += 0.25) { settingCnt++; LogInfo.begin_track("Testing data setting %d", settingCnt); n = (int) (rate * L * K / (2 * R)) + 1; LogInfo.logs("n = %d, L = %d, K = %d, R = %d, redundancy rate: %.2f", n, L, K, R, rate); long stTime = System.currentTimeMillis(); for (int numCnt = 1; numCnt <= cals; numCnt++) { x = new double[n + 2]; xs = new double[n + 2]; y = new double[n + 2]; ys = new double[n + 2]; // Generate x positions Set<Integer> set = new HashSet<>(); while (set.size() < n) { int num = (int) (Math.random() * (L - 1)) + 1; set.add(num); } List<Integer> list = new ArrayList<>(set); Collections.sort(list); for (int i = 0; i < n; i++) xs[i + 1] = x[i + 1] = list.get(i); // Generate y positions set = new HashSet<>(); while (set.size() < n) { int num = (int) (Math.random() * (L - 1)) + 1; set.add(num); } list = new ArrayList<>(set); for (int i = 0; i < n; i++) ys[i + 1] = y[i + 1] = list.get(i); if (verbose) { LogInfo.begin_track("Data #%d:", numCnt); printXY(); } strongDetect(); if (verbose) LogInfo.end_track(); } long edTime = System.currentTimeMillis(); long time = edTime - stTime; LogInfo.logs( "Time: %dms, [n = %d, L = %d, K = %d, R = %d, redundancy rate: %.2f]", time, n, L, K, R, rate); LogInfo.end_track(); } } }
/** Builds the CoreNLP annotation pipeline and loads the alignment lexicon. */
public QuestionGenerator() throws IOException {
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize,ssplit,pos,parse");
  pipeline = new StanfordCoreNLP(props);

  LogInfo.begin_track("uploading lexicon");
  uploadAlignmentLexicon();
  LogInfo.logs("Number of lexicon formulas: %s", formulaToLexemsMap.size());
  LogInfo.end_track();
}
/**
 * Builds per-execution soft and hard confusion matrices from the example files of the
 * given iteration/group and logs both sets.
 *
 * Fix: the locals {@code softM}/{@code hardM} were declared but never used — the update
 * calls re-fetched {@code softMs.get(i)}/{@code hardMs.get(i)} instead. They are now used.
 *
 * @return false when no files exist for this (iter, group), true otherwise
 */
public boolean logs(int iter, String group) {
  List<File> files = Vis.getFilesPerExec(execPaths, iter, group);
  if (files == null) return false;
  LogInfo.logs("Reading files: %s", files);
  final int n = files.size();
  List<ConfusionMatrix> softMs = new ArrayList<ConfusionMatrix>(n);
  List<ConfusionMatrix> hardMs = new ArrayList<ConfusionMatrix>(n);
  for (int i = 0; i < n; i++) {
    softMs.add(new ConfusionMatrix());
    hardMs.add(new ConfusionMatrix());
  }
  // Thresholds for the "hard" matrices; -1.0 below means no thresholding (soft).
  final double ct = 0.5d;
  final double pt = 0.5d;
  for (List<Example> row : Vis.zipExamples(files)) {
    for (int i = 0; i < n; i++) {
      Example ex = row.get(i);
      ConfusionMatrix softM = softMs.get(i);
      ConfusionMatrix hardM = hardMs.get(i);
      updateConfusionMatrix(softM, ex, -1.0d, -1.0d);
      updateConfusionMatrix(hardM, ex, ct, pt);
    }
  }
  LogInfo.begin_track("Soft");
  logsMatrices(softMs);
  LogInfo.end_track();
  LogInfo.begin_track("Hard (compatThresh=%.2f, probThresh=%.2f)", ct, pt);
  logsMatrices(hardMs);
  LogInfo.end_track();
  return true;
}
public static void strongDetect() { used = new HashSet<>(); nw = nl = L / (2 * R); for (int k = 1; k <= K; k++) { LogInfo.begin_track("Starting round K = %d", k); // Select the Horizontal Grid Barrier int tarRow = HGBS(); LogInfo.logs("Target row position: %d @ K = %d", tarRow, k); // Construct the bipartite graph edges = new double[nl][n]; if (verbose) LogInfo.logs("Edge Matrix @ K = %d: ", k); for (int i = 0; i < nl; i++) { String str = ""; for (int j = 0; j < n; j++) { if (!used.contains(j + 1)) edges[i][j] = findDist((2 * i + 1) * R, (2 * tarRow + 1) * R, x[j + 1], y[j + 1]); else edges[i][j] = 2 * L * L; String tmp = String.format("%.2f", edges[i][j]); str += (tmp + "\t"); } if (verbose) LogInfo.logs(str); } // naive(edges, tarRow); Hungarian hungarian = new Hungarian(edges); int[] ret = hungarian.execute(); LogInfo.logs("Matching result by Hungarian Algo. @ K = %d: ", k); String str = ""; for (int i = 0; i < ret.length; i++) { str += ret[i] + "\t"; int sidx = ret[i]; x[sidx + 1] = (2 * i + 1) * R; y[sidx + 1] = (2 * tarRow + 1) * R; used.add(sidx + 1); } LogInfo.logs("[%s]", str); LogInfo.end_track(); } printRet(); // printRet4Draw(); }
public static void create(String inFile, String outFile, String th) throws Exception { BufferedReader br = new BufferedReader(new FileReader(inFile)); BufferedWriter bw = new BufferedWriter(new FileWriter(outFile)); Double threshold = Double.parseDouble(th); LogInfo.logs("Threshold: %f", threshold); String line = ""; int size = 0; LogInfo.logs("Start to create Synsets..."); while ((line = br.readLine()) != null) { LogInfo.begin_track("New Sysnet..."); String[] spt = line.split("\t"); ArrayList<String> list = new ArrayList<>(); for (int i = 1; i < spt.length; i++) { String[] words = spt[i].split(" "); StringBuffer phrase = new StringBuffer(); phrase.append(words[0]); for (int j = 1; j < words.length - 1; j++) phrase.append(" " + words[j]); String relation = phrase.toString(); Double score = Double.parseDouble(words[words.length - 1]); // if (!set.contains(relation) && score > threshold) { if (score > threshold) { list.add(spt[i]); set.add(relation); LogInfo.logs("relation: %s, score: %f", relation, score); } } if (list.size() > 1) { size++; bw.write("#" + list.get(0)); for (int j = 1; j < list.size(); j++) bw.write("\t" + list.get(j)); bw.write("\n"); LogInfo.logs("size: %d", list.size()); } LogInfo.end_track(); } br.close(); bw.close(); LogInfo.logs("Synsets created! size: %d", size); }
private void loadFormulaInfo() throws NumberFormatException, IOException { LogInfo.begin_track("Loading formula info..."); LogInfo.logs("Adding schema properties"); binaryFormulaInfoMap = freebaseInfo.createBinaryFormulaInfoMap(); unaryFormulaInfoMap = freebaseInfo.createUnaryFormulaInfoMap(); LogInfo.logs("Current number of binary formulas: " + binaryFormulaInfoMap.size()); LogInfo.logs("Current number of unary formulas: " + unaryFormulaInfoMap.size()); LogInfo.logs("Compuing reverse for schema formulas"); computeReverseFormulaInfo(); LogInfo.logs("Current number of binary formulas: " + binaryFormulaInfoMap.size()); for (BinaryFormulaInfo info : binaryFormulaInfoMap.values()) { MapUtils.addToSet(atomicExtype2ToBinaryMap, info.expectedType2, info.formula); if (!isCvt(info.expectedType1)) { addMappingFromType2ToFormula(info.expectedType2, info.formula); } } LogInfo.logs("Generate formulas through CVTs"); generateCvtFormulas(); // generate formulas for CVTs LogInfo.logs("Current number of binary formulas: " + binaryFormulaInfoMap.size()); LogInfo.end_track(); }
/**
 * Reads a layout instance from {@code inputFp}: line 1 is tab-separated "n L K R",
 * line 2 the x coordinates, line 3 the y coordinates. Fills the 1-based static
 * arrays x/xs and y/ys, then logs the loaded data.
 *
 * Fix: the reader is now closed via try-with-resources even if a line is missing or
 * a number fails to parse (previously it leaked on any exception).
 */
public static void readData(String inputFp) throws IOException {
  try (BufferedReader br = new BufferedReader(new FileReader(inputFp))) {
    String line = br.readLine();
    String[] spt = line.split("\t");
    n = Integer.parseInt(spt[0]);
    L = Integer.parseInt(spt[1]);
    K = Integer.parseInt(spt[2]);
    R = Integer.parseInt(spt[3]);
    // +2 slots: positions are 1-based with room for a sentinel.
    x = new double[n + 2];
    xs = new double[n + 2];
    y = new double[n + 2];
    ys = new double[n + 2];
    line = br.readLine();
    spt = line.split("\t");
    for (int i = 0; i < spt.length; i++) xs[i + 1] = x[i + 1] = Double.parseDouble(spt[i]);
    line = br.readLine();
    spt = line.split("\t");
    for (int i = 0; i < spt.length; i++) ys[i + 1] = y[i + 1] = Double.parseDouble(spt[i]);
  }
  LogInfo.begin_track("Input data loaded");
  LogInfo.logs("n = %d, L = %d, K = %d, R = %d", n, L, K, R);
  printXY();
  LogInfo.end_track();
}
/**
 * Scores one prediction state against the ground truth and returns per-example metrics
 * (accuracy, oracle, rank, reciprocal rank, entropy, plus ident-only variants), also
 * appending one record per example to predOut and entOut.
 */
public static Evaluation eval(InferState state) {
  // Print out information about how well we're doing.
  Evaluation evaluation = new Evaluation();
  Candidate trueCandidate = state.getTrueCandidate();
  // Candidates appear ranked; index 0 is taken as the prediction.
  Candidate predCandidate = state.getCandidates().get(0);
  PredictionContext context = state.getContext();
  NgramContext ngramContext = NgramContext.get(context);
  Statistics statistics = state.getStatistics();
  Corpus corpus = statistics.getProjectLangCorpus(context.getPath());
  DataSummary summary = statistics.getStatistic(NgramKNCounts.class, corpus).getSummary();
  Params params = state.getParams();
  // NOTE(review): oracle appears to mean the true candidate was scored/on the beam —
  // see the "offBeam" fallback when writing entOut below; confirm against InferState.
  boolean oracle = state.isOracle();
  int rank = state.getRank();
  double entropy = state.getEntropy();
  double reciprocalRank = state.getReciprocalRank();
  boolean isIdent = state.isIdent();
  boolean correct = state.isCorrect();
  String path = context.getPath();
  String trueTokenStr = trueCandidate.token;
  String predToken = predCandidate.token;
  evaluation.add("accuracy", correct);
  evaluation.add("oracle", oracle);
  evaluation.add("rank", rank);
  evaluation.add("reciprocalRank", reciprocalRank);
  // Entropy is only recorded when oracle holds.
  if (oracle) {
    evaluation.add("entropy", entropy);
  }
  // Identifier tokens get a parallel set of metrics.
  if (isIdent) {
    evaluation.add("identAccuracy", correct);
    evaluation.add("identOracle", oracle);
    if (oracle) {
      evaluation.add("identEntropy", entropy);
      evaluation.add("identReciprocalRank", reciprocalRank);
      // Per-cluster entropy of the true candidate.
      for (int i = 0; i < Main.clusters; i++) {
        evaluation.add("identEntropy" + i, -Math.log(trueCandidate.clusterProbs[i]));
      }
    }
  }
  String contextStr = ngramContext.contextStr();
  // Verbose diagnostics: example summary, KN probability traces, feature weights/diff.
  if (Main.verbose >= 2) {
    String entropyStr = oracle ? Fmt.D(entropy) : "N/A";
    begin_track(
        "Example %s [%s]: %s (%d candidates, rank %s, entropy %s)",
        path, correct ? "CORRECT" : "WRONG", contextStr,
        state.getCandidates().size(), rank, entropyStr);
    logs("True (prob= %s): [%s]", Fmt.D(trueCandidate.prob), trueTokenStr);
    logs("Pred (prob= %s): [%s]", Fmt.D(predCandidate.prob), predToken);
    if (oracle) {
      KneserNey.logKNIs(true);
      KneserNey.computeProb(CandidateNgram.get(context, trueCandidate), summary);
      KneserNey.logKNIs(false);
    }
    // begin_track("True");
    FeatureVector.logFeatureWeights("True", trueCandidate.features.toMap(), params);
    // for (int i = 0; i < Main.clusters; i++) {
    //   logs("cluster=%d, score %s, prob %s", i, Fmt.D(trueCandidate.clusterScores[i]),
    //       Fmt.D(trueCandidate.clusterProbs[i]));
    //   FeatureVector.logFeatureWeights("cluster=" + i,
    //       trueCandidate.clusterFeatures.toMap(),
    //       params, Main.clusterDecorators[i]);
    // }
    // end_track();
    KneserNey.logKNIs(true);
    KneserNey.computeProb(CandidateNgram.get(context, predCandidate), summary);
    KneserNey.logKNIs(false);
    FeatureVector.logFeatureWeights("Pred", predCandidate.features.toMap(), params);
    // for (Candidate candidate : candidates) {
    //   begin_track("Candidate " + candidate.token);
    //   for (int i = 0; i < Main.clusters; i++) {
    //     logs("cluster=%d, score %s, prob %s", i, Fmt.D(candidate.clusterScores[i]),
    //         Fmt.D(candidate.clusterProbs[i]));
    //     FeatureVector.logFeatureWeights("cluster=" + i,
    //         candidate.clusterFeatures.toMap(),
    //         params, Main.clusterDecorators[i]);
    //   }
    //   end_track();
    // }
    FeatureVector.logFeatureDiff(
        "True - Pred", trueCandidate.features, predCandidate.features, params);
    end_track();
  }
  // Longest context that has been seen
  int context_max_n = ngramContext.getMax_n() - 1;
  while (context_max_n > 0
      && !summary.counts[context_max_n].containsKey(ngramContext.subContext(context_max_n + 1)))
    context_max_n--;
  evaluation.add("context_max_n", context_max_n);
  // One tab-separated prediction record per example.
  predOut.println(
      "path=" + path
          + "\tident=" + (isIdent ? 1 : 0)
          + "\tcontext=" + contextStr
          + "\tcontext_max_n=" + context_max_n
          + "\ttrue=" + trueTokenStr
          + "\tpred=" + predToken
          + "\trank=" + rank
          + "\tentropy=" + entropy);
  predOut.flush();
  // Entropy record: path, token location, ident flag, entropy (or oov/offBeam), recip. rank.
  entOut.println(
      path + "\t" + state.getTrueToken().loc() + "\t" + (isIdent ? 1 : 0) + "\t"
          + (oracle ? entropy : (state.isOov() ? "oov" : "offBeam")) + "\t" + reciprocalRank);
  entOut.flush();
  return evaluation;
}