示例#1
0
  /**
   * Parses every example of one group, dumps each parse result, and accumulates the per-example
   * evaluations into a group-level summary that is logged and written to the execution output.
   *
   * <p>Returns immediately, without logging anything, when {@code examples} is empty.
   *
   * @param group group name; combined with the fixed {@code "iter=0."} prefix for log lines and
   *     stored under the {@code "group"} output key
   * @param examples the examples to parse, processed in list order
   */
  private void processExamples(String group, List<Example> examples) {
    Evaluation groupEvaluation = new Evaluation();
    if (examples.isEmpty()) {
      return;
    }

    final String prefix = "iter=0." + group;
    Execution.putOutput("group", group);
    LogInfo.begin_track_printAll("Processing %s: %s examples", prefix, examples.size());

    // Metadata gets its own nested track so it is visually separated from the examples.
    LogInfo.begin_track("Dumping metadata");
    dumpMetadata(group, examples);
    LogInfo.end_track();

    LogInfo.begin_track("Examples");
    for (int idx = 0; idx < examples.size(); idx++) {
      Example current = examples.get(idx);
      LogInfo.begin_track_printAll(
          "%s: example %s/%s: %s", prefix, idx, examples.size(), current.id);
      current.log();
      Execution.putOutput("example", idx);

      // Only the parse itself is timed.
      StopWatchSet.begin("Parser.parse");
      ParserState state = builder.parser.parse(params, current, false);
      StopWatchSet.end();

      out.printf("########## Example %s ##########\n", current.id);
      dumpExample(exampleToLispTree(state));

      LogInfo.logs("Current: %s", current.evaluation.summary());
      groupEvaluation.add(current.evaluation);
      LogInfo.logs("Cumulative(%s): %s", prefix, groupEvaluation.summary());
      LogInfo.end_track();

      // Drop the predicted derivations once dumped, to save memory.
      current.predDerivations.clear();
    }
    LogInfo.end_track(); // closes "Examples"

    LogInfo.logs("Stats for %s: %s", prefix, groupEvaluation.summary());
    groupEvaluation.logStats(prefix);
    groupEvaluation.putOutput(prefix);
    LogInfo.end_track(); // closes "Processing ..."
  }
示例#2
0
  /**
   * Runs {@link #strongDetect()} on randomly generated data for a grid of settings and logs the
   * wall-clock time spent per setting.
   *
   * <p>{@code R} is fixed to 20 and {@code K} to {@code k}. {@code L} ranges over 200, 400, 600,
   * 800 and the redundancy rate over 1.25, 1.5, 1.75, 2.0; for each pair {@code n} is set to
   * {@code (int) (rate * L * K / (2 * R)) + 1}. Each data set consists of {@code n} distinct
   * integer x coordinates (sorted ascending) and {@code n} distinct integer y coordinates (left
   * in {@code HashSet} iteration order), all drawn from {@code [1, L - 1]} and stored at indices
   * {@code 1..n} of {@code x}/{@code xs} and {@code y}/{@code ys}.
   *
   * @param k value assigned to the static field {@code K}
   * @param cals number of random data sets generated and solved per setting
   * @throws IllegalArgumentException if a setting needs more distinct coordinates than the
   *     {@code L - 1} that exist; previously this made the sampling loop spin forever
   */
  public static void autoTest(int k, int cals) {
    int settingCnt = 0;
    R = 20;
    K = k;
    for (L = 200; L <= 800; L += 200) {
      for (double rate = 1.25; rate <= 2; rate += 0.25) {
        settingCnt++;
        n = (int) (rate * L * K / (2 * R)) + 1;
        // Only L - 1 distinct integers exist in [1, L - 1]; sampling n distinct values out of
        // fewer than n candidates can never finish, so fail fast instead of hanging.
        if (n > L - 1) {
          throw new IllegalArgumentException(
              String.format(
                  "Cannot draw n = %d distinct coordinates from [1, %d] (L = %d, K = %d, R = %d,"
                      + " redundancy rate: %.2f)",
                  n, L - 1, L, K, R, rate));
        }
        LogInfo.begin_track("Testing data setting %d", settingCnt);
        LogInfo.logs("n = %d, L = %d, K = %d, R = %d, redundancy rate: %.2f", n, L, K, R, rate);
        long stTime = System.currentTimeMillis();
        for (int numCnt = 1; numCnt <= cals; numCnt++) {
          x = new double[n + 2];
          xs = new double[n + 2];
          y = new double[n + 2];
          ys = new double[n + 2];

          // Generate x positions (sorted ascending)
          List<Integer> list = randomDistinctPositions(n, L);
          Collections.sort(list);
          for (int i = 0; i < n; i++) xs[i + 1] = x[i + 1] = list.get(i);

          // Generate y positions (kept in the set's iteration order, not sorted)
          list = randomDistinctPositions(n, L);
          for (int i = 0; i < n; i++) ys[i + 1] = y[i + 1] = list.get(i);

          if (verbose) {
            LogInfo.begin_track("Data #%d:", numCnt);
            printXY();
          }
          strongDetect();
          if (verbose) LogInfo.end_track();
        }
        long edTime = System.currentTimeMillis();
        long time = edTime - stTime;
        LogInfo.logs(
            "Time: %dms, [n = %d, L = %d, K = %d, R = %d, redundancy rate: %.2f]",
            time, n, L, K, R, rate);
        LogInfo.end_track();
      }
    }
  }

  /**
   * Draws {@code count} distinct random integers from {@code [1, length - 1]} by rejection
   * sampling. The caller must ensure {@code count <= length - 1}.
   */
  private static List<Integer> randomDistinctPositions(int count, int length) {
    Set<Integer> chosen = new HashSet<>();
    while (chosen.size() < count) {
      int num = (int) (Math.random() * (length - 1)) + 1;
      chosen.add(num);
    }
    return new ArrayList<>(chosen);
  }
示例#3
0
 public QuestionGenerator() throws IOException {
   Properties props = new Properties();
   props.put("annotators", "tokenize,ssplit,pos,parse");
   pipeline = new StanfordCoreNLP(props);
   LogInfo.begin_track("uploading lexicon");
   uploadAlignmentLexicon();
   LogInfo.logs("Number of lexicon formulas: %s", formulaToLexemsMap.size());
   LogInfo.end_track();
 }
示例#4
0
  /**
   * Builds and logs one "soft" and one "hard" confusion matrix per execution file for the given
   * iteration and group.
   *
   * <p>Examples from all files are read in lockstep via {@code Vis.zipExamples}; element
   * {@code i} of each row is fed to the {@code i}-th soft and hard matrix. The soft matrices are
   * updated with both thresholds set to {@code -1.0}, the hard ones with both set to {@code 0.5}.
   *
   * @param iter iteration whose files should be read
   * @param group example group whose files should be read
   * @return {@code false} if {@code Vis.getFilesPerExec} returned {@code null}, {@code true}
   *     once the matrices have been logged
   */
  public boolean logs(int iter, String group) {
    List<File> files = Vis.getFilesPerExec(execPaths, iter, group);

    if (files == null) {
      return false;
    }

    LogInfo.logs("Reading files: %s", files);
    final int n = files.size();

    // One matrix of each kind per file, index-aligned with `files`.
    List<ConfusionMatrix> softMs = new ArrayList<ConfusionMatrix>(n);
    List<ConfusionMatrix> hardMs = new ArrayList<ConfusionMatrix>(n);
    for (int i = 0; i < n; i++) {
      softMs.add(new ConfusionMatrix());
      hardMs.add(new ConfusionMatrix());
    }

    final double ct = 0.5d;
    final double pt = 0.5d;

    for (List<Example> row : Vis.zipExamples(files)) {
      for (int i = 0; i < n; i++) {
        Example ex = row.get(i);
        updateConfusionMatrix(softMs.get(i), ex, -1.0d, -1.0d);
        updateConfusionMatrix(hardMs.get(i), ex, ct, pt);
      }
    }

    LogInfo.begin_track("Soft");
    logsMatrices(softMs);
    LogInfo.end_track();

    LogInfo.begin_track("Hard (compatThresh=%.2f, probThresh=%.2f)", ct, pt);
    logsMatrices(hardMs);
    LogInfo.end_track();

    return true;
  }
示例#5
0
  /**
   * Runs {@code K} rounds of barrier construction over the static problem state.
   *
   * <p>Each round picks a target row with {@code HGBS()}, builds an {@code nl x n} cost matrix
   * from every grid cell centre of that row to every point not yet used (used points get the
   * cost {@code 2 * L * L}), solves the assignment with {@link Hungarian}, moves each matched
   * point onto its cell centre by overwriting {@code x}/{@code y}, and marks it as used. The
   * final result is printed with {@code printRet()}.
   *
   * <p>Points are 1-indexed in {@code x}, {@code y} and {@code used}; matrix columns and the
   * Hungarian result are 0-indexed, hence the {@code + 1} offsets.
   */
  public static void strongDetect() {
    used = new HashSet<>();
    nw = nl = L / (2 * R);
    for (int k = 1; k <= K; k++) {
      LogInfo.begin_track("Starting round K = %d", k);
      // Select the Horizontal Grid Barrier
      int tarRow = HGBS();

      LogInfo.logs("Target row position: %d @ K = %d", tarRow, k);

      // Construct the bipartite graph
      edges = new double[nl][n];
      if (verbose) LogInfo.logs("Edge Matrix @ K = %d: ", k);
      for (int i = 0; i < nl; i++) {
        // The textual row is only needed for verbose logging; skip the formatting otherwise.
        StringBuilder rowText = verbose ? new StringBuilder() : null;
        for (int j = 0; j < n; j++) {
          if (!used.contains(j + 1)) {
            edges[i][j] = findDist((2 * i + 1) * R, (2 * tarRow + 1) * R, x[j + 1], y[j + 1]);
          } else {
            edges[i][j] = 2 * L * L;
          }
          if (rowText != null) {
            rowText.append(String.format("%.2f", edges[i][j])).append("\t");
          }
        }
        if (rowText != null) LogInfo.logs(rowText.toString());
      }
      // naive(edges, tarRow);
      Hungarian hungarian = new Hungarian(edges);
      int[] ret = hungarian.execute();
      LogInfo.logs("Matching result by Hungarian Algo. @ K = %d: ", k);
      StringBuilder matching = new StringBuilder();
      for (int i = 0; i < ret.length; i++) {
        int sidx = ret[i];
        matching.append(sidx).append("\t");
        // Snap the matched point onto the centre of cell i in the target row.
        x[sidx + 1] = (2 * i + 1) * R;
        y[sidx + 1] = (2 * tarRow + 1) * R;
        used.add(sidx + 1);
      }
      LogInfo.logs("[%s]", matching.toString());
      LogInfo.end_track();
    }
    printRet();
    // printRet4Draw();
  }
示例#6
0
 /**
  * Filters a tab-separated synset file by score and writes the surviving synsets to a new file.
  *
  * <p>Each input line is split on tabs; the first field is skipped. Every remaining field is
  * split on spaces, its last token is parsed as a score and the preceding tokens form the
  * relation phrase. Fields whose score is strictly greater than the threshold are kept, and
  * their relation is added to the enclosing {@code set}. A line is written out (prefixed with
  * {@code #}, fields tab-separated) only when more than one field was kept.
  *
  * <p>Both files are closed even when reading, parsing or writing fails, and the threshold is
  * parsed before the output file is opened so an invalid threshold does not truncate it.
  *
  * @param inFile path of the input file
  * @param outFile path of the output file; created or overwritten
  * @param th score threshold as a decimal string
  * @throws Exception on I/O failure or if the threshold or a score is not a valid number
  */
 public static void create(String inFile, String outFile, String th) throws Exception {
   double threshold = Double.parseDouble(th);
   int size = 0;
   try (BufferedReader br = new BufferedReader(new FileReader(inFile));
       BufferedWriter bw = new BufferedWriter(new FileWriter(outFile))) {
     LogInfo.logs("Threshold: %f", threshold);
     String line;
     LogInfo.logs("Start to create Synsets...");
     while ((line = br.readLine()) != null) {
       LogInfo.begin_track("New Sysnet...");
       String[] spt = line.split("\t");
       ArrayList<String> list = new ArrayList<>();
       for (int i = 1; i < spt.length; i++) {
         String[] words = spt[i].split(" ");
         // Everything except the trailing score token is the relation phrase.
         StringBuilder phrase = new StringBuilder();
         phrase.append(words[0]);
         for (int j = 1; j < words.length - 1; j++) phrase.append(" ").append(words[j]);
         String relation = phrase.toString();
         double score = Double.parseDouble(words[words.length - 1]);
         // if (!set.contains(relation) && score > threshold) {
         if (score > threshold) {
           list.add(spt[i]);
           set.add(relation);
           LogInfo.logs("relation: %s, score: %f", relation, score);
         }
       }
       // A synset needs at least two members to be worth writing.
       if (list.size() > 1) {
         size++;
         bw.write("#" + list.get(0));
         for (int j = 1; j < list.size(); j++) bw.write("\t" + list.get(j));
         bw.write("\n");
         LogInfo.logs("size: %d", list.size());
       }
       LogInfo.end_track();
     }
   }
   LogInfo.logs("Synsets created! size: %d", size);
 }
示例#7
0
  /**
   * Populates the binary and unary formula info maps in three logged stages: schema properties
   * from {@code freebaseInfo}, reverse formulas, and formulas generated through CVTs.
   *
   * <p>After the reverse stage every binary formula is indexed by its second expected type in
   * {@code atomicExtype2ToBinaryMap}; formulas whose first expected type is not a CVT are
   * additionally registered through {@code addMappingFromType2ToFormula}.
   *
   * @throws NumberFormatException declared by this method; not raised directly by the code here
   * @throws IOException declared by this method; not raised directly by the code here
   */
  private void loadFormulaInfo() throws NumberFormatException, IOException {
    LogInfo.begin_track("Loading formula info...");

    // Stage 1: schema properties.
    LogInfo.logs("Adding schema properties");
    binaryFormulaInfoMap = freebaseInfo.createBinaryFormulaInfoMap();
    unaryFormulaInfoMap = freebaseInfo.createUnaryFormulaInfoMap();
    String binaryCountAfterSchema =
        "Current number of binary formulas: " + binaryFormulaInfoMap.size();
    LogInfo.logs(binaryCountAfterSchema);
    String unaryCountAfterSchema =
        "Current number of unary formulas: " + unaryFormulaInfoMap.size();
    LogInfo.logs(unaryCountAfterSchema);

    // Stage 2: reverse formulas, then index every binary formula by its second type.
    LogInfo.logs("Compuing reverse for schema formulas");
    computeReverseFormulaInfo();
    String binaryCountAfterReverse =
        "Current number of binary formulas: " + binaryFormulaInfoMap.size();
    LogInfo.logs(binaryCountAfterReverse);
    for (BinaryFormulaInfo binaryInfo : binaryFormulaInfoMap.values()) {
      MapUtils.addToSet(atomicExtype2ToBinaryMap, binaryInfo.expectedType2, binaryInfo.formula);
      if (isCvt(binaryInfo.expectedType1)) {
        continue;
      }
      addMappingFromType2ToFormula(binaryInfo.expectedType2, binaryInfo.formula);
    }

    // Stage 3: formulas that go through CVTs.
    LogInfo.logs("Generate formulas through CVTs");
    generateCvtFormulas();
    String binaryCountAfterCvt =
        "Current number of binary formulas: " + binaryFormulaInfoMap.size();
    LogInfo.logs(binaryCountAfterCvt);

    LogInfo.end_track();
  }
示例#8
0
 /**
  * Loads a problem instance from a three-line, tab-separated text file into the static fields.
  *
  * <p>Line 1 holds {@code n}, {@code L}, {@code K} and {@code R} as integers. Line 2 holds the x
  * coordinates and line 3 the y coordinates; the {@code i}-th value (0-based) is stored at
  * index {@code i + 1} of {@code x}/{@code xs} and {@code y}/{@code ys} respectively. The
  * loaded data is then logged.
  *
  * <p>The file is closed even when parsing fails.
  *
  * @param inputFp path of the input file
  * @throws IOException if the file cannot be read or ends before all three lines were read
  * @throws NumberFormatException if a field is not a valid number
  */
 public static void readData(String inputFp) throws IOException {
   try (BufferedReader br = new BufferedReader(new FileReader(inputFp))) {
     String[] spt = readRequiredLine(br, "header (n, L, K, R)", inputFp).split("\t");
     n = Integer.parseInt(spt[0]);
     L = Integer.parseInt(spt[1]);
     K = Integer.parseInt(spt[2]);
     R = Integer.parseInt(spt[3]);
     // Arrays are 1-indexed, with one spare slot at each end.
     x = new double[n + 2];
     xs = new double[n + 2];
     y = new double[n + 2];
     ys = new double[n + 2];
     spt = readRequiredLine(br, "x coordinates", inputFp).split("\t");
     for (int i = 0; i < spt.length; i++) xs[i + 1] = x[i + 1] = Double.parseDouble(spt[i]);
     spt = readRequiredLine(br, "y coordinates", inputFp).split("\t");
     for (int i = 0; i < spt.length; i++) ys[i + 1] = y[i + 1] = Double.parseDouble(spt[i]);
   }
   LogInfo.begin_track("Input data loaded");
   LogInfo.logs("n = %d, L = %d, K = %d, R = %d", n, L, K, R);
   printXY();
   LogInfo.end_track();
 }

 /** Reads the next line, or fails with a descriptive IOException if the file has ended. */
 private static String readRequiredLine(BufferedReader reader, String what, String path)
     throws IOException {
   String line = reader.readLine();
   if (line == null) {
     throw new IOException("Unexpected end of file in " + path + ": missing " + what + " line");
   }
   return line;
 }
示例#9
0
  /**
   * Scores one prediction and reports it.
   *
   * <p>Compares the true candidate with the top-ranked candidate (index 0 of
   * {@code state.getCandidates()}), records the per-example metrics in a fresh
   * {@link Evaluation}, logs a detailed trace when {@code Main.verbose >= 2}, and writes one
   * tab-separated line each to {@code predOut} and {@code entOut} (both flushed immediately).
   *
   * @param state inference result for a single prediction
   * @return a new evaluation holding the metrics of this example
   */
  public static Evaluation eval(InferState state) {
    Evaluation result = new Evaluation();
    Candidate gold = state.getTrueCandidate();
    Candidate top = state.getCandidates().get(0);

    PredictionContext context = state.getContext();
    NgramContext ngrams = NgramContext.get(context);
    Statistics stats = state.getStatistics();
    Corpus corpus = stats.getProjectLangCorpus(context.getPath());
    DataSummary summary = stats.getStatistic(NgramKNCounts.class, corpus).getSummary();
    Params params = state.getParams();
    boolean oracle = state.isOracle();
    int rank = state.getRank();
    double entropy = state.getEntropy();
    double reciprocalRank = state.getReciprocalRank();
    boolean isIdent = state.isIdent();
    boolean correct = state.isCorrect();

    String path = context.getPath();
    String goldToken = gold.token;
    String topToken = top.token;

    // Metrics recorded for every example.
    result.add("accuracy", correct);
    result.add("oracle", oracle);
    result.add("rank", rank);
    result.add("reciprocalRank", reciprocalRank);
    if (oracle) {
      result.add("entropy", entropy);
    }

    // Metrics recorded only for identifier tokens.
    if (isIdent) {
      result.add("identAccuracy", correct);
      result.add("identOracle", oracle);
      if (oracle) {
        result.add("identEntropy", entropy);
        result.add("identReciprocalRank", reciprocalRank);
        for (int cluster = 0; cluster < Main.clusters; cluster++) {
          double clusterEntropy = -Math.log(gold.clusterProbs[cluster]);
          result.add("identEntropy" + cluster, clusterEntropy);
        }
      }
    }

    String contextStr = ngrams.contextStr();
    if (Main.verbose >= 2) {
      String entropyStr;
      if (oracle) {
        entropyStr = Fmt.D(entropy);
      } else {
        entropyStr = "N/A";
      }
      String verdict = correct ? "CORRECT" : "WRONG";
      begin_track(
          "Example %s [%s]: %s (%d candidates, rank %s, entropy %s)",
          path, verdict, contextStr, state.getCandidates().size(), rank, entropyStr);
      logs("True (prob= %s): [%s]", Fmt.D(gold.prob), goldToken);
      logs("Pred (prob= %s): [%s]", Fmt.D(top.prob), topToken);

      // Kneser-Ney logging is switched on only around the probability computation.
      if (oracle) {
        KneserNey.logKNIs(true);
        KneserNey.computeProb(CandidateNgram.get(context, gold), summary);
        KneserNey.logKNIs(false);
      }
      // begin_track("True");
      FeatureVector.logFeatureWeights("True", gold.features.toMap(), params);
      // for (int i = 0; i < Main.clusters; i++) {
      //   logs("cluster=%d, score %s, prob %s", i, Fmt.D(trueCandidate.clusterScores[i]),
      // Fmt.D(trueCandidate.clusterProbs[i]));
      //   FeatureVector.logFeatureWeights("cluster=" + i,
      //                                   trueCandidate.clusterFeatures.toMap(),
      //                                   params, Main.clusterDecorators[i]);
      // }
      // end_track();
      KneserNey.logKNIs(true);
      KneserNey.computeProb(CandidateNgram.get(context, top), summary);
      KneserNey.logKNIs(false);
      FeatureVector.logFeatureWeights("Pred", top.features.toMap(), params);
      // for (Candidate candidate : candidates) {
      //   begin_track("Candidate " + candidate.token);
      //   for (int i = 0; i < Main.clusters; i++) {
      //     logs("cluster=%d, score %s, prob %s", i, Fmt.D(candidate.clusterScores[i]),
      // Fmt.D(candidate.clusterProbs[i]));
      //     FeatureVector.logFeatureWeights("cluster=" + i,
      //                                     candidate.clusterFeatures.toMap(),
      //                                     params, Main.clusterDecorators[i]);
      //   }
      //   end_track();
      // }
      FeatureVector.logFeatureDiff("True - Pred", gold.features, top.features, params);
      end_track();
    }

    // Longest context that has been seen: walk down from the maximum order until the counts
    // table of that order contains the corresponding sub-context (or 0 is reached).
    int seenOrder = ngrams.getMax_n() - 1;
    for (; seenOrder > 0; seenOrder--) {
      if (summary.counts[seenOrder].containsKey(ngrams.subContext(seenOrder + 1))) {
        break;
      }
    }
    result.add("context_max_n", seenOrder);

    String identFlag = isIdent ? "1" : "0";

    String predLine =
        "path=" + path
            + "\tident=" + identFlag
            + "\tcontext=" + contextStr
            + "\tcontext_max_n=" + seenOrder
            + "\ttrue=" + goldToken
            + "\tpred=" + topToken
            + "\trank=" + rank
            + "\tentropy=" + entropy;
    predOut.println(predLine);
    predOut.flush();

    // The entropy column carries a reason marker when the true token was not on the beam.
    String entLine = path + "\t" + state.getTrueToken().loc() + "\t" + identFlag + "\t";
    if (oracle) {
      entLine += entropy;
    } else if (state.isOov()) {
      entLine += "oov";
    } else {
      entLine += "offBeam";
    }
    entLine += "\t" + reciprocalRank;
    entOut.println(entLine);
    entOut.flush();

    return result;
  }