示例#1
0
  public Example(
      String name,
      String[] input,
      Lexicon lexicon,
      Map<ElementaryStringTree, ArrayList<Fringe>> shadowTreesMap,
      SuperTagger superTagger,
      FreqCounter freqCounter,
      Options opts) {
    this.opts = opts;
    this.name = name;
    this.lexicon = lexicon;
    this.shadowTreesMap = shadowTreesMap;
    this.superTagger = superTagger;
    this.freqCounter = freqCounter;

    if (input[0].equals(
        "NOT PARSED")) // we could not extract the lexicon or missing entirely from gold standard
                       // dataset
    {
      sentence = posTagged = parsed = goldStandardNoTraces = solution = "";
      numOfWords = 0;
      notParsed = true;
    } else {
      if (opts.estimateProcDifficulty) {
        if (opts.inputType == Options.InputType.dundee) {
          readDundeeInput(input[0]);
        } else if (opts.inputType == Options.InputType.posTagged
            || (opts.inputType == Options.InputType.pltag
                && input[0].contains("\t"))) // tab delimited POS-word pairs
        {
          readPosTagged(input[0]);
        } else if (opts.inputType == Options.InputType.pltag) {
          readPennTreebank(input, true);
          solution = parsed;
        } else // input is plain text without POS tags
        {
          if (opts.goldPosTags) {
            readPosTagged(PosTagger.posTagLine(input[0]));
          } else {
            StringBuilder str = new StringBuilder();
            //                    for(String word : removeQuotesPlain(input[0]).split(" "))
            for (String word : Utils.tokenizeStanford(input[0]).split(" ")) {
              str.append(String.format("N/A %s\t", word));
            }
            readPosTagged(str.toString().trim());
          }
        }
        solution = "";
      } else {
        readPennTreebank(input, true);
        solution = parsed;
      }
      this.numOfWords = posTagged.split("\t").length;
    }
  }