예제 #1
0
 /**
  * Gathers the lexical entries of every parse held in {@code bestParses} (the parses used for the
  * highest scoring result).
  *
  * @return a newly created list containing each parse's lexical entries, appended in iteration
  *     order
  */
 public List getMaxLexEntries() {
   List lexEntries = new LinkedList();
   for (ParseResult parse : bestParses) {
     // Each parse contributes all of its entries; duplicates are not filtered out.
     lexEntries.addAll(parse.getLexEntries());
   }
   return lexEntries;
 }
예제 #2
0
  /**
   * Parses {@code s} using one of this union's branch types.
   *
   * <p>When {@code targetUnionDecisions} holds an entry for this union's {@code name}, only the
   * branch at that index is attempted. Otherwise the branches are tried in order and the first
   * acceptable result wins.
   *
   * @param s the text to parse
   * @param targetUnionDecisions optional map from union name to the branch index to use; may be
   *     {@code null}
   * @param mustConsumeStr if {@code true}, a result is accepted only when the text left over after
   *     parsing is empty once trimmed
   * @return a fresh {@link ParseResult} copied from the accepted branch result, or {@code null} if
   *     no branch produced an acceptable result
   */
  ParseResult internalParse(
      String s, Map<String, Integer> targetUnionDecisions, boolean mustConsumeStr) {
    Integer chosenBranch = (targetUnionDecisions == null) ? null : targetUnionDecisions.get(name);

    if (chosenBranch != null) {
      // A decision was supplied for this union: try exactly that branch.
      ParseResult attempt =
          unionTypes.get(chosenBranch).internalParse(s, targetUnionDecisions, false);
      boolean acceptable =
          attempt != null
              && (!mustConsumeStr || attempt.getRemainingString().trim().length() == 0);
      return acceptable
          ? new ParseResult(attempt.getData(), attempt.hasData(), attempt.getRemainingString())
          : null;
    }

    // No decision for this union: try every branch and keep the first acceptable one.
    for (InferredType branch : unionTypes) {
      ParseResult attempt = branch.internalParse(s, targetUnionDecisions, false);
      if (attempt == null) {
        continue;
      }
      if (!mustConsumeStr || attempt.getRemainingString().trim().length() == 0) {
        return new ParseResult(attempt.getData(), attempt.hasData(), attempt.getRemainingString());
      }
    }
    return null;
  }
예제 #3
0
 /**
  * Gathers the lexical entries of the best parses selected by {@code findBestParses(allParses,
  * sem)}.
  *
  * @param sem the semantics passed to {@code findBestParses} to select parses
  * @return a newly created list containing the lexical entries of every selected parse
  */
 public List getMaxLexEntriesFor(Exp sem) {
   List lexEntries = new LinkedList();
   List<ParseResult> selected = findBestParses(allParses, sem);
   for (ParseResult parse : selected) {
     lexEntries.addAll(parse.getLexEntries());
   }
   return lexEntries;
 }
예제 #4
0
 /**
  * Builds the feature vector for the current highest-scoring parse(s).
  *
  * <p>A fresh vector is handed to {@code getFeats} of every parse in {@code bestParses}. When
  * there is more than one such parse, the vector is divided by their count.
  *
  * @return the resulting feature vector
  */
 public HashVector getFeats() {
   HashVector feats = new HashVector();
   for (ParseResult parse : bestParses) {
     parse.getFeats(feats);
   }
   int parseCount = bestParses.size();
   if (parseCount > 1) {
     feats.divideBy(parseCount);
   }
   return feats;
 }
예제 #5
0
  /**
   * Parses {@code s} as a struct by running each member of {@code structTypes} in order, each one
   * continuing from the text left over by the previous member.
   *
   * @param s the text to parse
   * @param targetUnionDecisions union decisions forwarded unchanged to every member parse
   * @param mustConsumeStr if {@code true}, the parse fails when non-whitespace text remains after
   *     the last member
   * @return a {@link ParseResult} wrapping the populated record and the remaining text, or {@code
   *     null} if the input runs out before a member is parsed, a member fails to parse, or
   *     unconsumed text remains while {@code mustConsumeStr} is set
   */
  ParseResult internalParse(
      String s, Map<String, Integer> targetUnionDecisions, boolean mustConsumeStr) {
    GenericData.Record record = new GenericData.Record(getAvroSchema());
    boolean sawData = false;
    String remaining = s;

    for (InferredType member : structTypes) {
      // Every member needs some input to work with.
      if (remaining.isEmpty()) {
        return null;
      }
      ParseResult memberResult = member.internalParse(remaining, targetUnionDecisions, false);
      if (memberResult == null) {
        return null;
      }
      // Only members that report data are stored in the record.
      if (memberResult.hasData()) {
        record.put(member.getName(), memberResult.getData());
        sawData = true;
      }
      remaining = memberResult.getRemainingString();
    }

    if (mustConsumeStr && remaining.trim().length() != 0) {
      return null;
    }
    return new ParseResult(record, sawData, remaining);
  }
예제 #6
0
 /**
  * Reports whether any parse in {@code allParses} has an expression equal to {@code sem}.
  *
  * @param sem the expression to look for
  * @return {@code true} if at least one parse's expression equals {@code sem}
  */
 public boolean hasExp(Exp sem) {
   boolean found = false;
   for (ParseResult parse : allParses) {
     if (parse.getExp().equals(sem)) {
       found = true;
       break;
     }
   }
   return found;
 }
예제 #7
0
 /**
  * Builds the feature vector for the best parse(s) selected by {@code findBestParses(allParses,
  * sem)}.
  *
  * <p>A fresh vector is handed to {@code getFeats} of every selected parse. When more than one
  * parse is selected, the vector is divided by their count.
  *
  * @param sem the semantics used to select parses
  * @return the resulting feature vector
  */
 public HashVector getFeats(Exp sem) {
   List<ParseResult> selected = findBestParses(allParses, sem);
   HashVector feats = new HashVector();
   for (ParseResult parse : selected) {
     parse.getFeats(feats);
   }
   int selectedCount = selected.size();
   if (selectedCount > 1) {
     feats.divideBy(selectedCount);
   }
   return feats;
 }
  /**
   * Reads the next line(s) and notifies subscribed observers.
   *
   * <p>Among all per-symbol sources, those whose buffered timestamp equals the smallest value in
   * {@code nextMilliseconds} are polled: their buffered line is parsed, the source is advanced to
   * its next valid line, and the parsed results are merged into one composed result that is passed
   * to every entry of {@code spreadTradesMgr}.
   *
   * @return true if at least one buffered line was parsed and forwarded during this call; false if
   *     the reader is closed, was already exhausted, nothing could be delivered (in which case
   *     accumulation is ended and the reader is marked exhausted), or an exception occurred (it is
   *     logged).
   */
  public boolean next() {
    if (this.isClosed) return false;
    if (this.nomore) return false;
    try {
      // unpoll[i] == true means source i is NOT read during this call because its buffered
      // timestamp is later than the minimum; sources tied at the minimum are all polled.
      final boolean unpoll[] = new boolean[symbols.length];
      long minTime = nextMilliseconds[0];
      for (int i = 1; i < symbols.length; i++) {
        if (nextMilliseconds[i] > minTime) unpoll[i] = true;
        else if (nextMilliseconds[i] < minTime) {
          // New minimum found: every earlier source is now later than the minimum.
          minTime = nextMilliseconds[i];
          for (int j = 0; j < i; j++) unpoll[j] = true;
        }
      }
      // NOTE(review): nextMilliseconds[i] is not updated below when source i runs out of lines, so
      // an exhausted source may keep holding the minimum timestamp and prevent the remaining
      // sources from being polled -- confirm whether ending at the first exhausted source is
      // intended.

      boolean somethingToReturn = false;
      final ParseResult compossedResult = new ParseResult();
      for (int i = 0; i < symbols.length; i++) {
        if (unpoll[i] || this.nextLine[i] == null) {
          // Nothing to send to listeners.
        } else {
          somethingToReturn = true;
          // Parse the line that was buffered for this source by an earlier read.
          final ParseResult localResult = this.lineParser.parse(this.nextLine[i]);
          // Move to next valid line.
          String line = partReader[i].readLine();
          while (line != null && !this.lineParser.isValid(line)) {
            line = partReader[i].readLine();
          }
          if (line != null) {
            // NOTE(review): the symbol check uses the result of the line just parsed, not the
            // freshly read line; on a mismatch the freshly read line is discarded -- confirm.
            if (!symbols[i].equals(localResult.getSymbol(0))) {
              this.nextLine[i] = null;
            } else {
              // Buffer the new line and its timestamp for a later call.
              this.nextLine[i] = line;
              this.nextMilliseconds[i] = this.lineParser.getUTCTimestamp(line).getTimeInMillis();
            }
          } else {
            // End of this source: no further line is buffered.
            this.nextLine[i] = null;
          }
          compossedResult.merge(localResult);
        }
      }
      if (somethingToReturn) {
        // Hand the merged result to every spread-trades manager.
        for (int i = 0; i < this.spreadTradesMgr.length; i++) {
          this.spreadTradesMgr[i].accumulate(compossedResult);
        }
      } else {
        // Nothing was delivered: remember that and let the managers finish.
        this.nomore = true;
        for (int i = 0; i < this.spreadTradesMgr.length; i++) {
          this.spreadTradesMgr[i].endAccumulation();
        }
      }
      return somethingToReturn;
    } catch (Exception e) {
      // Any failure is logged and reported to the caller as "no more lines".
      log.log(Level.SEVERE, "Exception dealing with file '" + filePath + "'", e);
      return false;
    }
  }
예제 #9
0
  /**
   * Parses {@code str} into a {@link GenericContainer}.
   *
   * <p>A parse without any union decisions is tried first. If that yields no data, every
   * combination of the candidate union decisions reported by {@code findCandidateUnionDecisions()}
   * is tried in turn, and the first combination that yields data wins.
   *
   * @param str the text to parse; it must be consumed completely
   * @return the parsed data, or {@code null} if no attempt produced data
   */
  public GenericContainer parse(String str) {
    // First attempt: let every union try its branches freely.
    ParseResult attempt = internalParse(str, null, true);
    if (attempt != null && attempt.hasData()) {
      return (GenericContainer) attempt.getData();
    }

    // Expand the per-union candidate indices into full assignments (a cross product).
    List<HashMap<String, Integer>> assignments = new ArrayList<HashMap<String, Integer>>();
    for (Map.Entry<String, Set<Integer>> candidate : findCandidateUnionDecisions().entrySet()) {
      String unionName = candidate.getKey();
      Set<Integer> branchIndices = candidate.getValue();

      List<HashMap<String, Integer>> expanded = new ArrayList<HashMap<String, Integer>>();
      if (assignments.isEmpty()) {
        // Nothing to extend yet: start one assignment per candidate index.
        for (Integer branchIndex : branchIndices) {
          HashMap<String, Integer> seed = new HashMap<String, Integer>();
          seed.put(unionName, branchIndex);
          expanded.add(seed);
        }
      } else {
        // Extend every existing assignment with every candidate index of this union.
        for (HashMap<String, Integer> partial : assignments) {
          for (Integer branchIndex : branchIndices) {
            HashMap<String, Integer> extended = new HashMap<String, Integer>(partial);
            extended.put(unionName, branchIndex);
            expanded.add(extended);
          }
        }
      }
      assignments = expanded;
    }

    // Try each full assignment until one produces data.
    for (Map<String, Integer> targetUnionDecisions : assignments) {
      attempt = internalParse(str, targetUnionDecisions, true);
      if (attempt != null && attempt.hasData()) {
        return (GenericContainer) attempt.getData();
      }
    }
    return null;
  }
예제 #10
0
 /**
  * Selects the highest-scoring parses from {@code all}.
  *
  * <p>Parses whose expression has a {@code null} inferred type are skipped. When {@code e} is
  * non-null, only parses whose expression equals {@code e} are considered. All parses tied at the
  * top score are returned.
  *
  * @param all the parses to choose from
  * @param e the expression the parses must match, or {@code null} to accept any expression
  * @return the parses sharing the highest score; empty if no parse qualifies
  */
 private List<ParseResult> findBestParses(List<ParseResult> all, Exp e) {
   List<ParseResult> winners = new LinkedList<ParseResult>();
   double topScore = -Double.MAX_VALUE;
   for (ParseResult candidate : all) {
     if (candidate.getExp().inferType() == null) {
       continue;
     }
     if (e != null && !candidate.getExp().equals(e)) {
       continue;
     }
     double score = candidate.getScore();
     if (score > topScore) {
       // Strictly better: restart the winner list with this parse.
       topScore = score;
       winners.clear();
       winners.add(candidate);
     } else if (score == topScore) {
       // Tie with the current best: keep both.
       winners.add(candidate);
     }
   }
   return winners;
 }
예제 #11
0
  /**
   * Loads a PeakML file that contains only mass chromatogram entries.
   *
   * <p>Every IPeak document must carry a type attribute equal to {@link
   * PeakMLWriter#TYPE_MASSCHROMATOGRAM}; otherwise an {@link XmlParserException} is thrown. The
   * returned {@link ParseResult} holds the parsed header and an {@link IPeakSet} of the mass
   * chromatograms that were read.
   *
   * @param in The input-stream to load the data from.
   * @param listener Optional progress listener (may be {@code null}). It is updated after each
   *     IPeak document once a header with a non-zero peak count has been read.
   * @return The header and peak information stored in the file.
   * @throws IOException Thrown on an IOException.
   * @throws XmlParserException Thrown when the type attribute is missing or names another type.
   */
  public static ParseResult parseMassChromatograms(InputStream in, ParserProgressListener listener)
      throws IOException, XmlParserException {
    final ParserProgressListener progress = listener;
    final ParseResult parsed = new ParseResult();
    final Vector<MassChromatogram<? extends Peak>> chromatograms =
        new Vector<MassChromatogram<? extends Peak>>();

    XmlParserListener handler =
        new XmlParserListener() {
          /** Number of progress updates sent so far. */
          private int reported = 0;

          @Override
          public void onDocument(Document document, String xpath) throws XmlParserException {
            if (!xpath.equals(XPATH_IPEAK)) {
              if (xpath.equals(XPATH_HEADER)) {
                parsed.header = parseHeader(document.getFirstChild());
              }
              return;
            }

            Node node = document.getChildNodes().item(0);

            // Only mass chromatogram entries are accepted by this loader.
            Node typeattribute = node.getAttributes().getNamedItem(PeakMLWriter.TYPE);
            if (typeattribute == null) {
              throw new XmlParserException("Failed to locate the type attribute.");
            }
            String declaredType = typeattribute.getNodeValue();
            if (!declaredType.equals(PeakMLWriter.TYPE_MASSCHROMATOGRAM)) {
              throw new XmlParserException(
                  "IPeak ("
                      + declaredType
                      + ") is not of type: '"
                      + PeakMLWriter.TYPE_MASSCHROMATOGRAM
                      + "'");
            }

            MassChromatogram<? extends Peak> chromatogram = parseMassChromatogram(node);
            if (chromatogram != null) {
              chromatograms.add(chromatogram);
            }

            // Progress can be computed only once the header has supplied a non-zero peak count.
            if (progress != null && parsed.header != null && parsed.header.getNrPeaks() != 0) {
              progress.update((100. * reported++) / parsed.header.getNrPeaks());
            }
          }
        };
    run(in, handler);

    parsed.measurement = new IPeakSet<MassChromatogram<? extends Peak>>(chromatograms);
    return parsed;
  }
예제 #12
0
  /**
   * Loads the data of a PeakML file in the order it is encountered, without restricting the peak
   * type. Mixed peak types in one file are therefore possible. Each IPeak document is converted
   * with {@code parseIPeak}, and the results are collected into an {@link IPeakSet} of {@link
   * IPeak} stored in the returned {@link ParseResult} together with the header.
   *
   * <p>Progress is reported through {@link ParserProgressListener}. It is computed from the number
   * of updates sent so far and {@link Header#getNrPeaks()}, and is only reported once a header
   * with a non-zero peak count has been read.
   *
   * <p>Note: the {@code loadall} parameter is not consulted by the current implementation; every
   * peak is loaded through {@code parseIPeak} regardless of its value.
   *
   * @param in The input-stream to load the data from.
   * @param listener The progress listener; may be {@code null}.
   * @param loadall Currently unused (see note above).
   * @return The header and peak information stored in the file.
   * @throws IOException Thrown on an IOException.
   * @throws XmlParserException Thrown when an IPeak document has no type attribute.
   */
  public static ParseResult parse(InputStream in, ParserProgressListener listener, boolean loadall)
      throws IOException, XmlParserException {
    final ParserProgressListener progress = listener;
    final ParseResult parsed = new ParseResult();
    final Vector<IPeak> collected = new Vector<IPeak>();

    XmlParserListener handler =
        new XmlParserListener() {
          /** Number of progress updates sent so far. */
          private int reported = 0;

          @Override
          public void onDocument(Document document, String xpath) throws XmlParserException {
            if (!xpath.equals(XPATH_IPEAK)) {
              if (xpath.equals(XPATH_HEADER)) {
                parsed.header = parseHeader(document.getFirstChild());
              }
              return;
            }

            Node node = document.getChildNodes().item(0);

            // Every IPeak entry is required to declare its type.
            Node typeattribute = node.getAttributes().getNamedItem(PeakMLWriter.TYPE);
            if (typeattribute == null) {
              throw new XmlParserException("Failed to locate a type attribute.");
            }

            IPeak peak = parseIPeak(node);
            if (peak != null) {
              collected.add(peak);
            }

            // Progress can be computed only once the header has supplied a non-zero peak count.
            if (progress != null && parsed.header != null && parsed.header.getNrPeaks() != 0) {
              progress.update((100. * reported++) / parsed.header.getNrPeaks());
            }
          }
        };
    run(in, handler);

    parsed.measurement = new IPeakSet<IPeak>(collected);
    return parsed;
  }
예제 #13
0
  /**
   * Parses {@code s} as an array by applying {@code bodyType} repeatedly, each time continuing
   * from the text left over by the previous element, until an element fails to parse.
   *
   * @param s the text to parse
   * @param targetUnionDecisions union decisions forwarded unchanged to every element parse
   * @param mustConsumeStr if {@code true}, the parse fails when non-whitespace text remains after
   *     the last element
   * @return a {@link ParseResult} wrapping the collected array (always flagged as having data,
   *     even when no element was parsed) and the remaining text, or {@code null} if unconsumed
   *     text remains while {@code mustConsumeStr} is set
   */
  ParseResult internalParse(
      String s, Map<String, Integer> targetUnionDecisions, boolean mustConsumeStr) {
    // Initial capacity of 5, as before; the array grows as elements are added.
    GenericData.Array gda = new GenericData.Array(5, getAvroSchema());
    String currentStr = s;

    // NOTE(review): the loop ends only when the element parse returns null. This presumes the
    // element type cannot succeed without consuming input -- confirm, otherwise it would not
    // terminate.
    while (true) {
      ParseResult pr = bodyType.internalParse(currentStr, targetUnionDecisions, false);
      if (pr == null) {
        break;
      }
      assert (pr.hasData());

      gda.add(pr.getData());
      currentStr = pr.getRemainingString();
    }
    if (mustConsumeStr && currentStr.trim().length() != 0) {
      return null;
    }
    return new ParseResult(gda, true, currentStr);
  }
예제 #14
0
 /**
  * Filters {@code all}, dropping every entry for which a later entry has an equal expression and
  * a score that is at least as high. The size of {@code all} is printed to standard output first.
  *
  * @param all the parses to filter
  * @return a new list with the surviving parses in their original order
  */
 private List<ParseResult> removeRepeats(List<ParseResult> all) {
   System.out.println("----------------------- all.size equals to --------------" + all.size());
   List<ParseResult> survivors = new LinkedList<ParseResult>();
   final int total = all.size();
   candidates:
   for (int i = 0; i < total; i++) {
     ParseResult candidate = all.get(i);
     for (int j = i + 1; j < total; j++) {
       ParseResult later = all.get(j);
       // A later parse with the same expression and an equal or higher score supersedes this one.
       if (candidate.getExp().equals(later.getExp())
           && candidate.getScore() <= later.getScore()) {
         continue candidates;
       }
     }
     survivors.add(candidate);
   }
   return survivors;
 }
예제 #15
0
  /**
   * Compares the parser's current best parse(s) with the expected semantics {@code sem} and
   * updates this object's counters.
   *
   * <ul>
   *   <li>Exactly one best parse: {@code parsed} is incremented, then either {@code
   *       correctParses} (and {@code true} is returned) or {@code wrongParses}.
   *   <li>Several best parses: {@code noParses} is incremented and no answer is given.
   *   <li>No parse: {@code noParses} is incremented. If {@code emptyTest} is set, the sentence is
   *       re-parsed with a temporary lexicon holding one {@code Cat.EMP} entry per token of {@code
   *       Globals.tokens}, and {@code emptyCorrect} / {@code emptyNoParses} are updated. This
   *       path never returns {@code true}.
   * </ul>
   *
   * <p>{@code noAnswer} is set to whether the initial best-parse list was empty. Diagnostics are
   * printed to standard output when {@code verbose} is set.
   *
   * @param words the sentence, used only for the optional re-parse
   * @param sem the expected semantics
   * @param parser the parser whose current results are evaluated
   * @return {@code true} only when there is exactly one best parse and its simplified copy equals
   *     {@code sem}
   */
  public boolean isCorrect(String words, Exp sem, Parser parser) {
    List<ParseResult> parses = parser.bestParses();
    noAnswer = parses.isEmpty();

    // ---- Exactly one best parse: it is either right or wrong. ----
    if (parses.size() == 1) {
      ParseResult only = parses.get(0);
      Exp simplified = only.getExp().copy();
      simplified.simplify();
      List lexEntries = only.getLexEntries();
      parsed++;

      if (simplified.equals(sem)) {
        if (verbose) {
          System.out.println("CORRECT");
          printLex(lexEntries);
        }
        sem.allLitsCount(); // result unused; the call is retained from the original
        correctParses++;
        return true;
      }

      // one parse, it was wrong... oh well...
      if (verbose) {
        System.out.println("WRONG");
        System.out.println(parses.size() + " parses: " + parses);
        printLex(lexEntries);
      }
      wrongParses++;

      boolean goldReachable = parser.hasParseFor(sem);
      if (verbose) {
        System.out.println("Had correct parse: " + goldReachable);
        System.out.print("Feats: ");
        Exp bestSem = parser.bestSem();
        Chart chart = parser.getChart();
        HashVector featVals = chart.computeExpFeatVals(bestSem);
        featVals.divideBy(chart.computeNorm(bestSem));
        featVals.dropSmallEntries();
        System.out.println(featVals);
      }
      return false;
    }

    noParses++;

    // ---- Several equally high scoring logical forms: abstain from answering. ----
    if (parses.size() > 1) {
      if (verbose) {
        System.out.println("too many parses");
        System.out.println(parses.size() + " parses: " + parses);
      }
      parses.get(0).getExp(); // result unused; the call is retained from the original
      boolean goldReachable = parser.hasParseFor(sem);
      if (verbose) {
        System.out.println("Had correct parse: " + goldReachable);
      }
      return false;
    }

    // ---- No parses: optionally re-parse while allowing words to be skipped. ----
    if (verbose) {
      System.out.println("no parses");
    }
    if (!emptyTest) {
      return false;
    }

    // One empty-category lexical entry per single token.
    List<LexEntry> emptyEntries = new LinkedList<LexEntry>();
    for (int j = 0; j < Globals.tokens.size(); j++) {
      List token = Globals.tokens.subList(j, j + 1);
      emptyEntries.add(new LexEntry(token, Cat.EMP));
    }

    parser.setTempLexicon(new Lexicon(emptyEntries));
    String mes = null;
    if (verbose) {
      mes = "EMPTY";
    }
    parser.parseTimed(words, null, mes);
    parser.setTempLexicon(null);
    parses = parser.bestParses();

    if (parses.size() != 1) {
      // too many parses or no parses
      emptyNoParses++;
      if (verbose) {
        System.out.println("WRONG:" + parses);
        boolean goldReachable = parser.hasParseFor(sem);
        System.out.println("Had correct parse: " + goldReachable);
      }
      return false;
    }

    ParseResult only = parses.get(0);
    List lexEntries = only.getLexEntries();
    Exp simplified = only.getExp().copy();
    simplified.simplify();
    only.noEmpty(); // result unused; the call is retained from the original

    if (simplified.equals(sem)) {
      if (verbose) {
        System.out.println("CORRECT");
        printLex(lexEntries);
      }
      emptyCorrect++;
    } else if (verbose) {
      // one parse, but wrong
      System.out.println("WRONG: " + simplified);
      printLex(lexEntries);
      boolean goldReachable = parser.hasParseFor(sem);
      System.out.println("Had correct parse: " + goldReachable);
    }
    return false;
  }