/** Finds the lexical items used to produce the highest scoring parse. */
public List<LexEntry> getMaxLexEntries() {
  List<LexEntry> result = new LinkedList<LexEntry>();
  for (ParseResult p : bestParses) {
    result.addAll(p.getLexEntries());
  }
  return result;
}
/** Parse the given string, return resulting data if appropriate. */
ParseResult internalParse(
    String s, Map<String, Integer> targetUnionDecisions, boolean mustConsumeStr) {
  //
  // If there's no target decision, then go ahead and try all branches.
  //
  if (targetUnionDecisions == null || targetUnionDecisions.get(name) == null) {
    for (InferredType subelt : unionTypes) {
      ParseResult pr = subelt.internalParse(s, targetUnionDecisions, false);
      if (pr != null && (!mustConsumeStr || pr.getRemainingString().trim().length() == 0)) {
        return new ParseResult(pr.getData(), pr.hasData(), pr.getRemainingString());
      }
    }
    return null;
  }

  //
  // If there is a target decision, then carry it out.
  //
  InferredType subelt = unionTypes.get(targetUnionDecisions.get(name));
  ParseResult pr = subelt.internalParse(s, targetUnionDecisions, false);
  if (pr != null && (!mustConsumeStr || pr.getRemainingString().trim().length() == 0)) {
    return new ParseResult(pr.getData(), pr.hasData(), pr.getRemainingString());
  }
  return null;
}
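// The method above implements ordered choice over union branches: with no target
// decision, branches are tried in order and the first acceptable parse wins; with a
// decision, exactly one branch is attempted. A self-contained sketch of that pattern
// (all class and helper names here are illustrative, not from the source):
import java.util.*;
import java.util.function.Function;

public class UnionParseSketch {
  /** Each branch returns {tag, remainder} on success, or null on failure. */
  static String[] parseUnion(
      String s, List<Function<String, String[]>> branches, Integer forcedBranch) {
    if (forcedBranch != null) {
      return branches.get(forcedBranch).apply(s); // targeted decision: one branch only
    }
    for (Function<String, String[]> b : branches) { // no decision: first success wins
      String[] r = b.apply(s);
      if (r != null) return r;
    }
    return null;
  }

  public static void main(String[] args) {
    List<Function<String, String[]>> branches = Arrays.asList(
        in -> in.startsWith("#") ? new String[] {"comment", ""} : null,
        in -> new String[] {"text:" + in, ""});
    System.out.println(parseUnion("# hi", branches, null)[0]); // comment
    System.out.println(parseUnion("# hi", branches, 1)[0]);    // text:# hi (forced)
  }
}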
/** Finds the lexical items used to produce the highest scoring parse with semantics sem. */
public List<LexEntry> getMaxLexEntriesFor(Exp sem) {
  List<LexEntry> result = new LinkedList<LexEntry>();
  for (ParseResult p : findBestParses(allParses, sem)) {
    result.addAll(p.getLexEntries());
  }
  return result;
}
/** Returns the features for the highest-scoring current parse(s). */
public HashVector getFeats() {
  HashVector result = new HashVector();
  // result.reset(theta.size(), 0.0);
  for (ParseResult p : bestParses) {
    p.getFeats(result);
  }
  // Average over ties so the result is an expected feature vector.
  if (bestParses.size() > 1) {
    result.divideBy(bestParses.size());
  }
  return result;
}
/** Parse the given string, return resulting data if appropriate. */
ParseResult internalParse(
    String s, Map<String, Integer> targetUnionDecisions, boolean mustConsumeStr) {
  boolean hasData = false;
  GenericData.Record gdr = new GenericData.Record(getAvroSchema());
  String currentStr = s;
  for (InferredType subelt : structTypes) {
    // A field cannot be parsed from an empty string, so the whole struct fails.
    if (currentStr.length() == 0) {
      return null;
    }
    ParseResult pr = subelt.internalParse(currentStr, targetUnionDecisions, false);
    if (pr == null) {
      return null;
    }
    if (pr.hasData()) {
      hasData = true;
      gdr.put(subelt.getName(), pr.getData());
    }
    currentStr = pr.getRemainingString();
  }
  if (mustConsumeStr && currentStr.trim().length() != 0) {
    return null;
  }
  return new ParseResult(gdr, hasData, currentStr);
}
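// The struct parse above threads the unconsumed remainder from one field to the next
// and fails if any field fails. A minimal self-contained sketch of that pattern
// (all names are illustrative, not from the source):
import java.util.*;
import java.util.function.Function;

public class StructParseSketch {
  /** Each field parser returns {value, remainder}, or null on failure. */
  static Map<String, String> parseStruct(
      String s, LinkedHashMap<String, Function<String, String[]>> fields) {
    Map<String, String> record = new LinkedHashMap<>();
    String cur = s;
    for (Map.Entry<String, Function<String, String[]>> f : fields.entrySet()) {
      if (cur.isEmpty()) return null; // input exhausted with fields left to parse
      String[] r = f.getValue().apply(cur);
      if (r == null) return null; // one field failing fails the whole struct
      record.put(f.getKey(), r[0]);
      cur = r[1]; // thread the unconsumed remainder to the next field
    }
    return record;
  }

  public static void main(String[] args) {
    LinkedHashMap<String, Function<String, String[]>> fields = new LinkedHashMap<>();
    fields.put("word", in -> {
      int i = in.indexOf(' ');
      return i > 0 ? new String[] {in.substring(0, i), in.substring(i + 1)} : null;
    });
    fields.put("rest", in -> new String[] {in, ""});
    System.out.println(parseStruct("hello world", fields)); // {word=hello, rest=world}
  }
}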
public boolean hasExp(Exp sem) {
  for (ParseResult p : allParses) {
    if (p.getExp().equals(sem)) {
      return true;
    }
  }
  return false;
}
/** Returns the features for the highest-scoring current parse with semantics that equal sem. */
public HashVector getFeats(Exp sem) {
  HashVector result = new HashVector();
  List<ParseResult> pr = findBestParses(allParses, sem);
  for (ParseResult p : pr) {
    p.getFeats(result);
  }
  if (pr.size() > 1) {
    result.divideBy(pr.size());
  }
  return result;
}
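// getFeats averages the tied parses' feature vectors, i.e. it returns expected
// feature counts under a uniform distribution over the best parses. A sketch with
// plain maps standing in for HashVector (whose full API is not shown in the source):
import java.util.*;

public class AverageFeats {
  /** Sum several sparse feature vectors, then divide by their count. */
  static Map<String, Double> average(List<Map<String, Double>> featVecs) {
    Map<String, Double> result = new HashMap<>();
    for (Map<String, Double> fv : featVecs) {
      for (Map.Entry<String, Double> e : fv.entrySet()) {
        result.merge(e.getKey(), e.getValue(), Double::sum); // accumulate
      }
    }
    if (featVecs.size() > 1) {
      result.replaceAll((k, v) -> v / featVecs.size()); // the divideBy step
    }
    return result;
  }

  public static void main(String[] args) {
    Map<String, Double> a = Map.of("f1", 1.0, "f2", 2.0);
    Map<String, Double> b = Map.of("f1", 3.0);
    System.out.println(average(Arrays.asList(a, b))); // {f1=2.0, f2=1.0}
  }
}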
/**
 * Reads the next line and notifies subscribed observers.
 *
 * @return true if more lines may be available; false if it was not possible to read a line
 *     because EOF has been reached.
 */
public boolean next() {
  if (this.isClosed) return false;
  if (this.nomore) return false;
  try {
    // Mark every symbol whose next timestamp is later than the minimum; only the
    // stream(s) at the earliest timestamp are polled this round.
    final boolean[] unpoll = new boolean[symbols.length];
    long minTime = nextMilliseconds[0];
    for (int i = 1; i < symbols.length; i++) {
      if (nextMilliseconds[i] > minTime) {
        unpoll[i] = true;
      } else if (nextMilliseconds[i] < minTime) {
        minTime = nextMilliseconds[i];
        for (int j = 0; j < i; j++) unpoll[j] = true;
      }
    }

    boolean somethingToReturn = false;
    final ParseResult composedResult = new ParseResult();
    for (int i = 0; i < symbols.length; i++) {
      if (unpoll[i] || this.nextLine[i] == null) {
        // Nothing to send to listeners.
      } else {
        somethingToReturn = true;
        final ParseResult localResult = this.lineParser.parse(this.nextLine[i]);
        // Move to the next valid line.
        String line = partReader[i].readLine();
        while (line != null && !this.lineParser.isValid(line)) {
          line = partReader[i].readLine();
        }
        if (line != null) {
          if (!symbols[i].equals(localResult.getSymbol(0))) {
            this.nextLine[i] = null;
          } else {
            this.nextLine[i] = line;
            this.nextMilliseconds[i] = this.lineParser.getUTCTimestamp(line).getTimeInMillis();
          }
        } else {
          this.nextLine[i] = null;
        }
        composedResult.merge(localResult);
      }
    }

    if (somethingToReturn) {
      for (int i = 0; i < this.spreadTradesMgr.length; i++) {
        this.spreadTradesMgr[i].accumulate(composedResult);
      }
    } else {
      this.nomore = true;
      for (int i = 0; i < this.spreadTradesMgr.length; i++) {
        this.spreadTradesMgr[i].endAccumulation();
      }
    }
    return somethingToReturn;
  } catch (Exception e) {
    log.log(Level.SEVERE, "Exception dealing with file '" + filePath + "'", e);
    return false;
  }
}
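// The unpoll[] computation above selects which time-ordered streams to advance:
// only the stream(s) whose next timestamp equals the minimum are polled this round.
// A standalone sketch of just that selection (names are mine, not from the source):
import java.util.*;

public class MinTimeSelect {
  static boolean[] unpoll(long[] nextMillis) {
    boolean[] unpoll = new boolean[nextMillis.length];
    long min = nextMillis[0];
    for (int i = 1; i < nextMillis.length; i++) {
      if (nextMillis[i] > min) {
        unpoll[i] = true; // later than the current min: hold back
      } else if (nextMillis[i] < min) {
        min = nextMillis[i]; // new min: hold back everything seen so far
        for (int j = 0; j < i; j++) unpoll[j] = true;
      } // equal timestamps are polled together with the min
    }
    return unpoll;
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(unpoll(new long[] {5, 3, 3, 9})));
    // [true, false, false, true] -> only the two streams at t=3 are polled
  }
}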
public GenericContainer parse(String str) {
  //
  // Try the naive parse.
  //
  ParseResult pr = internalParse(str, null, true);
  if (pr != null && pr.hasData()) {
    return (GenericContainer) pr.getData();
  }

  //
  // Otherwise, we need to consider other union options.
  // Unfold the candidate decisions into a series of target decisions.
  //
  Map<String, Set<Integer>> candidateUnionDecisions = findCandidateUnionDecisions();
  List<HashMap<String, Integer>> allUnionDecisions = new ArrayList<HashMap<String, Integer>>();
  for (Map.Entry<String, Set<Integer>> pair : candidateUnionDecisions.entrySet()) {
    String k = pair.getKey();
    Set<Integer> indices = pair.getValue();
    if (allUnionDecisions.size() == 0) {
      for (Integer index : indices) {
        HashMap<String, Integer> newMap = new HashMap<String, Integer>();
        newMap.put(k, index);
        allUnionDecisions.add(newMap);
      }
    } else {
      List<HashMap<String, Integer>> newUnionDecisions =
          new ArrayList<HashMap<String, Integer>>();
      for (HashMap<String, Integer> curUnionDecisions : allUnionDecisions) {
        for (Integer index : indices) {
          HashMap<String, Integer> newMap =
              (HashMap<String, Integer>) curUnionDecisions.clone();
          newMap.put(k, index);
          newUnionDecisions.add(newMap);
        }
      }
      allUnionDecisions = newUnionDecisions;
    }
  }

  //
  // Now execute all possible union decisions.
  //
  for (Map<String, Integer> targetUnionDecisions : allUnionDecisions) {
    pr = internalParse(str, targetUnionDecisions, true);
    if (pr != null && pr.hasData()) {
      return (GenericContainer) pr.getData();
    }
  }
  return null;
}
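// The decision-unfolding loop above builds the Cartesian product of the per-union
// candidate branch sets. A self-contained sketch of the same construction (names are
// mine); seeding the list with one empty map replaces the original's empty-list
// special case:
import java.util.*;

public class UnionDecisionUnfold {
  /** Unfold per-union candidate branch indices into all concrete decision maps. */
  static List<Map<String, Integer>> unfold(Map<String, Set<Integer>> candidates) {
    List<Map<String, Integer>> all = new ArrayList<>();
    all.add(new HashMap<>()); // start with a single empty decision
    for (Map.Entry<String, Set<Integer>> e : candidates.entrySet()) {
      List<Map<String, Integer>> next = new ArrayList<>();
      for (Map<String, Integer> base : all) {
        for (Integer idx : e.getValue()) { // branch on every candidate index
          Map<String, Integer> m = new HashMap<>(base);
          m.put(e.getKey(), idx);
          next.add(m);
        }
      }
      all = next;
    }
    return all;
  }

  public static void main(String[] args) {
    Map<String, Set<Integer>> cands = new LinkedHashMap<>();
    cands.put("u1", new TreeSet<>(Arrays.asList(0, 1)));
    cands.put("u2", new TreeSet<>(Arrays.asList(0, 1, 2)));
    System.out.println(unfold(cands).size()); // 2 * 3 = 6 decision maps
  }
}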
private List<ParseResult> findBestParses(List<ParseResult> all, Exp e) {
  List<ParseResult> best = new LinkedList<ParseResult>();
  double bestScore = -Double.MAX_VALUE;
  for (ParseResult p : all) {
    if (p.getExp().inferType() != null) {
      if (e == null || p.getExp().equals(e)) {
        if (p.getScore() == bestScore) best.add(p);
        if (p.getScore() > bestScore) {
          bestScore = p.getScore();
          best.clear();
          best.add(p);
        }
      }
    }
  }
  return best;
}
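// findBestParses is an argmax that keeps all items tied at the maximum score. A
// standalone sketch of the pattern (note the exact floating-point equality used for
// ties, mirroring the source; names are mine):
import java.util.*;

public class ArgmaxTies {
  static List<String> best(Map<String, Double> scored) {
    List<String> best = new LinkedList<>();
    double bestScore = -Double.MAX_VALUE;
    for (Map.Entry<String, Double> e : scored.entrySet()) {
      if (e.getValue() == bestScore) best.add(e.getKey()); // tie: keep both
      if (e.getValue() > bestScore) { // strictly better: restart the list
        bestScore = e.getValue();
        best.clear();
        best.add(e.getKey());
      }
    }
    return best;
  }

  public static void main(String[] args) {
    Map<String, Double> s = new LinkedHashMap<>();
    s.put("a", 1.0);
    s.put("b", 2.0);
    s.put("c", 2.0);
    System.out.println(best(s)); // [b, c]
  }
}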
/**
 * Parse function for loading a PeakML file containing only masschromatogram entries. When an
 * entry of another type is encountered, an {@link XmlParserException} is thrown. The resulting
 * {@link ParseResult} instance is type-bound to {@link MassChromatogram} to force only
 * masschromatogram types.
 *
 * @param in The input-stream to load the data from.
 * @param listener The progress listener.
 * @return The header and peak information stored in the file.
 * @throws IOException Thrown on an IOException.
 * @throws XmlParserException Thrown when an unknown IPeak object is encountered.
 */
public static ParseResult parseMassChromatograms(InputStream in, ParserProgressListener listener)
    throws IOException, XmlParserException {
  final ParseResult result = new ParseResult();
  final Vector<MassChromatogram<? extends Peak>> peaks =
      new Vector<MassChromatogram<? extends Peak>>();
  final ParserProgressListener _listener = listener;

  class MyListener implements XmlParserListener {
    int index = 0;

    public void onDocument(Document document, String xpath) throws XmlParserException {
      if (xpath.equals(XPATH_IPEAK)) {
        Node node = document.getChildNodes().item(0);
        // Check whether we're getting the correct ipeak.
        Node typeattribute = node.getAttributes().getNamedItem(PeakMLWriter.TYPE);
        if (typeattribute == null)
          throw new XmlParserException("Failed to locate the type attribute.");
        if (!typeattribute.getNodeValue().equals(PeakMLWriter.TYPE_MASSCHROMATOGRAM))
          throw new XmlParserException(
              "IPeak ("
                  + typeattribute.getNodeValue()
                  + ") is not of type: '"
                  + PeakMLWriter.TYPE_MASSCHROMATOGRAM
                  + "'");

        // Parse this node as a mass chromatogram.
        MassChromatogram<? extends Peak> masschromatogram = parseMassChromatogram(node);
        if (masschromatogram != null) peaks.add(masschromatogram);

        // Report progress as a percentage of the expected number of peaks.
        if (_listener != null && result.header != null && result.header.getNrPeaks() != 0)
          _listener.update((100. * index++) / result.header.getNrPeaks());
      } else if (xpath.equals(XPATH_HEADER)) {
        result.header = parseHeader(document.getFirstChild());
      }
    }
  }

  run(in, new MyListener());
  result.measurement = new IPeakSet<MassChromatogram<? extends Peak>>(peaks);
  return result;
}
/**
 * Parse function for blindly loading data from a PeakML file. This method loads the data in a
 * PeakML file as it encounters it in the file, which means that a mixed model is essentially
 * possible for peak-data stored in a PeakML file. The resulting {@link ParseResult} instance is
 * parameterized with {@link IPeak}. The class-information made available through the Java
 * language can be used to determine the original type of the peak. The function employs a
 * callback mechanism with {@link ParserProgressListener} to return information about the
 * progress through the file. This is calculated with the information returned by {@link
 * Header#getNrPeaks()}.
 *
 * <p>The loadall parameter can be used to restrict the amount of data actually being loaded by
 * the function. If it is set to false, only data known by the {@link IPeak} class is loaded (in
 * this implementation this means that an instance of {@link Centroid} is made). The class
 * information cannot be used to determine the original type of the peak when loadall is set to
 * false.
 *
 * @param in The input-stream to load the data from.
 * @param listener The progress listener.
 * @param loadall If set to false, only the data known to IPeak is loaded.
 * @return The header and peak information stored in the file.
 * @throws IOException Thrown on an IOException.
 * @throws XmlParserException Thrown when an unknown IPeak object is encountered.
 */
public static ParseResult parse(InputStream in, ParserProgressListener listener, boolean loadall)
    throws IOException, XmlParserException {
  final boolean _loadall = loadall;
  final ParserProgressListener _listener = listener;

  final ParseResult result = new ParseResult();
  final Vector<IPeak> peaks = new Vector<IPeak>();

  class MyListener implements XmlParserListener {
    int index = 0;

    public void onDocument(Document document, String xpath) throws XmlParserException {
      if (xpath.equals(XPATH_IPEAK)) {
        Node node = document.getChildNodes().item(0);
        // Check whether we're getting the correct ipeak.
        Node typeattribute = node.getAttributes().getNamedItem(PeakMLWriter.TYPE);
        if (typeattribute == null)
          throw new XmlParserException("Failed to locate a type attribute.");

        // Honor the loadall flag as documented above: fall back to the lightweight
        // centroid representation when the caller does not want the full data.
        IPeak peak = (_loadall ? parseIPeak(node) : parseCentroid(node));
        if (peak != null) peaks.add(peak);

        // Report progress as a percentage of the expected number of peaks.
        if (_listener != null && result.header != null && result.header.getNrPeaks() != 0)
          _listener.update((100. * index++) / result.header.getNrPeaks());
      } else if (xpath.equals(XPATH_HEADER)) {
        result.header = parseHeader(document.getFirstChild());
      }
    }
  }

  run(in, new MyListener());
  result.measurement = new IPeakSet<IPeak>(peaks);
  return result;
}
/** Parse the given string, return resulting data if appropriate. */
ParseResult internalParse(
    String s, Map<String, Integer> targetUnionDecisions, boolean mustConsumeStr) {
  Schema localSchema = getAvroSchema();
  GenericData.Array<Object> gda = new GenericData.Array<Object>(5, localSchema);
  String currentStr = s;
  while (true) {
    ParseResult pr = bodyType.internalParse(currentStr, targetUnionDecisions, false);
    if (pr == null) {
      break;
    }
    assert pr.hasData();
    gda.add(pr.getData());
    // Guard against a body type that succeeds without consuming input, which
    // would otherwise loop forever.
    if (pr.getRemainingString().length() >= currentStr.length()) {
      break;
    }
    currentStr = pr.getRemainingString();
  }
  if (mustConsumeStr && currentStr.trim().length() != 0) {
    return null;
  }
  return new ParseResult(gda, true, currentStr);
}
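// The loop above greedily applies the body type until it fails. A self-contained
// sketch of greedy repetition, including the no-progress guard noted in the comment
// above (all names are mine, not from the source):
import java.util.*;
import java.util.function.Function;

public class GreedyRepeat {
  /** Apply a step repeatedly; each step returns {item, remainder} or null. */
  static List<String> repeat(String s, Function<String, String[]> step) {
    List<String> items = new ArrayList<>();
    String cur = s;
    while (true) {
      String[] r = step.apply(cur);
      if (r == null || r[1].length() >= cur.length()) break; // fail or no progress
      items.add(r[0]);
      cur = r[1];
    }
    return items;
  }

  public static void main(String[] args) {
    // Hypothetical step: consume one leading digit at a time.
    Function<String, String[]> digit = in ->
        (!in.isEmpty() && Character.isDigit(in.charAt(0)))
            ? new String[] {in.substring(0, 1), in.substring(1)}
            : null;
    System.out.println(repeat("123abc", digit)); // [1, 2, 3]
  }
}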
private List<ParseResult> removeRepeats(List<ParseResult> all) {
  System.out.println("removeRepeats: all.size() = " + all.size());
  List<ParseResult> bestList = new LinkedList<ParseResult>();
  for (int i = 0; i < all.size(); i++) {
    ParseResult e_i = all.get(i);
    boolean best = true;
    // Keep e_i only if no later parse has the same semantics and an equal or
    // better score; ties are resolved in favor of the later entry.
    for (int j = i + 1; j < all.size(); j++) {
      ParseResult e_j = all.get(j);
      if (e_i.getExp().equals(e_j.getExp()) && e_i.getScore() <= e_j.getScore()) {
        best = false;
        break;
      }
    }
    if (best) bestList.add(e_i);
  }
  return bestList;
}
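// removeRepeats is quadratic in the number of parses. A sketch of a linear-time
// equivalent that keeps one highest-scoring entry per key, resolving score ties in
// favor of the later entry as the original does (assumes the key type implements
// equals/hashCode; all names are mine):
import java.util.*;

public class DedupByKey {
  static <K> Map<K, Double> bestPerKey(List<Map.Entry<K, Double>> items) {
    Map<K, Double> best = new LinkedHashMap<>();
    for (Map.Entry<K, Double> it : items) {
      Double cur = best.get(it.getKey());
      if (cur == null || it.getValue() >= cur) {
        best.put(it.getKey(), it.getValue()); // new key, better score, or later tie
      }
    }
    return best;
  }

  public static void main(String[] args) {
    List<Map.Entry<String, Double>> items = Arrays.asList(
        new AbstractMap.SimpleEntry<>("x", 1.0),
        new AbstractMap.SimpleEntry<>("x", 3.0),
        new AbstractMap.SimpleEntry<>("y", 2.0));
    System.out.println(bestPerKey(items)); // {x=3.0, y=2.0}
  }
}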
public boolean isCorrect(String words, Exp sem, Parser parser) {
  List<ParseResult> parses = parser.bestParses();
  noAnswer = (parses.size() == 0);

  if (parses.size() == 1) {
    ParseResult p = parses.get(0);
    Exp e = p.getExp();
    e = e.copy();
    e.simplify();
    List l = p.getLexEntries();
    parsed++;
    if (e.equals(sem)) {
      if (verbose) {
        System.out.println("CORRECT");
        printLex(l);
      }
      correctParses++;
      return true;
    }
    // One parse, but it was wrong.
    if (verbose) {
      System.out.println("WRONG");
      System.out.println(parses.size() + " parses: " + parses);
      printLex(l);
    }
    wrongParses++;
    boolean hasCorrect = parser.hasParseFor(sem);
    if (verbose) {
      System.out.println("Had correct parse: " + hasCorrect);
      System.out.print("Feats: ");
      Exp eb = parser.bestSem();
      Chart c = parser.getChart();
      HashVector h = c.computeExpFeatVals(eb);
      h.divideBy(c.computeNorm(eb));
      h.dropSmallEntries();
      System.out.println(h);
    }
  } else {
    noParses++;
    if (parses.size() > 1) {
      // There is more than one equally high-scoring logical form, so we
      // abstain from returning a result.
      if (verbose) {
        System.out.println("too many parses");
        System.out.println(parses.size() + " parses: " + parses);
      }
      boolean hasCorrect = parser.hasParseFor(sem);
      if (verbose) System.out.println("Had correct parse: " + hasCorrect);
    } else {
      // No parses: potentially reparse with word skipping.
      if (verbose) System.out.println("no parses");
      if (emptyTest) {
        List<LexEntry> emps = new LinkedList<LexEntry>();
        for (int j = 0; j < Globals.tokens.size(); j++) {
          List l = Globals.tokens.subList(j, j + 1);
          LexEntry le = new LexEntry(l, Cat.EMP);
          emps.add(le);
        }
        parser.setTempLexicon(new Lexicon(emps));
        String mes = null;
        if (verbose) mes = "EMPTY";
        parser.parseTimed(words, null, mes);
        parser.setTempLexicon(null);
        parses = parser.bestParses();
        if (parses.size() == 1) {
          ParseResult p = parses.get(0);
          List l = p.getLexEntries();
          Exp e = p.getExp();
          e = e.copy();
          e.simplify();
          if (e.equals(sem)) {
            if (verbose) {
              System.out.println("CORRECT");
              printLex(l);
            }
            emptyCorrect++;
          } else {
            // One parse, but wrong.
            if (verbose) {
              System.out.println("WRONG: " + e);
              printLex(l);
              boolean hasCorrect = parser.hasParseFor(sem);
              System.out.println("Had correct parse: " + hasCorrect);
            }
          }
        } else {
          // Too many parses or no parses.
          emptyNoParses++;
          if (verbose) {
            System.out.println("WRONG: " + parses);
            boolean hasCorrect = parser.hasParseFor(sem);
            System.out.println("Had correct parse: " + hasCorrect);
          }
        }
      }
    }
  }
  return false;
}