/**
 * Copies every event from <tt>eventStream</tt> to <tt>eventStore</tt> (one line per event, as
 * produced by <code>FileEventStream.toLine</code>) and counts the predicates found in the event
 * contexts. A predicate that is not yet in <tt>predicatesInOut</tt> is added to it as soon as its
 * count reaches <tt>cutoff</tt>; indices are handed out consecutively, starting at 0, in the
 * order in which predicates reach the cutoff.
 *
 * <p><tt>eventStore</tt> is always closed before this method returns, whether it completes
 * normally or with an exception.
 *
 * @param eventStream the events to read
 * @param eventStore a writer to which the events are written for later processing; closed by
 *     this method
 * @param predicatesInOut map receiving each predicate that occurs at least <tt>cutoff</tt>
 *     times, together with its integer index; predicates already present are left untouched
 * @param cutoff the minimum number of occurrences a predicate needs in order to be indexed
 * @return the number of events read from <tt>eventStream</tt>
 * @throws IOException if writing to or closing <tt>eventStore</tt> fails
 */
private int computeEventCounts(
    EventStream eventStream, Writer eventStore, TObjectIntHashMap predicatesInOut, int cutoff)
    throws IOException {
  // Occurrence counts for predicates that have not reached the cutoff yet.
  TObjectIntHashMap counter = new TObjectIntHashMap();
  int predicateIndex = 0;
  int eventCount = 0;
  try {
    while (eventStream.hasNext()) {
      Event ev = eventStream.nextEvent();
      eventCount++;
      eventStore.write(FileEventStream.toLine(ev));
      String[] ec = ev.getContext();
      for (int j = 0; j < ec.length; j++) {
        String pred = ec[j];
        if (predicatesInOut.containsKey(pred)) {
          continue; // already indexed, nothing left to count
        }
        // increment() returns false when the key has no entry yet, so seed the count with 1.
        if (!counter.increment(pred)) {
          counter.put(pred, 1);
        }
        if (counter.get(pred) >= cutoff) {
          predicatesInOut.put(pred, predicateIndex++);
          // The count is no longer needed once the predicate is indexed.
          counter.remove(pred);
        }
      }
    }
    predicatesInOut.trimToSize();
  } finally {
    // Close even when reading or writing fails so the underlying resource is not leaked.
    eventStore.close();
  }
  return eventCount;
}
private List index(int numEvents, EventStream es, TObjectIntHashMap predicateIndex) { TObjectIntHashMap omap = new TObjectIntHashMap(); int outcomeCount = 0; List eventsToCompare = new ArrayList(numEvents); TIntArrayList indexedContext = new TIntArrayList(); while (es.hasNext()) { Event ev = es.nextEvent(); String[] econtext = ev.getContext(); ComparableEvent ce; int ocID; String oc = ev.getOutcome(); if (omap.containsKey(oc)) { ocID = omap.get(oc); } else { ocID = outcomeCount++; omap.put(oc, ocID); } for (int i = 0; i < econtext.length; i++) { String pred = econtext[i]; if (predicateIndex.containsKey(pred)) { indexedContext.add(predicateIndex.get(pred)); } } // drop events with no active features if (indexedContext.size() > 0) { ce = new ComparableEvent(ocID, indexedContext.toNativeArray()); eventsToCompare.add(ce); } else { System.err.println( "Dropped event " + ev.getOutcome() + ":" + Arrays.asList(ev.getContext())); } // recycle the TIntArrayList indexedContext.resetQuick(); } outcomeLabels = toIndexedStringArray(omap); predLabels = toIndexedStringArray(predicateIndex); return eventsToCompare; }
public static void main(String[] args) { // String file = args[0]; IntInt2IntHashMap lEr = new IntInt2IntHashMap(); HashMap<String, TObjectIntHashMap<String>> stat = new HashMap<String, TObjectIntHashMap<String>>(); try { HashSet<String> headPat = PatternFileParser.parse(new File("ipa_head_pat.txt")); HashSet<String> funcPat = PatternFileParser.parse(new File("ipa_func_pat.txt")); CaboCha2Dep pipe = new CaboCha2Dep(System.in); JapaneseDependencyTree2CaboCha caboChaOutPipe = new JapaneseDependencyTree2CaboCha(); caboChaOutPipe.setFormat(CaboChaFormat.OLD); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, "utf-8")); while (!pipe.eof()) { DependencyTree tree = pipe.pipePerSentence(); if (tree == null) { continue; } JapaneseDependencyTreeLib.setBunsetsuHead(tree, funcPat, headPat); PredicateArgumentStructure[] pasList = tree.getPASList(); for (int j = 0; j < pasList.length; j++) { int predId = pasList[j].getPredicateId(); String predType = pasList[j].predicateType; int[] aIds = pasList[j].getIds(); String[] aLabels = pasList[j].getLabels(); for (int k = 0; k < aIds.length; k++) { DepType dt = getDepType(tree, predId, aIds[k]); ArgPositionType apt = getArgPositionType(tree, aIds[k]); if (!stat.containsKey(aLabels[k])) { stat.put(aLabels[k], new TObjectIntHashMap<String>()); } TObjectIntHashMap<String> inner = stat.get(aLabels[k]); if (!inner.containsKey(dt.toString() + ":" + apt.toString())) { inner.put(dt.toString() + ":" + apt.toString(), 0); } inner.increment(dt.toString() + ":" + apt.toString()); aLabels[k] += ":" + dt + ":" + apt; } } StringBuilder resultStr = new StringBuilder(); resultStr.append(caboChaOutPipe.pipePerSentence(tree)); writer.write(resultStr.toString()); } // print statistics for (Iterator it = stat.keySet().iterator(); it.hasNext(); ) { String key = (String) it.next(); TObjectIntHashMap inner = stat.get(key); for (TObjectIntIterator iit = inner.iterator(); iit.hasNext(); ) { iit.advance(); System.err.print(key + "\t" + 
iit.key() + "\t" + iit.value() + "\n"); } } } catch (IOException e) { e.printStackTrace(); } }