public void testO2PMap() { // Long-long TObjectLongHashMap<Long> olmap = new TObjectLongHashMap<Long>(); assertTrue(serializesCorrectly(olmap, "o2p-l-1")); olmap.put(Long.valueOf(0), 1); assertTrue(serializesCorrectly(olmap, "o2p-l-2")); olmap.put(Long.valueOf(Long.MIN_VALUE), Long.MIN_VALUE); assertTrue(serializesCorrectly(olmap, "o2p-l-3")); olmap.put(Long.valueOf(Long.MAX_VALUE), Long.MAX_VALUE); assertTrue(serializesCorrectly(olmap, "o2p-l-4")); // Int-int TObjectIntHashMap<Integer> oimap = new TObjectIntHashMap<Integer>(); assertTrue(serializesCorrectly(oimap, "o2p-i-1")); oimap.put(Integer.valueOf(0), 1); assertTrue(serializesCorrectly(oimap, "o2p-i-2")); oimap.put(Integer.valueOf(Integer.MIN_VALUE), Integer.MIN_VALUE); assertTrue(serializesCorrectly(oimap, "o2p-i-3")); oimap.put(Integer.valueOf(Integer.MAX_VALUE), Integer.MAX_VALUE); assertTrue(serializesCorrectly(oimap, "o2p-i-4")); // Double-double TObjectDoubleHashMap<Double> odmap = new TObjectDoubleHashMap<Double>(); assertTrue(serializesCorrectly(odmap, "o2p-d-1")); odmap.put(Double.valueOf(0), 1); assertTrue(serializesCorrectly(odmap, "o2p-d-2")); odmap.put(Double.valueOf(Double.MIN_VALUE), Double.MIN_VALUE); assertTrue(serializesCorrectly(odmap, "o2p-d-3")); odmap.put(Double.valueOf(Double.MAX_VALUE), Double.MAX_VALUE); assertTrue(serializesCorrectly(odmap, "o2p-d-4")); odmap.put(Double.valueOf(Double.POSITIVE_INFINITY), Double.POSITIVE_INFINITY); assertTrue(serializesCorrectly(odmap, "o2p-d-5")); odmap.put(Double.valueOf(Double.NEGATIVE_INFINITY), Double.NEGATIVE_INFINITY); assertTrue(serializesCorrectly(odmap, "o2p-d-6")); // NOTE: trove doesn't deal well with NaN // ddmap.put( Double.NaN, Double.NaN ); // assertTrue( serializesCorrectly( ddmap ) ); // Float-float TObjectFloatHashMap<Float> ofmap = new TObjectFloatHashMap<Float>(); assertTrue(serializesCorrectly(ofmap, "o2p-f-1")); ofmap.put(Float.valueOf(0), 1); assertTrue(serializesCorrectly(ofmap, "o2p-f-2")); 
ofmap.put(Float.valueOf(Float.MIN_VALUE), Float.MIN_VALUE); assertTrue(serializesCorrectly(ofmap, "o2p-f-3")); ofmap.put(Float.valueOf(Float.MAX_VALUE), Float.MAX_VALUE); assertTrue(serializesCorrectly(ofmap, "o2p-f-4")); ofmap.put(Float.valueOf(Float.POSITIVE_INFINITY), Float.POSITIVE_INFINITY); assertTrue(serializesCorrectly(ofmap, "o2p-f-5")); ofmap.put(Float.valueOf(Float.NEGATIVE_INFINITY), Float.NEGATIVE_INFINITY); assertTrue(serializesCorrectly(ofmap, "o2p-f-6")); // NOTE: trove doesn't deal well with NaN // ffmap.put( Float.NaN, Float.NaN ); // assertTrue( serializesCorrectly( ffmap ) ); }
/**
 * Reads every event from <tt>eventStream</tt>, writes it to <tt>eventStore</tt>, and counts the
 * predicates in each event's context. A predicate that has been seen at least <tt>cutoff</tt>
 * times is added to <tt>predicatesInOut</tt> with the next value of an index counter that
 * starts at 0 on every call. Predicates already present in <tt>predicatesInOut</tt> are
 * neither counted nor re-indexed.
 *
 * <p><tt>eventStore</tt> is closed before this method returns, whether it completes normally
 * or an exception is thrown while reading or writing.
 *
 * @param eventStream an <code>EventStream</code> value supplying the events
 * @param eventStore a writer to which the events are written for later processing; closed by
 *     this method
 * @param predicatesInOut a <code>TObjectIntHashMap</code> that receives the predicate-to-index
 *     entries and is trimmed to size once all events have been read
 * @param cutoff the number of occurrences a predicate needs before it is indexed
 * @return the number of events read from <tt>eventStream</tt>
 * @throws IOException if writing to or closing <tt>eventStore</tt> fails
 */
private int computeEventCounts(
    EventStream eventStream, Writer eventStore, TObjectIntHashMap predicatesInOut, int cutoff)
    throws IOException {
  TObjectIntHashMap counter = new TObjectIntHashMap();
  int predicateIndex = 0;
  int eventCount = 0;
  try {
    while (eventStream.hasNext()) {
      Event ev = eventStream.nextEvent();
      eventCount++;
      eventStore.write(FileEventStream.toLine(ev));
      String[] ec = ev.getContext();
      for (int j = 0; j < ec.length; j++) {
        String pred = ec[j];
        if (predicatesInOut.containsKey(pred)) {
          continue; // already indexed, nothing left to count
        }
        // increment() reports false when the key is absent; start the count at 1 then.
        if (!counter.increment(pred)) {
          counter.put(pred, 1);
        }
        if (counter.get(pred) >= cutoff) {
          predicatesInOut.put(pred, predicateIndex++);
          // The running count is no longer needed once the predicate is indexed.
          counter.remove(pred);
        }
      }
    }
    predicatesInOut.trimToSize();
  } finally {
    // Release the writer even when reading or writing failed part-way through.
    eventStore.close();
  }
  return eventCount;
}
private List index(int numEvents, EventStream es, TObjectIntHashMap predicateIndex) { TObjectIntHashMap omap = new TObjectIntHashMap(); int outcomeCount = 0; List eventsToCompare = new ArrayList(numEvents); TIntArrayList indexedContext = new TIntArrayList(); while (es.hasNext()) { Event ev = es.nextEvent(); String[] econtext = ev.getContext(); ComparableEvent ce; int ocID; String oc = ev.getOutcome(); if (omap.containsKey(oc)) { ocID = omap.get(oc); } else { ocID = outcomeCount++; omap.put(oc, ocID); } for (int i = 0; i < econtext.length; i++) { String pred = econtext[i]; if (predicateIndex.containsKey(pred)) { indexedContext.add(predicateIndex.get(pred)); } } // drop events with no active features if (indexedContext.size() > 0) { ce = new ComparableEvent(ocID, indexedContext.toNativeArray()); eventsToCompare.add(ce); } else { System.err.println( "Dropped event " + ev.getOutcome() + ":" + Arrays.asList(ev.getContext())); } // recycle the TIntArrayList indexedContext.resetQuick(); } outcomeLabels = toIndexedStringArray(omap); predLabels = toIndexedStringArray(predicateIndex); return eventsToCompare; }
public static void main(String[] args) { // String file = args[0]; IntInt2IntHashMap lEr = new IntInt2IntHashMap(); HashMap<String, TObjectIntHashMap<String>> stat = new HashMap<String, TObjectIntHashMap<String>>(); try { HashSet<String> headPat = PatternFileParser.parse(new File("ipa_head_pat.txt")); HashSet<String> funcPat = PatternFileParser.parse(new File("ipa_func_pat.txt")); CaboCha2Dep pipe = new CaboCha2Dep(System.in); JapaneseDependencyTree2CaboCha caboChaOutPipe = new JapaneseDependencyTree2CaboCha(); caboChaOutPipe.setFormat(CaboChaFormat.OLD); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out, "utf-8")); while (!pipe.eof()) { DependencyTree tree = pipe.pipePerSentence(); if (tree == null) { continue; } JapaneseDependencyTreeLib.setBunsetsuHead(tree, funcPat, headPat); PredicateArgumentStructure[] pasList = tree.getPASList(); for (int j = 0; j < pasList.length; j++) { int predId = pasList[j].getPredicateId(); String predType = pasList[j].predicateType; int[] aIds = pasList[j].getIds(); String[] aLabels = pasList[j].getLabels(); for (int k = 0; k < aIds.length; k++) { DepType dt = getDepType(tree, predId, aIds[k]); ArgPositionType apt = getArgPositionType(tree, aIds[k]); if (!stat.containsKey(aLabels[k])) { stat.put(aLabels[k], new TObjectIntHashMap<String>()); } TObjectIntHashMap<String> inner = stat.get(aLabels[k]); if (!inner.containsKey(dt.toString() + ":" + apt.toString())) { inner.put(dt.toString() + ":" + apt.toString(), 0); } inner.increment(dt.toString() + ":" + apt.toString()); aLabels[k] += ":" + dt + ":" + apt; } } StringBuilder resultStr = new StringBuilder(); resultStr.append(caboChaOutPipe.pipePerSentence(tree)); writer.write(resultStr.toString()); } // print statistics for (Iterator it = stat.keySet().iterator(); it.hasNext(); ) { String key = (String) it.next(); TObjectIntHashMap inner = stat.get(key); for (TObjectIntIterator iit = inner.iterator(); iit.hasNext(); ) { iit.advance(); System.err.print(key + "\t" + 
iit.key() + "\t" + iit.value() + "\n"); } } } catch (IOException e) { e.printStackTrace(); } }