private void addSegs(List segs, Sequence output) { int segtype = -1; int startidx = -1; for (int j = 0; j < output.size(); j++) { // System.out.println("addSegs j="+j); Object tag = output.get(j); segtype = ArrayUtils.indexOf(segmentTags, tag.toString()); if (segtype > -1) { // System.out.println("...found segment "+tag); // A new segment is starting startidx = j; while (j < output.size() - 1) { // System.out.println("...inner addSegs j="+j); j++; Object nextTag = output.get(j); if (!nextTag.equals(tag)) { j--; segs.add(new Segment(segtype, startidx, j)); segtype = startidx = -1; break; } } } } // Handle end-of-sequence if (startidx > -1) { segs.add(new Segment(segtype, startidx, output.size() - 1)); } }
/** The longest dep set of the sub-sequence s[0..idx]. */ public static BitSet longestDepSet(Sequence s, int idx) { if (s.size() == 0) throw new IllegalArgumentException("size must be greater than 0."); if (idx < 0 || idx >= s.size()) throw new IllegalArgumentException(); assert s.size() <= maxlength; int max = -1; int maxidx = -1; for (int i = 0; i <= idx; i++) { BitSet set = sets[i]; set.clear(); set.set(i); lastuse[i] = i; for (Variable invar : s.getInputs(i)) { set.or(sets[lastuse[invar.index]]); lastuse[invar.index] = i; } int size = set.cardinality(); if (size > max) { max = size; maxidx = i; } } for (int i = 0; i < s.size(); i++) { // System.out.println("@ " + sets[i]); } return sets[maxidx]; }
@Override public void addSequence( Sequence candidate, int candidateIndex, int depth, HashTreeNode father, ArrayList<Sequence> allCandidates) { candidateIndicesSize++; if (candidateIndicesSize > candidateIndices.length) { if (depth < candidate.size() - 1) { // exchange this leaf node by inner node if it could become inner node HashTreeInnerNode newInner = new HashTreeInnerNode(); father.replaceNode(candidate.getItem(depth - 1), newInner); // and adding all sequences and last candidate for (int i = 0; i < candidateIndices.length; i++) { newInner.addSequence( allCandidates.get(candidateIndices[i]), candidateIndices[i], depth, father, allCandidates); } newInner.addSequence( allCandidates.get(candidateIndex), candidateIndex, depth, father, allCandidates); } else { int[] newIndices = new int[candidateIndices.length * 2]; System.arraycopy(candidateIndices, 0, newIndices, 0, candidateIndices.length); candidateIndices = newIndices; candidateIndices[candidateIndicesSize - 1] = candidateIndex; } } else { candidateIndices[candidateIndicesSize - 1] = candidateIndex; } }
public Instance pipe(Instance carrier) { Sequence data = (Sequence) carrier.getData(); Sequence target = (Sequence) carrier.getTarget(); if (data.size() != target.size()) throw new IllegalArgumentException( "Trying to print into SimpleTagger format, where data and target lengths do not match\n" + "data.length = " + data.size() + ", target.length = " + target.size()); int N = data.size(); if (data instanceof TokenSequence) { throw new UnsupportedOperationException("Not yet implemented."); } else if (data instanceof FeatureVectorSequence) { FeatureVectorSequence fvs = (FeatureVectorSequence) data; Alphabet dict = (fvs.size() > 0) ? fvs.getFeatureVector(0).getAlphabet() : null; for (int i = 0; i < N; i++) { Object label = target.get(i); writer.print(label); FeatureVector fv = fvs.getFeatureVector(i); for (int loc = 0; loc < fv.numLocations(); loc++) { writer.print(' '); String fname = dict.lookupObject(fv.indexAtLocation(loc)).toString(); double value = fv.valueAtLocation(loc); // if (!Maths.almostEquals(value, 1.0)) { // throw new IllegalArgumentException ("Printing to SimpleTagger format: FeatureVector // not binary at time slice "+i+" fv:"+fv); // } writer.print(fname + String.valueOf(value)); } writer.println(); } } else { throw new IllegalArgumentException("Don't know how to print data of type " + data); } writer.println(); // writer.print(getDataAlphabet()); return carrier; }
public ConfidenceEvaluator(InstanceWithConfidence[] instances, boolean sorted) { this.confidences = new Vector(); for (int i = 0; i < instances.length; i++) { Sequence input = (Sequence) instances[i].getInstance().getData(); confidences.add( new EntityConfidence( instances[i].getConfidence(), instances[i].correct(), input, 0, input.size() - 1)); } if (!sorted) Collections.sort(confidences, new ConfidenceComparator()); this.nBins = DEFAULT_NUM_BINS; this.numCorrect = getNumCorrectEntities(); }
/**
 * Builds a new sequence containing only the statements of {@code s} that belong to its longest
 * dependency set, with statement inputs re-pointed at the corresponding variables of the new
 * sequence.
 *
 * @param s the sequence to reduce
 * @return a sequence made of the selected statements, in their original relative order
 */
public static Sequence getLongestDepSetSubSequence(Sequence s) {
  BitSet kept = longestDepSet(s, s.size() - 1);
  int bound = kept.length();
  assert kept.get(bound - 1);

  // First pass: map every kept statement index to its position in the reduced sequence.
  Map<Integer, Integer> remapped = new LinkedHashMap<Integer, Integer>();
  int nextPosition = 0;
  for (int old = kept.nextSetBit(0); old >= 0; old = kept.nextSetBit(old + 1)) {
    remapped.put(old, nextPosition);
    nextPosition++;
  }

  // Second pass: re-create each kept statement, translating its inputs through the map.
  Sequence reduced = new Sequence();
  for (int old = kept.nextSetBit(0); old >= 0; old = kept.nextSetBit(old + 1)) {
    List<Variable> originalInputs = s.getInputs(old);
    List<Variable> translatedInputs = new ArrayList<Variable>(originalInputs.size());
    for (Variable input : originalInputs) {
      translatedInputs.add(reduced.getVariable(remapped.get(input.index)));
    }
    reduced = reduced.extend(s.getStatementKind(old), translatedInputs);
  }
  return reduced;
}
// This method is responsible for doing two things:
//
// 1. Selecting at random a collection of sequences that can be used to
//    create input values for the given statement, and
//
// 2. Selecting at random valid indices into the above sequences specifying
//    the values to be used as input to the statement.
//
// The selected sequences and indices are wrapped in an InputsAndSuccessFlag
// object and returned. If an appropriate collection of sequences and indices
// was not found (e.g. because there are no sequences in the componentManager
// that create values of some type required by the statement), the success flag
// of the returned object is false and both the sequences and the indices are null.
//
// NOTE: the order of the calls into Randomness below determines which random
// choices are made; do not reorder statements in this method casually.
@SuppressWarnings("unchecked")
private InputsAndSuccessFlag selectInputs(StatementKind statement) {

  Tracer.trace("selectInputs");

  // Variable inputTypes contains the types of the values required as input to the
  // statement given as a parameter to the selectInputs method.
  List<Class<?>> inputTypes = statement.getInputTypes();

  // The rest of the code in this method will attempt to create
  // a sequence that creates at least one value of type T for
  // every type T in inputTypes, and thus can be used to create all the
  // inputs for the statement.
  // We denote this goal sequence as "S". We don't create S explicitly, but
  // define it as the concatenation of the following list of sequences.
  // In other words, S = sequences[0] + ... + sequences[sequences.size()-1].
  // (This representation choice is for efficiency: it is cheaper to perform
  // a single concatenation of the subsequences in the end than repeatedly
  // extending S.)
  List<Sequence> sequences = new ArrayList<Sequence>();

  // We store the total size of S (number of statements) in the following variable.
  int totStatements = 0;

  // The method also returns a list of randomly-selected variables to
  // be used as inputs to the statement, represented as indices into S.
  // For example, given as statement a method M(T1)/T2 that takes as input
  // a value of type T1 and returns a value of type T2, this method might
  // return, for example, the sequence
  //
  //   T0 var0 = new T0(); T1 var1 = var0.getT1();
  //
  // and the singleton list [1] that represents variable var1. The variable
  // indices are stored in the following list. Upon successful completion
  // of this method, variables will contain inputTypes.size() variables.
  // Note additionally that for every i in variables, 0 <= i < |S|.
  List<Integer> variables = new ArrayList<Integer>();

  // [Optimization]
  // The following two variables are consulted and updated in the loop below only when
  // an alias ratio is present (GenInputsAbstract.alias_ratio != 0).
  // Their purpose is purely to improve efficiency. For a given loop iteration
  // i, "types" contains the output types of the non-primitive-declaration statements
  // added to S so far, and "typesToVars" maps each such type to the indices in S of
  // the statements producing it.
  SubTypeSet types = new SubTypeSet(false);
  MultiMap<Class<?>, Integer> typesToVars = new MultiMap<Class<?>, Integer>();

  for (int i = 0; i < inputTypes.size(); i++) {
    Class<?> t = inputTypes.get(i);

    // TODO Does this ever happen?
    if (!Reflection.isVisible(t)) return new InputsAndSuccessFlag(false, null, null);

    // true if the statement represents an instance (non-static) method, and we are currently
    // selecting a value to act as the receiver for the method (input 0).
    boolean isReceiver =
        (i == 0 && (statement instanceof RMethod) && (!((RMethod) statement).isStatic()));

    // If an alias ratio is given, attempt (presumably with probability alias_ratio -- confirm
    // against Randomness.weighedCoinFlip) to use a variable already in S.
    if (GenInputsAbstract.alias_ratio != 0
        && Randomness.weighedCoinFlip(GenInputsAbstract.alias_ratio)) {

      Tracer.trace("alias_ratio@selectInputs");

      // candidateVars will store the indices that can serve as the i-th input to the statement.
      List<SimpleList<Integer>> candidateVars = new ArrayList<SimpleList<Integer>>();

      // For each type T in S compatible with inputTypes[i], add all the indices in S of type T.
      for (Class<?> match : types.getMatches(t)) {
        // Sanity check: the domain of typesToVars contains all the types in variable types.
        assert typesToVars.keySet().contains(match);
        candidateVars.add(
            new ArrayListSimpleList<Integer>(
                new ArrayList<Integer>(typesToVars.getValues(match))));
      }

      // If any type-compatible variables were found, pick one at random as the i-th input
      // and move on to the next input. Otherwise fall through to the general case below.
      SimpleList<Integer> candidateVars2 = new ListOfLists<Integer>(candidateVars);
      if (candidateVars2.size() > 0) {
        int randVarIdx = Randomness.nextRandomInt(candidateVars2.size());
        Integer randVar = candidateVars2.get(randVarIdx);
        variables.add(randVar);
        continue;
      }
    }

    // Reached both when aliasing was not attempted and when it was attempted but found no
    // candidate variable.
    Tracer.trace("NO alias_ratio@selectInputs");

    // If we got here, it means we will not use a value already defined in S,
    // so we will have to augment S with new statements that yield a value of type inputTypes[i].
    // We will do this by assembling a list of candidate sequences (stored in the list declared
    // immediately below) that create one or more values of the appropriate type,
    // randomly selecting a single sequence from this list, and appending it to S.
    SimpleList<Sequence> l = null;

    // We use one of three ways to gather candidate sequences, but the third case below
    // is by far the most common.

    if (GenInputsAbstract.always_use_ints_as_objects && t.equals(Object.class)) {

      Tracer.trace("always_use_ints_as_objects@selectInputs");

      // 1. OBSCURE, applicable only for branch-directed generation project. Get all
      //    sequences that create one or more integer. Applicable only when inputTypes[i]
      //    is "Object" and always_use_ints_as_objects option is specified.
      if (Log.isLoggingOn()) Log.logLine("Integer-as-object heuristic: will use random Integer.");
      l = componentManager.getSequencesForType(int.class, false);

    } else if (t.isArray()) {

      // 2. If T=inputTypes[i] is an array type, ask the component manager for all sequences
      //    of type T (list l1), but also try to directly build some sequences that create arrays
      //    (list l2). The candidates are the two lists combined.
      SimpleList<Sequence> l1 = componentManager.getSequencesForType(statement, i);
      if (Log.isLoggingOn())
        Log.logLine("Array creation heuristic: will create helper array of type " + t);
      SimpleList<Sequence> l2 = HelperSequenceCreator.createSequence(componentManager, t);
      l = new ListOfLists<Sequence>(l1, l2);

    } else {

      // 3. COMMON CASE: ask the component manager for all sequences that yield the required type.
      if (Log.isLoggingOn()) Log.logLine("Will query component set for objects of type" + t);
      l = componentManager.getSequencesForType(statement, i);
    }
    assert l != null;

    if (Log.isLoggingOn()) Log.logLine("components: " + l.size());

    // If we were not able to find (or create) any sequences of type inputTypes[i], and we are
    // allowed to use null values, use null. If we're not allowed (null is forbidden, or this
    // input is the receiver of an instance method), then return with failure.
    if (l.size() == 0) {
      Tracer.trace("selectinputs-evalforbid");
      if (isReceiver || GenInputsAbstract.forbid_null) {
        // The nested ifs below only decide whether to emit a trace event: it is emitted when
        // the failure is due to forbid_null rather than to the input being a receiver.
        if (!isReceiver) {
          if (GenInputsAbstract.forbid_null) {
            Tracer.trace("forbid_null@selectinputs-evalforbid");
          }
        }
        // NOTE(review): this message is also logged when the failure is caused by isReceiver
        // alone, in which case forbid_null may be false.
        if (Log.isLoggingOn())
          Log.logLine("forbid-null option is true. Failed to create new sequence.");
        return new InputsAndSuccessFlag(false, null, null);
      } else {
        // forbid_null is necessarily false in this branch, so this trace is always emitted here.
        if (!GenInputsAbstract.forbid_null) {
          Tracer.trace("NOT forbid_null@selectinputs-evalforbid");
        }
        if (Log.isLoggingOn()) Log.logLine("Will use null as " + i + "-th input");
        // Append a one-statement sequence declaring a null/zero value of type t to S, and use
        // its single variable (at index totStatements in S) as the i-th input.
        StatementKind st = PrimitiveOrStringOrNullDecl.nullOrZeroDecl(t);
        Sequence seq = new Sequence().extend(st, new ArrayList<Variable>());
        variables.add(totStatements);
        sequences.add(seq);
        assert seq.size() == 1;
        totStatements++;
        // Null is not an interesting value to add to the set of
        // possible values to reuse, so we don't update typesToVars or types.
        continue;
      }
    }

    // At this point, we have one or more candidate sequences that create values of type
    // inputTypes[i].
    // However, the user may have requested that we use null values as inputs with some given
    // frequency.
    // If this is the case, then use null instead with some probability (never for a receiver).
    Tracer.trace("selectinputs-null-ratio");
    if (!isReceiver
        && GenInputsAbstract.null_ratio != 0
        && Randomness.weighedCoinFlip(GenInputsAbstract.null_ratio)) {
      Tracer.trace("null_ratio@selectinputs-null-ratio");
      if (Log.isLoggingOn())
        Log.logLine("null-ratio option given. Randomly decided to use null as input.");
      // Same construction as the "no candidates" null case above.
      StatementKind st = PrimitiveOrStringOrNullDecl.nullOrZeroDecl(t);
      Sequence seq = new Sequence().extend(st, new ArrayList<Variable>());
      variables.add(totStatements);
      sequences.add(seq);
      assert seq.size() == 1;
      totStatements++;
      continue;
    }
    Tracer.trace("NOT null_ratio@selectinputs-null-ratio");

    // At this point, we have a list of candidate sequences and need to select a
    // randomly-chosen sequence from the list. With the small_tests option the selection is
    // weighted (see Randomness.randomMemberWeighted); otherwise randomMember is used.
    Sequence chosenSeq = null;
    Tracer.trace("selectInputs-smalltests");
    if (GenInputsAbstract.small_tests) {
      Tracer.trace("small_tests@selectInputs-smalltests");
      chosenSeq = Randomness.randomMemberWeighted(l);
    } else {
      Tracer.trace("NO small_tests@selectInputs-smalltests");
      chosenSeq = Randomness.randomMember(l);
    }

    // We are not done yet: we have chosen a sequence that yields a value of the required
    // type inputTypes[i], but there may be more than one such value. Our last random
    // selection step is to select from among all possible values, using compatible-type
    // matching.
    // (Disabled alternative: if (i == 0 && statement.isInstanceMethod()) m = Match.EXACT_TYPE;)
    Match m = Match.COMPATIBLE_TYPE;
    Variable randomVariable = chosenSeq.randomVariableForTypeLastStatement(t, m);

    // A candidate sequence is expected to always yield a value of the requested type; failing
    // to find one is treated as an internal error.
    if (randomVariable == null) {
      throw new BugInRandoopException("type: " + t + ", sequence: " + chosenSeq);
    }

    // If we were unlucky and selected a value created by a primitive/String/null declaration
    // as the receiver for an instance method call, return with failure.
    // (The condition repeats the definition of isReceiver.)
    if (i == 0
        && (statement instanceof RMethod)
        && (!((RMethod) statement).isStatic())
        && chosenSeq.getCreatingStatement(randomVariable) instanceof PrimitiveOrStringOrNullDecl)
      return new InputsAndSuccessFlag(false, null, null);

    // [Optimization.] Update optimization-related variables "types" and "typesToVars".
    Tracer.trace("selectinputs-alias");
    if (GenInputsAbstract.alias_ratio != 0) {
      Tracer.trace("alias_ratio@selectinputs-alias");
      // Update types and typesToVars with every statement of the chosen sequence; statement j
      // of chosenSeq will sit at index totStatements + j in S.
      for (int j = 0; j < chosenSeq.size(); j++) {
        StatementKind stk = chosenSeq.getStatementKind(j);
        // Prim decl not an interesting candidate for multiple uses.
        if (stk instanceof PrimitiveOrStringOrNullDecl) continue;
        Class<?> outType = stk.getOutputType();
        types.add(outType);
        typesToVars.add(outType, totStatements + j);
      }
    }
    // NOTE(review): this trace is emitted unconditionally, including right after the
    // alias_ratio branch above was taken.
    Tracer.trace("NOT alias_ratio@selectinputs-alias");

    // Record the chosen variable as an index into S, then append the chosen sequence to S.
    variables.add(totStatements + randomVariable.index);
    sequences.add(chosenSeq);
    totStatements += chosenSeq.size();
  }

  return new InputsAndSuccessFlag(true, sequences, variables);
}
/** * Tries to create and execute a new sequence. If the sequence is new (not already in the * specified component manager), then it is executed and added to the manager's sequences. If the * sequence created is already in the manager's sequences, this method has no effect, and returns * null. */ private ExecutableSequence createNewUniqueSequence() { Tracer.trace("createNewUniqueSequence"); if (Log.isLoggingOn()) Log.logLine("-------------------------------------------"); StatementKind statement = null; if (this.statements.isEmpty()) return null; // Select a StatementInfo statement = Randomness.randomMember(this.statements); if (Log.isLoggingOn()) Log.logLine("Selected statement: " + statement.toString()); // jhp: add flags here InputsAndSuccessFlag sequences = selectInputs(statement); if (!sequences.success) { if (Log.isLoggingOn()) Log.logLine("Failed to find inputs for statement."); return null; } Sequence concatSeq = Sequence.concatenate(sequences.sequences); // Figure out input variables. List<Variable> inputs = new ArrayList<Variable>(); for (Integer oneinput : sequences.indices) { Variable v = concatSeq.getVariable(oneinput); inputs.add(v); } Sequence newSequence = concatSeq.extend(statement, inputs); // With .5 probability, do a primitive value heuristic. Tracer.trace("heuristic-uniquesequence"); if (GenInputsAbstract.repeat_heuristic && Randomness.nextRandomInt(10) == 0) { Tracer.trace("repeat_heuristic@heuristic-uniquesequence"); int times = Randomness.nextRandomInt(100); newSequence = newSequence.repeatLast(times); if (Log.isLoggingOn()) Log.log(">>>" + times + newSequence.toCodeString()); } Tracer.trace("NO_repeat_heuristic@heuristic-uniquesequence"); // If parameterless statement, subsequence inputs // will all be redundant, so just remove it from list of statements. if (statement.getInputTypes().size() == 0) { statements.remove(statement); } // If sequence is larger than size limit, try again. 
Tracer.trace("evaluating-maxsize"); if (newSequence.size() > GenInputsAbstract.maxsize) { Tracer.trace(">maxsize@evaluating-maxsize"); if (Log.isLoggingOn()) Log.logLine( "Sequence discarded because size " + newSequence.size() + " exceeds maximum allowed size " + GenInputsAbstract.maxsize); return null; } Tracer.trace("<maxsize@evaluating-maxsize"); randoopConsistencyTests(newSequence); if (this.allSequences.contains(newSequence)) { Tracer.trace("discard existing"); if (Log.isLoggingOn()) Log.logLine("Sequence discarded because the same sequence was previously created."); return null; } this.allSequences.add(newSequence); for (Sequence s : sequences.sequences) { s.lastTimeUsed = java.lang.System.currentTimeMillis(); } randoopConsistencyTest2(newSequence); if (Log.isLoggingOn()) { Log.logLine("Successfully created new unique sequence:" + newSequence.toString()); } // System.out.println("###" + statement.toStringVerbose() + "###" + statement.getClass()); // Keep track of any input sequences that are used in this sequence // Tests that contain only these sequences are probably redundant for (Sequence is : sequences.sequences) { subsumed_sequences.add(is); } return new ExecutableSequence(newSequence); }
public S8Vector(Sequence paramSequence) { this.data = new byte[paramSequence.size()]; addAll(paramSequence); }
/**
 * Convenience overload: computes the longest dependency set over the whole sequence, i.e. with
 * the last statement index {@code s.size() - 1} as the upper bound.
 *
 * @param s the sequence to analyze
 * @return the result of {@code longestDepSet(s, s.size() - 1)}
 */
public static BitSet longestDepSet(Sequence s) {
  final int lastIndex = s.size() - 1;
  return longestDepSet(s, lastIndex);
}
public void test( Transducer transducer, InstanceList data, String description, PrintStream viterbiOutputStream) { int[] ntrue = new int[segmentTags.length]; int[] npred = new int[segmentTags.length]; int[] ncorr = new int[segmentTags.length]; LabelAlphabet dict = (LabelAlphabet) transducer.getInputPipe().getTargetAlphabet(); for (int i = 0; i < data.size(); i++) { Instance instance = data.getInstance(i); Sequence input = (Sequence) instance.getData(); Sequence trueOutput = (Sequence) instance.getTarget(); assert (input.size() == trueOutput.size()); Sequence predOutput = transducer.viterbiPath(input).output(); assert (predOutput.size() == trueOutput.size()); List trueSegs = new ArrayList(); List predSegs = new ArrayList(); addSegs(trueSegs, trueOutput); addSegs(predSegs, predOutput); // System.out.println("FieldF1Evaluator instance "+instance.getName ()); // printSegs(dict, trueSegs, "True"); // printSegs(dict, predSegs, "Pred"); for (Iterator it = predSegs.iterator(); it.hasNext(); ) { Segment seg = (Segment) it.next(); npred[seg.tag]++; if (trueSegs.contains(seg)) { ncorr[seg.tag]++; } } for (Iterator it = trueSegs.iterator(); it.hasNext(); ) { Segment seg = (Segment) it.next(); ntrue[seg.tag]++; } } DecimalFormat f = new DecimalFormat("0.####"); logger.info(description + " per-field F1"); for (int tag = 0; tag < segmentTags.length; tag++) { double precision = ((double) ncorr[tag]) / npred[tag]; double recall = ((double) ncorr[tag]) / ntrue[tag]; double f1 = (2 * precision * recall) / (precision + recall); Label name = dict.lookupLabel(segmentTags[tag]); logger.info( " segments " + name + " true = " + ntrue[tag] + " pred = " + npred[tag] + " correct = " + ncorr[tag]); logger.info( " precision=" + f.format(precision) + " recall=" + f.format(recall) + " f1=" + f.format(f1)); } }
public S16Vector(Sequence sequence) { data = new short[sequence.size()]; addAll(sequence); }