/**
 * Scans {@code output} for maximal runs of one repeated tag whose string form appears in
 * {@code segmentTags}, and appends a {@code Segment(tagIndex, first, last)} to {@code segs} for
 * each run. Positions whose tag is not listed in {@code segmentTags} are skipped.
 *
 * @param segs list that receives the discovered segments (appended in sequence order)
 * @param output tag sequence to scan
 */
private void addSegs(List segs, Sequence output) {
    final int lastPos = output.size() - 1;
    int pos = 0;

    while (pos <= lastPos) {
      Object tag = output.get(pos);
      int tagIndex = ArrayUtils.indexOf(segmentTags, tag.toString());

      if (tagIndex < 0) {
        // Not a tag we track; move on to the next position.
        pos++;
        continue;
      }

      // Grow the run for as long as the following tag equals the one that opened it.
      int runEnd = pos;
      while (runEnd < lastPos && output.get(runEnd + 1).equals(tag)) {
        runEnd++;
      }

      // Covers both a run cut short by a different tag and a run reaching the end of the sequence.
      segs.add(new Segment(tagIndex, pos, runEnd));
      pos = runEnd + 1;
    }
  }
 /**
  * Returns the largest dependency set found among statements {@code 0..idx} of {@code s}.
  *
  * <p>The dependency set of statement {@code i} is built as: {@code i} itself, united with the
  * set recorded for the statement that most recently used each of its input variables (tracked in
  * {@code lastuse}). The statement with the greatest set cardinality wins; on ties the earliest
  * statement is kept.
  *
  * <p>The returned {@code BitSet} is an element of the shared {@code sets} array, not a copy: it
  * is cleared and refilled by the next call, so callers that need to keep it must clone it.
  *
  * @param s the sequence to analyse; must be non-empty
  * @param idx index of the last statement to consider, {@code 0 <= idx < s.size()}
  * @return the dependency set (statement indices) with the most members
  * @throws IllegalArgumentException if {@code s} is empty or {@code idx} is out of range
  */
 public static BitSet longestDepSet(Sequence s, int idx) {
   if (s.size() == 0) throw new IllegalArgumentException("size must be greater than 0.");
   if (idx < 0 || idx >= s.size()) {
     throw new IllegalArgumentException(
         "idx must be in [0, " + (s.size() - 1) + "] but was " + idx);
   }
   // The scratch arrays are preallocated; longer sequences are not supported.
   assert s.size() <= maxlength;
   int max = -1;
   int maxidx = -1;
   for (int i = 0; i <= idx; i++) {
     // Reset the scratch set for statement i; it always contains i itself.
     BitSet set = sets[i];
     set.clear();
     set.set(i);
     lastuse[i] = i;
     for (Variable invar : s.getInputs(i)) {
       // Inherit everything the previous user of this variable depended on,
       // then record statement i as the variable's latest user.
       set.or(sets[lastuse[invar.index]]);
       lastuse[invar.index] = i;
     }
     int size = set.cardinality();
     if (size > max) {
       max = size;
       maxidx = i;
     }
   }
   return sets[maxidx];
 }
  /**
   * Records {@code candidateIndex} in this leaf. When the leaf's index array is already full, the
   * leaf is either replaced in {@code father} by a fresh inner node (if {@code depth} is still
   * below {@code candidate.size() - 1}) into which all stored candidates and the new one are
   * re-inserted, or, failing that, the index array is doubled in place.
   *
   * @param candidate the sequence being inserted
   * @param candidateIndex position of {@code candidate} within {@code allCandidates}
   * @param depth current depth in the hash tree
   * @param father parent node holding the reference to this leaf
   * @param allCandidates list the stored indices refer to
   */
  @Override
  public void addSequence(
      Sequence candidate,
      int candidateIndex,
      int depth,
      HashTreeNode father,
      ArrayList<Sequence> allCandidates) {
    candidateIndicesSize++;

    // Fast path: the new index still fits.
    if (candidateIndicesSize <= candidateIndices.length) {
      candidateIndices[candidateIndicesSize - 1] = candidateIndex;
      return;
    }

    // Overflow, and the leaf may not be turned into an inner node: double the storage.
    if (depth >= candidate.size() - 1) {
      int[] grown = new int[candidateIndices.length * 2];
      System.arraycopy(candidateIndices, 0, grown, 0, candidateIndices.length);
      candidateIndices = grown;
      candidateIndices[candidateIndicesSize - 1] = candidateIndex;
      return;
    }

    // Overflow, and the leaf may become an inner node: swap it out in the parent ...
    HashTreeInnerNode replacement = new HashTreeInnerNode();
    father.replaceNode(candidate.getItem(depth - 1), replacement);

    // ... then push every previously stored candidate, followed by the new one, into it.
    for (int storedIndex : candidateIndices) {
      replacement.addSequence(
          allCandidates.get(storedIndex), storedIndex, depth, father, allCandidates);
    }
    replacement.addSequence(
        allCandidates.get(candidateIndex), candidateIndex, depth, father, allCandidates);
  }
  /**
   * Prints the instance to {@code writer}, one line per sequence position: the target label
   * followed, for every location of that position's feature vector, by a space and the feature
   * name immediately followed by its value. A blank line is printed after the last position.
   *
   * @param carrier instance whose data and target are both {@code Sequence}s of equal length
   * @return {@code carrier}, unchanged
   * @throws IllegalArgumentException if data and target lengths differ, or the data is neither a
   *     {@code TokenSequence} nor a {@code FeatureVectorSequence}
   * @throws UnsupportedOperationException if the data is a {@code TokenSequence}
   */
  public Instance pipe(Instance carrier) {
    Sequence data = (Sequence) carrier.getData();
    Sequence target = (Sequence) carrier.getTarget();

    if (data.size() != target.size()) {
      throw new IllegalArgumentException(
          "Trying to print into SimpleTagger format, where data and target lengths do not match\n"
              + "data.length = "
              + data.size()
              + ", target.length = "
              + target.size());
    }

    final int length = data.size();

    // Reject the data kinds that cannot be printed before doing any output.
    if (data instanceof TokenSequence) {
      throw new UnsupportedOperationException("Not yet implemented.");
    }
    if (!(data instanceof FeatureVectorSequence)) {
      throw new IllegalArgumentException("Don't know how to print data of type " + data);
    }

    FeatureVectorSequence vectors = (FeatureVectorSequence) data;

    // The alphabet of the first vector is used to resolve feature names for every position.
    Alphabet dict = null;
    if (vectors.size() > 0) {
      dict = vectors.getFeatureVector(0).getAlphabet();
    }

    for (int pos = 0; pos < length; pos++) {
      Object label = target.get(pos);
      writer.print(label);

      FeatureVector vector = vectors.getFeatureVector(pos);
      final int locations = vector.numLocations();
      for (int loc = 0; loc < locations; loc++) {
        writer.print(' ');
        String featureName = dict.lookupObject(vector.indexAtLocation(loc)).toString();
        double featureValue = vector.valueAtLocation(loc);
        // Name and value are written back to back, with no separator between them.
        String rendered = featureName + String.valueOf(featureValue);
        writer.print(rendered);
      }
      writer.println();
    }

    // Blank line terminates the instance.
    writer.println();

    return carrier;
  }
 public ConfidenceEvaluator(InstanceWithConfidence[] instances, boolean sorted) {
   this.confidences = new Vector();
   for (int i = 0; i < instances.length; i++) {
     Sequence input = (Sequence) instances[i].getInstance().getData();
     confidences.add(
         new EntityConfidence(
             instances[i].getConfidence(), instances[i].correct(), input, 0, input.size() - 1));
   }
   if (!sorted) Collections.sort(confidences, new ConfidenceComparator());
   this.nBins = DEFAULT_NUM_BINS;
   this.numCorrect = getNumCorrectEntities();
 }
  /**
   * Builds a new sequence containing only the statements of {@code s} that belong to its longest
   * dependency set (as computed by {@code longestDepSet(s, s.size() - 1)}), with each statement's
   * inputs re-pointed at the corresponding variables of the new sequence.
   *
   * @param s the sequence to reduce
   * @return a sequence made of the selected statements, in their original relative order
   */
  public static Sequence getLongestDepSetSubSequence(Sequence s) {
    BitSet selected = longestDepSet(s, s.size() - 1);
    int length = selected.length();
    assert selected.get(length - 1);

    // First pass: assign each kept statement its position in the reduced sequence.
    Map<Integer, Integer> renumbering = new LinkedHashMap<Integer, Integer>();
    int next = 0;
    for (int old = selected.nextSetBit(0); old >= 0; old = selected.nextSetBit(old + 1)) {
      renumbering.put(old, next++);
    }

    // Second pass: replay the kept statements, translating their input variables.
    Sequence reduced = new Sequence();
    for (int old = selected.nextSetBit(0); old >= 0; old = selected.nextSetBit(old + 1)) {
      List<Variable> originalInputs = s.getInputs(old);
      List<Variable> translatedInputs = new ArrayList<Variable>(originalInputs.size());
      for (Variable input : originalInputs) {
        translatedInputs.add(reduced.getVariable(renumbering.get(input.index)));
      }
      reduced = reduced.extend(s.getStatementKind(old), translatedInputs);
    }
    return reduced;
  }
  // Chooses the inputs for one invocation of the given statement.
  // This method is responsible for doing two things:
  //
  // 1. Selecting at random a collection of sequences that can be used to
  //    create input values for the given statement, and
  //
  // 2. Selecting at random valid indices into the above sequences specifying
  //    the values to be used as input to the statement.
  //
  // The selected sequences and indices are wrapped in an InputsAndSuccessFlag
  // object and returned. If an appropriate collection of sequences and indices
  // was not found (e.g. because there are no sequences in the componentManager
  // that create values of some type required by the statement), the success flag
  // of the returned object is false (and its sequences and indices are null).
  @SuppressWarnings("unchecked")
  private InputsAndSuccessFlag selectInputs(StatementKind statement) {
    Tracer.trace("selectInputs");

    // Variable inputTypes contains the types of the values required as input
    // to the statement given as a parameter to the selectInputs method.

    List<Class<?>> inputTypes = statement.getInputTypes();

    // The rest of the code in this method will attempt to create
    // a sequence that creates at least one value of type T for
    // every type T in inputTypes, and thus can be used to create all the
    // inputs for the statement.
    // We denote this goal sequence as "S". We don't create S explicitly, but
    // define it as the concatenation of the following list of sequences.
    // In other words, S = sequences[0] + ... + sequences[sequences.size()-1].
    // (This representation choice is for efficiency: it is cheaper to perform
    // a single concatenation of the subsequences in the end than repeatedly
    // extending S.)

    List<Sequence> sequences = new ArrayList<Sequence>();

    // We store the total size of S in the following variable.

    int totStatements = 0;

    // The method also returns a list of randomly-selected variables to
    // be used as inputs to the statement, represented as indices into S.
    // For example, given as statement a method M(T1)/T2 that takes as input
    // a value of type T1 and returns a value of type T2, this method might
    // return, for example, the sequence
    //
    // T0 var0 = new T0(); T1 var1 = var0.getT1();
    //
    // and the singleton list [1] that represents variable var1. The variable
    // indices are stored in the following list. Upon successful completion
    // of this method, variables will contain inputTypes.size() variables.
    // Note additionally that for every i in variables, 0 <= i < |S|.

    List<Integer> variables = new ArrayList<Integer>();

    // [Optimization]
    // The following two variables are used in the loop below only when
    // an alias ratio is present (GenInputsAbstract.alias_ratio != 0).
    // Their purpose is purely to improve efficiency. For a given loop iteration
    // i, "types" contains the types of all variables in S, and "typesToVars"
    // maps each type to all variable indices of the given type.
    // (Primitive/string/null declarations are deliberately left out of both; see the
    // update step at the bottom of the loop.)
    SubTypeSet types = new SubTypeSet(false);
    MultiMap<Class<?>, Integer> typesToVars = new MultiMap<Class<?>, Integer>();

    for (int i = 0; i < inputTypes.size(); i++) {
      Class<?> t = inputTypes.get(i);

      // TODO Does this ever happen?
      // A required input type that is not visible makes the whole selection fail.
      if (!Reflection.isVisible(t)) return new InputsAndSuccessFlag(false, null, null);

      // true if the statement represents an instance method, and we are currently
      // selecting a value to act as the receiver for the method.
      boolean isReceiver =
          (i == 0 && (statement instanceof RMethod) && (!((RMethod) statement).isStatic()));

      // If alias ratio is given, attempt with some probability to use a variable already in S.
      if (GenInputsAbstract.alias_ratio != 0
          && Randomness.weighedCoinFlip(GenInputsAbstract.alias_ratio)) {
        Tracer.trace("alias_ratio@selectInputs");

        // candidateVars will store the indices that can serve as the i-th input of the statement.
        List<SimpleList<Integer>> candidateVars = new ArrayList<SimpleList<Integer>>();

        // For each type T in S compatible with inputTypes[i], add all the indices in S of type T.
        for (Class<?> match : types.getMatches(t)) {
          // Sanity check: the domain of typesToVars contains all the types in variable types.
          assert typesToVars.keySet().contains(match);
          candidateVars.add(
              new ArrayListSimpleList<Integer>(
                  new ArrayList<Integer>(typesToVars.getValues(match))));
        }

        // If any type-compatible variables were found, pick one at random as the i-th input.
        SimpleList<Integer> candidateVars2 = new ListOfLists<Integer>(candidateVars);
        if (candidateVars2.size() > 0) {
          int randVarIdx = Randomness.nextRandomInt(candidateVars2.size());
          Integer randVar = candidateVars2.get(randVarIdx);
          variables.add(randVar);
          continue;
        }
      }
      // NOTE(review): this trace is also reached when the alias coin flip succeeded but no
      // compatible variable was found in S, not only when aliasing was not attempted.
      Tracer.trace("NO alias_ratio@selectInputs");
      // If we got here, it means we will not use a value already defined in S,
      // so we will have to augment S with new statements that yield a value of type inputTypes[i].
      // We will do this by assembling a list of candidate sequences (stored in the list declared
      // immediately below) that create one or more values of the appropriate type,
      // randomly selecting a single sequence from this list, and appending it to S.
      SimpleList<Sequence> l = null;

      // We use one of three ways to gather candidate sequences, but the third case below
      // is by far the most common.

      if (GenInputsAbstract.always_use_ints_as_objects && t.equals(Object.class)) {

        Tracer.trace("always_use_ints_as_objects@selectInputs");

        // 1. OBSCURE, applicable only for branch-directed generation project. Get all
        //    sequences that create one or more integers. Applicable only when inputTypes[i]
        //    is "Object" and the always_use_ints_as_objects option is specified.
        if (Log.isLoggingOn()) Log.logLine("Integer-as-object heuristic: will use random Integer.");
        l = componentManager.getSequencesForType(int.class, false);

      } else if (t.isArray()) {

        // 2. If T=inputTypes[i] is an array type, ask the component manager for all sequences
        //    of type T (list l1), but also try to directly build some sequences that create
        //    arrays (list l2). The candidates are the concatenation of both lists.
        SimpleList<Sequence> l1 = componentManager.getSequencesForType(statement, i);
        if (Log.isLoggingOn())
          Log.logLine("Array creation heuristic: will create helper array of type " + t);
        SimpleList<Sequence> l2 = HelperSequenceCreator.createSequence(componentManager, t);
        l = new ListOfLists<Sequence>(l1, l2);

      } else {

        // 3. COMMON CASE: ask the component manager for all sequences that yield the required type.
        if (Log.isLoggingOn()) Log.logLine("Will query component set for objects of type" + t);
        l = componentManager.getSequencesForType(statement, i);
      }
      assert l != null;

      if (Log.isLoggingOn()) Log.logLine("components: " + l.size());

      // If we were not able to find (or create) any sequences of type inputTypes[i], and we are
      // allowed to use null values, use null. If we're not allowed (null is forbidden, or the
      // value is the receiver of an instance method), then return with failure.
      if (l.size() == 0) {
        Tracer.trace("selectinputs-evalforbid");
        if (isReceiver || GenInputsAbstract.forbid_null) {
          // The nested checks below only decide whether to emit the forbid_null trace:
          // it is emitted when the failure is due to forbid_null rather than to isReceiver.
          if (!isReceiver) {
            if (GenInputsAbstract.forbid_null) {
              Tracer.trace("forbid_null@selectinputs-evalforbid");
            }
          }
          // NOTE(review): this message is also logged when the failure is caused by
          // isReceiver while forbid_null is false, in which case its wording is misleading.
          if (Log.isLoggingOn())
            Log.logLine("forbid-null option is true. Failed to create new sequence.");
          return new InputsAndSuccessFlag(false, null, null);
        } else {
          // forbid_null is necessarily false in this branch, so the check below always passes.
          if (!GenInputsAbstract.forbid_null) {
            Tracer.trace("NOT forbid_null@selectinputs-evalforbid");
          }
          if (Log.isLoggingOn()) Log.logLine("Will use null as " + i + "-th input");
          // Append a one-statement sequence declaring null (or zero) of type t to S and
          // select its only variable, which sits at index totStatements of S.
          StatementKind st = PrimitiveOrStringOrNullDecl.nullOrZeroDecl(t);
          Sequence seq = new Sequence().extend(st, new ArrayList<Variable>());
          variables.add(totStatements);
          sequences.add(seq);
          assert seq.size() == 1;
          totStatements++;
          // Null is not an interesting value to add to the set of
          // possible values to reuse, so we don't update typesToVars or types.
          continue;
        }
      }

      // At this point, we have one or more sequences that create non-null values of type
      // inputTypes[i].
      // However, the user may have requested that we use null values as inputs with some given
      // frequency.
      // If this is the case, then use null instead with some probability
      // (never for the receiver of an instance method).
      Tracer.trace("selectinputs-null-ratio");
      if (!isReceiver
          && GenInputsAbstract.null_ratio != 0
          && Randomness.weighedCoinFlip(GenInputsAbstract.null_ratio)) {
        Tracer.trace("null_ratio@selectinputs-null-ratio");
        if (Log.isLoggingOn())
          Log.logLine("null-ratio option given. Randomly decided to use null as input.");
        // Same single-statement null/zero declaration as in the empty-candidates case above.
        StatementKind st = PrimitiveOrStringOrNullDecl.nullOrZeroDecl(t);
        Sequence seq = new Sequence().extend(st, new ArrayList<Variable>());
        variables.add(totStatements);
        sequences.add(seq);
        assert seq.size() == 1;
        totStatements++;
        continue;
      }
      Tracer.trace("NOT null_ratio@selectinputs-null-ratio");

      // At this point, we have a list of candidate sequences and need to select a
      // randomly-chosen sequence from the list. With small_tests the weighted
      // selector is used; otherwise the plain random-member selector is used.
      Sequence chosenSeq = null;
      Tracer.trace("selectInputs-smalltests");
      if (GenInputsAbstract.small_tests) {
        Tracer.trace("small_tests@selectInputs-smalltests");
        chosenSeq = Randomness.randomMemberWeighted(l);
      } else {
        Tracer.trace("NO small_tests@selectInputs-smalltests");
        chosenSeq = Randomness.randomMember(l);
      }

      // Now, find values that satisfy the constraint set.
      // We have chosen a sequence that yields a value of the required type inputTypes[i],
      // but there may be more than one such value, so the last random selection step
      // picks one variable of a compatible type.
      Match m = Match.COMPATIBLE_TYPE;
      // if (i == 0 && statement.isInstanceMethod()) m = Match.EXACT_TYPE;
      Variable randomVariable = chosenSeq.randomVariableForTypeLastStatement(t, m);

      // A candidate sequence that yields no variable of the required type is treated
      // as an internal error.
      // if (i == 0 && statement.isInstanceMethod()) m = Match.EXACT_TYPE;
      if (randomVariable == null) {
        throw new BugInRandoopException("type: " + t + ", sequence: " + chosenSeq);
      }

      // If we were unlucky and selected a null value as the receiver
      // for a method call, return with failure.
      // (The condition repeats the isReceiver test computed at the top of the loop.)
      if (i == 0
          && (statement instanceof RMethod)
          && (!((RMethod) statement).isStatic())
          && chosenSeq.getCreatingStatement(randomVariable) instanceof PrimitiveOrStringOrNullDecl)
        return new InputsAndSuccessFlag(false, null, null);

      // [Optimization.] Update optimization-related variables "types" and "typesToVars".
      Tracer.trace("selectinputs-alias");
      if (GenInputsAbstract.alias_ratio != 0) {
        Tracer.trace("alias_ratio@selectinputs-alias");
        // Update types and typesToVars with every statement of the chosen sequence,
        // using the index each statement will have once the sequence is appended to S.
        for (int j = 0; j < chosenSeq.size(); j++) {
          StatementKind stk = chosenSeq.getStatementKind(j);
          if (stk instanceof PrimitiveOrStringOrNullDecl)
            continue; // Prim decl not an interesting candidate for multiple uses.
          Class<?> outType = stk.getOutputType();
          types.add(outType);
          typesToVars.add(outType, totStatements + j);
        }
      }
      // NOTE(review): this trace is emitted unconditionally, including when the
      // alias_ratio branch above was taken.
      Tracer.trace("NOT alias_ratio@selectinputs-alias");

      // Append the chosen sequence to S; the selected variable's index is shifted by
      // the number of statements already in S.
      variables.add(totStatements + randomVariable.index);
      sequences.add(chosenSeq);
      totStatements += chosenSeq.size();
    }

    return new InputsAndSuccessFlag(true, sequences, variables);
  }
  /**
   * Attempts to assemble one new sequence: a randomly chosen statement is appended to the
   * concatenation of randomly selected input sequences.
   *
   * <p>Returns {@code null}, without registering anything, when there are no statements to choose
   * from, when no inputs could be selected, when the result exceeds
   * {@code GenInputsAbstract.maxsize}, or when an equal sequence is already in
   * {@code allSequences}. Otherwise the sequence is added to {@code allSequences}, the input
   * sequences get their {@code lastTimeUsed} refreshed and are recorded in
   * {@code subsumed_sequences}, and the new sequence is returned wrapped in an
   * {@code ExecutableSequence}.
   *
   * <p>Side effect: a statement that takes no inputs is removed from {@code statements} once it
   * has been picked, even if the resulting sequence is later discarded.
   */
  private ExecutableSequence createNewUniqueSequence() {

    Tracer.trace("createNewUniqueSequence");

    if (Log.isLoggingOn()) {
      Log.logLine("-------------------------------------------");
    }

    if (this.statements.isEmpty()) {
      return null;
    }

    // Pick the statement that will end the new sequence.
    StatementKind statement = Randomness.randomMember(this.statements);
    if (Log.isLoggingOn()) {
      Log.logLine("Selected statement: " + statement.toString());
    }

    // jhp: add flags here
    InputsAndSuccessFlag selection = selectInputs(statement);
    if (!selection.success) {
      if (Log.isLoggingOn()) {
        Log.logLine("Failed to find inputs for statement.");
      }
      return null;
    }

    Sequence prefix = Sequence.concatenate(selection.sequences);

    // Translate the selected indices into variables of the concatenated prefix.
    List<Variable> arguments = new ArrayList<Variable>();
    for (Integer position : selection.indices) {
      arguments.add(prefix.getVariable(position));
    }

    Sequence newSequence = prefix.extend(statement, arguments);

    // Repeat heuristic: only when enabled and Randomness.nextRandomInt(10) yields 0,
    // repeat the last statement a random number of times drawn from nextRandomInt(100).
    Tracer.trace("heuristic-uniquesequence");
    if (GenInputsAbstract.repeat_heuristic && Randomness.nextRandomInt(10) == 0) {
      Tracer.trace("repeat_heuristic@heuristic-uniquesequence");
      int times = Randomness.nextRandomInt(100);
      newSequence = newSequence.repeatLast(times);
      if (Log.isLoggingOn()) {
        Log.log(">>>" + times + newSequence.toCodeString());
      }
    }
    Tracer.trace("NO_repeat_heuristic@heuristic-uniquesequence");

    // A statement without inputs would only ever produce redundant sequences,
    // so it is dropped from the pool after its first selection.
    if (statement.getInputTypes().isEmpty()) {
      statements.remove(statement);
    }

    // Discard sequences that exceed the configured size limit.
    Tracer.trace("evaluating-maxsize");
    if (newSequence.size() > GenInputsAbstract.maxsize) {
      Tracer.trace(">maxsize@evaluating-maxsize");
      if (Log.isLoggingOn()) {
        Log.logLine(
            "Sequence discarded because size "
                + newSequence.size()
                + " exceeds maximum allowed size "
                + GenInputsAbstract.maxsize);
      }
      return null;
    }
    Tracer.trace("<maxsize@evaluating-maxsize");

    randoopConsistencyTests(newSequence);

    // Discard sequences that were already generated before.
    if (this.allSequences.contains(newSequence)) {
      Tracer.trace("discard existing");
      if (Log.isLoggingOn()) {
        Log.logLine("Sequence discarded because the same sequence was previously created.");
      }
      return null;
    }

    this.allSequences.add(newSequence);

    // Stamp every input sequence with the time of this use.
    for (Sequence used : selection.sequences) {
      used.lastTimeUsed = java.lang.System.currentTimeMillis();
    }

    randoopConsistencyTest2(newSequence);

    if (Log.isLoggingOn()) {
      Log.logLine("Successfully created new unique sequence:" + newSequence.toString());
    }

    // Remember the input sequences that went into this one: tests consisting
    // only of such sequences are probably redundant.
    for (Sequence used : selection.sequences) {
      subsumed_sequences.add(used);
    }

    return new ExecutableSequence(newSequence);
  }
Esempio n. 9
0
 public S8Vector(Sequence paramSequence) {
   this.data = new byte[paramSequence.size()];
   addAll(paramSequence);
 }
 /**
  * Convenience overload: computes the longest dependency set over the whole sequence by
  * delegating to {@code longestDepSet(s, s.size() - 1)}.
  *
  * @param s the sequence to analyse
  * @return whatever the two-argument overload returns for the last index of {@code s}
  */
 public static BitSet longestDepSet(Sequence s) {
   final int lastIndex = s.size() - 1;
   return longestDepSet(s, lastIndex);
 }
  /**
   * Evaluates {@code transducer} on {@code data} and logs, for every tag in {@code segmentTags},
   * the number of true, predicted and correctly predicted segments together with precision,
   * recall and F1.
   *
   * <p>Segments are extracted from the true and the Viterbi-predicted output with
   * {@code addSegs}; a predicted segment counts as correct when the list of true segments of the
   * same instance {@code contains} it.
   *
   * <p>A ratio whose denominator is zero (no predicted segments, no true segments, or precision
   * and recall both zero) is reported as {@code 0} rather than as NaN.
   *
   * @param transducer model whose Viterbi path is evaluated
   * @param data instances whose data and target are sequences of equal length
   * @param description prefix used in the log header
   * @param viterbiOutputStream not used by this method
   */
  public void test(
      Transducer transducer,
      InstanceList data,
      String description,
      PrintStream viterbiOutputStream) {
    // Per-tag counters, indexed like segmentTags.
    int[] ntrue = new int[segmentTags.length];
    int[] npred = new int[segmentTags.length];
    int[] ncorr = new int[segmentTags.length];

    LabelAlphabet dict = (LabelAlphabet) transducer.getInputPipe().getTargetAlphabet();

    for (int i = 0; i < data.size(); i++) {
      Instance instance = data.getInstance(i);
      Sequence input = (Sequence) instance.getData();
      Sequence trueOutput = (Sequence) instance.getTarget();
      assert (input.size() == trueOutput.size());
      Sequence predOutput = transducer.viterbiPath(input).output();
      assert (predOutput.size() == trueOutput.size());

      List trueSegs = new ArrayList();
      List predSegs = new ArrayList();

      addSegs(trueSegs, trueOutput);
      addSegs(predSegs, predOutput);

      // Count predicted segments, and those that also occur among the true segments.
      for (Iterator it = predSegs.iterator(); it.hasNext(); ) {
        Segment seg = (Segment) it.next();
        npred[seg.tag]++;
        if (trueSegs.contains(seg)) {
          ncorr[seg.tag]++;
        }
      }

      for (Iterator it = trueSegs.iterator(); it.hasNext(); ) {
        Segment seg = (Segment) it.next();
        ntrue[seg.tag]++;
      }
    }

    DecimalFormat f = new DecimalFormat("0.####");
    logger.info(description + " per-field F1");
    for (int tag = 0; tag < segmentTags.length; tag++) {
      // Guard every division: 0/0 on doubles yields NaN, which would otherwise be logged.
      double precision = (npred[tag] == 0) ? 0.0 : ((double) ncorr[tag]) / npred[tag];
      double recall = (ntrue[tag] == 0) ? 0.0 : ((double) ncorr[tag]) / ntrue[tag];
      double prSum = precision + recall;
      double f1 = (prSum == 0.0) ? 0.0 : (2 * precision * recall) / prSum;
      Label name = dict.lookupLabel(segmentTags[tag]);
      logger.info(
          " segments "
              + name
              + "  true = "
              + ntrue[tag]
              + "  pred = "
              + npred[tag]
              + "  correct = "
              + ncorr[tag]);
      logger.info(
          " precision="
              + f.format(precision)
              + " recall="
              + f.format(recall)
              + " f1="
              + f.format(f1));
    }
  }
 public S16Vector(Sequence sequence) {
   data = new short[sequence.size()];
   addAll(sequence);
 }