@SuppressWarnings({"unchecked"})
        @Override
        protected void fillFeatures(
            Pair<Mention, ClusteredMention> input,
            Counter<Feature> inFeatures,
            Boolean output,
            Counter<Feature> outFeatures) {
          // --Input Features
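          // ACTIVE_FEATURES (defined elsewhere) is expected to hold either Class entries
          // (singleton features) or Pair<Class, Class> entries (feature conjunctions);
          // anything else is silently skipped.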
          for (Object o : ACTIVE_FEATURES) {
            if (o instanceof Class) {
              // (case: singleton feature)
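              // count is an out-parameter: feature(...) may overwrite the default weight of 1.0,
              // and features with a non-positive weight are dropped.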
              Option<Double> count = new Option<Double>(1.0);
              Feature feat = feature((Class) o, input, count);
              if (count.get() > 0.0) {
                inFeatures.incrementCount(feat, count.get());
              }
            } else if (o instanceof Pair) {
              // (case: pair of features)
              Pair<Class, Class> pair = (Pair<Class, Class>) o;
              Option<Double> countA = new Option<Double>(1.0);
              Option<Double> countB = new Option<Double>(1.0);
              Feature featA = feature(pair.getFirst(), input, countA);
              Feature featB = feature(pair.getSecond(), input, countB);
              if (countA.get() * countB.get() > 0.0) {
                inFeatures.incrementCount(
                    new Feature.PairFeature(featA, featB), countA.get() * countB.get());
              }
            }
          }

          // --Output Features
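          // (single indicator feature for the coreference label; when output is null, no label feature is added)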
          if (output != null) {
            outFeatures.incrementCount(new Feature.CoreferentIndicator(output), 1.0);
          }
        }
        private <E> Feature feature(
            Class<E> clazz, Pair<Mention, ClusteredMention> input, Option<Double> count) {

          // --Variables
          Mention onPrix =
              input.getFirst(); // the first mention (referred to as m_i in the handout)
          Mention candidate =
              input.getSecond().mention; // the second mention (referred to as m_j in the handout)
          Entity candidateCluster =
              input.getSecond().entity; // the cluster containing the second mention

          // --Features
          if (clazz.equals(Feature.ExactMatch.class)) {
            // (exact string match)
            return new Feature.ExactMatch(onPrix.gloss().equals(candidate.gloss()));
          } else if (clazz.equals(Feature.SentenceDist.class)) {
            // (number of sentences separating the two mentions)
            return new Feature.SentenceDist(
                Math.abs(
                    onPrix.doc.indexOfSentence(onPrix.sentence)
                        - candidate.doc.indexOfSentence(candidate.sentence)));
          } else if (clazz.equals(Feature.MentionDist.class)) {
            // (number of mentions separating the two mentions)
            return new Feature.MentionDist(
                Math.abs(
                    onPrix.doc.indexOfMention(onPrix) - candidate.doc.indexOfMention(candidate)));
          } else if (clazz.equals(Feature.EitherHeadWordPronoun.class)) {
            return new Feature.EitherHeadWordPronoun(
                Pronoun.isSomePronoun(onPrix.gloss()) || Pronoun.isSomePronoun(candidate.gloss()));
          } else if (clazz.equals(Feature.CandidateNERTag.class)) {
            return new Feature.CandidateNERTag(candidate.headToken().nerTag());
          } else if (clazz.equals(Feature.CandidateSpeaker.class)) {
            return new Feature.CandidateSpeaker(candidate.headToken().speaker());
          } else if (clazz.equals(Feature.FixedSpeaker.class)) {
            return new Feature.FixedSpeaker(onPrix.headToken().speaker());
          } else if (clazz.equals(Feature.HeadWordMatch.class)) {
            return new Feature.HeadWordMatch(onPrix.headWord().equals(candidate.headWord()));
          } else if (clazz.equals(Feature.HeadWordLemmaMatch.class)) {
            return new Feature.HeadWordLemmaMatch(
                onPrix.headToken().lemma().equals(candidate.headToken().lemma()));
          } else if (clazz.equals(Feature.FixedNERTag.class)) {
            return new Feature.FixedNERTag(onPrix.headToken().nerTag());
          } else if (clazz.equals(Feature.SpeakerMatch.class)) {
            return new Feature.SpeakerMatch(
                candidate.headToken().speaker().equals(onPrix.headToken().speaker()));
          } else if (clazz.equals(Feature.NERTagMatch.class)) {
            return new Feature.NERTagMatch(
                candidate.headToken().nerTag().equals(onPrix.headToken().nerTag()));
          } else if (clazz.equals(Feature.CandidatePOSTag.class)) {
            return new Feature.CandidatePOSTag(candidate.headToken().posTag());
          } else if (clazz.equals(Feature.FixedPOSTag.class)) {
            return new Feature.FixedPOSTag(onPrix.headToken().posTag());
          } else if (clazz.equals(Feature.GenderMatch.class)) {
            // (false only when both mentions have a known gender and the genders disagree)
            Pair<Boolean, Boolean> match = Util.haveGenderAndAreSameGender(onPrix, candidate);
            boolean finalMatch = (!match.getFirst() || match.getSecond());
            return new Feature.GenderMatch(finalMatch);
          } else if (clazz.equals(Feature.NumberMatch.class)) {
            // (false only when both mentions have a known number and the numbers disagree)
            Pair<Boolean, Boolean> match = Util.haveNumberAndAreSameNumber(onPrix, candidate);
            boolean finalMatch = (!match.getFirst() || match.getSecond());
            return new Feature.NumberMatch(finalMatch);
          }
          // } else if (clazz.equals(Feature.NewFeature.class)) {
          /*
           * TODO: Add features to return for specific classes. Implement calculating values of features here.
           */

          else {
            throw new IllegalArgumentException("Unregistered feature: " + clazz);
          }
        }
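
  /**
   * Trains the aligner with EM: IBM Model 1 is run first to estimate the translation
   * probabilities t(s|t), and IBM Model 2 then refines t(s|t) while also estimating the
   * distortion probabilities q(j|i,l,m). Each EM loop runs for at most MAX_ITERS iterations
   * or until the mean-squared parameter change drops below CONVERGENCE.
   */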
  public void train(List<SentencePair> trainingPairs) {
    sourceTargetCounts = new CounterMap<String, String>();
    sourceTargetDistortions = new CounterMap<Pair<Integer, Integer>, Pair<Integer, Integer>>();
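    // Initialization pass: give every co-occurring (source, target) word pair and every
    // observed (j, i) alignment position an unnormalized count of 1.0.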
    for (SentencePair pair : trainingPairs) {
      List<String> sourceSentence = pair.getSourceWords();
      List<String> targetSentence = pair.getTargetWords();
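      // Append the NULL word once per pair; this assumes getTargetWords() returns the same
      // mutable list on later calls, so the NULL word remains visible to the EM passes below.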
      targetSentence.add(WordAligner.NULL_WORD);
      int m = sourceSentence.size();
      int l = targetSentence.size();
      for (int i = 0; i < m; i++) {
        String sourceWord = sourceSentence.get(i);
        for (int j = 0; j < l; j++) {
          String targetWord = targetSentence.get(j);
          sourceTargetCounts.setCount(sourceWord, targetWord, 1.0);
          Pair<Integer, Integer> lmPair = new Pair<Integer, Integer>(l, m);
          Pair<Integer, Integer> jiPair = new Pair<Integer, Integer>(j, i);
          sourceTargetDistortions.setCount(jiPair, lmPair, 1.0);
        }
      }
    }

    // EM for IBM Model 1: estimate the translation probabilities t(s|t)
    double delta = Double.POSITIVE_INFINITY;
    for (int i = 0; i < MAX_ITERS && delta > CONVERGENCE; i++) {
      CounterMap<String, String> tempSourceTargetCounts = new CounterMap<String, String>();
      Counter<String> targetCounts = new Counter<String>();
      delta = 0.0;
      for (SentencePair pair : trainingPairs) {
        List<String> sourceSentence = pair.getSourceWords();
        List<String> targetSentence = pair.getTargetWords();
        Counter<String> sourceTotals = new Counter<String>();

        // E-step normalizers: for each distinct source word, the sum of t(s|t) over the
        // target sentence (deduplicated so repeated source tokens are not double counted).
        for (String sourceWord : new java.util.HashSet<String>(sourceSentence)) {
          for (String targetWord : targetSentence) {
            sourceTotals.incrementCount(
                sourceWord, sourceTargetCounts.getCount(sourceWord, targetWord));
          }
        }
        for (String sourceWord : sourceSentence) {
          for (String targetWord : targetSentence) {
            double transProb = sourceTargetCounts.getCount(sourceWord, targetWord);
            double sourceTotal = sourceTotals.getCount(sourceWord);
            tempSourceTargetCounts.incrementCount(sourceWord, targetWord, transProb / sourceTotal);
            targetCounts.incrementCount(targetWord, transProb / sourceTotal);
          }
        }
      }

      // update t(s|t) values
      for (String sourceWord : tempSourceTargetCounts.keySet()) {
        for (String targetWord : tempSourceTargetCounts.getCounter(sourceWord).keySet()) {
          double oldProb = sourceTargetCounts.getCount(sourceWord, targetWord);
          double newProb =
              tempSourceTargetCounts.getCount(sourceWord, targetWord)
                  / targetCounts.getCount(targetWord);
          sourceTargetCounts.setCount(sourceWord, targetWord, newProb);
          delta += Math.pow(oldProb - newProb, 2.0);
        }
      }
      delta /= sourceTargetCounts.totalSize();
    }

    // EM for IBM Model 2: refine t(s|t) and estimate the distortion probabilities q(j|i,l,m)

    delta = Double.POSITIVE_INFINITY;
    for (int iter = 0; iter < MAX_ITERS && delta > CONVERGENCE; iter++) {
      CounterMap<String, String> tempSourceTargetCounts = new CounterMap<String, String>();
      CounterMap<Pair<Integer, Integer>, Pair<Integer, Integer>> tempSourceTargetDistortions =
          new CounterMap<Pair<Integer, Integer>, Pair<Integer, Integer>>();
      Counter<String> targetCounts = new Counter<String>();
      CounterMap<Pair<Integer, Integer>, Integer> targetDistorts =
          new CounterMap<Pair<Integer, Integer>, Integer>();
      delta = 0.0;
      for (SentencePair pair : trainingPairs) {
        List<String> sourceSentence = pair.getSourceWords();
        List<String> targetSentence = pair.getTargetWords();
        CounterMap<Pair<Integer, Integer>, Integer> distortSourceTotals =
            new CounterMap<Pair<Integer, Integer>, Integer>();
        Pair<Integer, Integer> lmPair =
            new Pair<Integer, Integer>(targetSentence.size(), sourceSentence.size());
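        // lmPair = (target length l, source length m) is the conditioning context for the
        // distortion probabilities q(j|i,l,m); the first pass accumulates, for each source
        // position i, the normalizer sum_j q(j|i,l,m) * t(s_i|t_j).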
        for (int i = 0; i < sourceSentence.size(); i++) {
          String sourceWord = sourceSentence.get(i);
          for (int j = 0; j < targetSentence.size(); j++) {
            String targetWord = targetSentence.get(j);
            Pair<Integer, Integer> jiPair = new Pair<Integer, Integer>(j, i);
            double currTransProb = sourceTargetCounts.getCount(sourceWord, targetWord);
            double currAlignProb = sourceTargetDistortions.getCount(jiPair, lmPair);
            distortSourceTotals.incrementCount(lmPair, i, currTransProb * currAlignProb);
          }
        }
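        // E-step: each (i, j) position pair contributes its posterior alignment probability
        // to the expected counts of both the translation and the distortion parameters.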
        for (int i = 0; i < sourceSentence.size(); i++) {
          String sourceWord = sourceSentence.get(i);
          double distortTransSourceTotal = distortSourceTotals.getCount(lmPair, i);
          for (int j = 0; j < targetSentence.size(); j++) {
            String targetWord = targetSentence.get(j);
            Pair<Integer, Integer> jiPair = new Pair<Integer, Integer>(j, i);
            double transProb = sourceTargetCounts.getCount(sourceWord, targetWord);
            double distortProb = sourceTargetDistortions.getCount(jiPair, lmPair);
            double update =
                (transProb * distortProb) / distortTransSourceTotal; // q(j|i,l,m) * t(s|t) / normalizer
            tempSourceTargetCounts.incrementCount(sourceWord, targetWord, update);
            tempSourceTargetDistortions.incrementCount(jiPair, lmPair, update);
            targetCounts.incrementCount(targetWord, update);
            targetDistorts.incrementCount(lmPair, i, update);
          }
        }
      }
      // update t(s|t) values
      double delta_trans = 0.0;
      for (String sourceWord : tempSourceTargetCounts.keySet()) {
        for (String targetWord : tempSourceTargetCounts.getCounter(sourceWord).keySet()) {
          double oldProb = sourceTargetCounts.getCount(sourceWord, targetWord);
          double newProb =
              tempSourceTargetCounts.getCount(sourceWord, targetWord)
                  / targetCounts.getCount(targetWord);
          sourceTargetCounts.setCount(sourceWord, targetWord, newProb);
          delta_trans += Math.pow(oldProb - newProb, 2.0);
        }
      }
      // update q(j|i,l,m) values
      double delta_dist = 0.0;
      for (Pair<Integer, Integer> jiPair : tempSourceTargetDistortions.keySet()) {
        for (Pair<Integer, Integer> lmPair :
            tempSourceTargetDistortions.getCounter(jiPair).keySet()) {
          double oldProb = sourceTargetDistortions.getCount(jiPair, lmPair);
          double tempAlignProb = tempSourceTargetDistortions.getCount(jiPair, lmPair);
          double tempTargetDist = targetDistorts.getCount(lmPair, jiPair.getSecond());
          double newProb = tempAlignProb / tempTargetDist;
          sourceTargetDistortions.setCount(jiPair, lmPair, newProb);
          delta_dist += Math.pow(oldProb - newProb, 2.0);
        }
      }
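      // Convergence measure: average of the mean-squared change in the translation table
      // and the mean-squared change in the distortion table.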
      delta =
          (delta_trans / sourceTargetCounts.totalSize()
                  + delta_dist / sourceTargetDistortions.totalSize())
              / 2.0;
    }
  }